From 16cfc61df8d3fb798727c32c80e71a32b5142740 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 23 Oct 2024 13:51:44 +1100 Subject: [PATCH] Updated vkd3d-latest patchset --- ...03ad04c89004c7f800c5b1a0ea7ba286229.patch} | 10514 +++++++++++++--- ...-4889c71857ce2152a9c9e014b9f5831f96d.patch | 2560 ++++ ...-cd74461d6dabae4e702de61a90533d811aa.patch | 1718 --- ...-9cb4207c92ec3ee05fce15580c89f2e5146.patch | 386 - ...-cd249a47b86545fe0b3a4b477f854965e85.patch | 1533 --- ...-9dd42d15ddca66458042b5e4b7775fa054b.patch | 1465 --- ...-c89e547e3ef767da28be46bc37ac2ba71ea.patch | 2764 ---- ...-03ad04c89004c7f800c5b1a0ea7ba286229.patch | 1584 --- 8 files changed, 11265 insertions(+), 11259 deletions(-) rename patches/vkd3d-latest/{0001-Updated-vkd3d-to-25232f2b2b35bcf1c265bc380c31cd1d32e.patch => 0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch} (62%) create mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch delete mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-cd74461d6dabae4e702de61a90533d811aa.patch delete mode 100644 patches/vkd3d-latest/0003-Updated-vkd3d-to-9cb4207c92ec3ee05fce15580c89f2e5146.patch delete mode 100644 patches/vkd3d-latest/0004-Updated-vkd3d-to-cd249a47b86545fe0b3a4b477f854965e85.patch delete mode 100644 patches/vkd3d-latest/0005-Updated-vkd3d-to-9dd42d15ddca66458042b5e4b7775fa054b.patch delete mode 100644 patches/vkd3d-latest/0006-Updated-vkd3d-to-c89e547e3ef767da28be46bc37ac2ba71ea.patch delete mode 100644 patches/vkd3d-latest/0007-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-25232f2b2b35bcf1c265bc380c31cd1d32e.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch similarity index 62% rename from patches/vkd3d-latest/0001-Updated-vkd3d-to-25232f2b2b35bcf1c265bc380c31cd1d32e.patch rename to 
patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch index 64b9c578..4bbca3d7 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-25232f2b2b35bcf1c265bc380c31cd1d32e.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch @@ -1,40 +1,42 @@ -From 7fb609a62cc524d0d886ae20412492af36e71821 Mon Sep 17 00:00:00 2001 +From 0633df4d8d67331fb19a60d22494b13136b6d263 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 3 Sep 2024 07:18:49 +1000 -Subject: [PATCH] Updated vkd3d to 25232f2b2b35bcf1c265bc380c31cd1d32e4f4a6. +Subject: [PATCH] Updated vkd3d to 03ad04c89004c7f800c5b1a0ea7ba28622916328. --- libs/vkd3d/Makefile.in | 1 + libs/vkd3d/include/private/vkd3d_common.h | 4 +- - libs/vkd3d/include/vkd3d_shader.h | 102 +- + libs/vkd3d/include/vkd3d_shader.h | 159 +- libs/vkd3d/libs/vkd3d-common/blob.c | 1 + libs/vkd3d/libs/vkd3d-shader/checksum.c | 49 +- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 35 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1249 +++------- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 19 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 90 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 423 +++- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 1479 +++++++++++- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 347 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 107 +- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 94 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1270 ++----- + libs/vkd3d/libs/vkd3d-shader/dxbc.c | 21 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 116 +- + libs/vkd3d/libs/vkd3d-shader/fx.c | 1001 ++++- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 2107 ++++++++++- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 349 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 134 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 778 +++++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1452 +++++++++++- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1046 ++++-- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 
1789 ++++++++- .../libs/vkd3d-shader/hlsl_constant_ops.c | 20 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 2050 ++++++++++------- - libs/vkd3d/libs/vkd3d-shader/msl.c | 319 +++ - libs/vkd3d/libs/vkd3d-shader/preproc.l | 3 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 354 +-- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 752 ++++-- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 124 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 64 +- - libs/vkd3d/libs/vkd3d/command.c | 50 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 3347 +++++++++++------ + libs/vkd3d/libs/vkd3d-shader/msl.c | 881 +++++ + libs/vkd3d/libs/vkd3d-shader/preproc.h | 3 +- + libs/vkd3d/libs/vkd3d-shader/preproc.l | 56 +- + libs/vkd3d/libs/vkd3d-shader/preproc.y | 13 - + libs/vkd3d/libs/vkd3d-shader/spirv.c | 433 ++- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 1360 +++++-- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 145 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 105 +- + libs/vkd3d/libs/vkd3d/command.c | 91 +- libs/vkd3d/libs/vkd3d/device.c | 1 + - libs/vkd3d/libs/vkd3d/state.c | 7 +- + libs/vkd3d/libs/vkd3d/state.c | 383 +- libs/vkd3d/libs/vkd3d/utils.c | 3 +- libs/vkd3d/libs/vkd3d/vkd3d_main.c | 1 + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 4 +- - 30 files changed, 7256 insertions(+), 2635 deletions(-) + 32 files changed, 11274 insertions(+), 3716 deletions(-) create mode 100644 libs/vkd3d/libs/vkd3d-shader/msl.c diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in @@ -72,7 +74,7 @@ index 39145a97df1..fd62730f948 100644 #else v -= (v >> 1) & 0x55555555; diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index d9a355d3bc9..d08ee74a3a0 100644 +index d9a355d3bc9..5c0d13ea9e2 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -190,6 +190,17 @@ enum vkd3d_shader_compile_option_backward_compatibility @@ -118,7 +120,7 @@ index d9a355d3bc9..d08ee74a3a0 100644 VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_DATA_TYPE), }; -@@ -578,6 +596,58 @@ 
enum vkd3d_shader_parameter_name +@@ -578,6 +596,110 @@ enum vkd3d_shader_parameter_name * \since 1.13 */ VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION, @@ -174,10 +176,62 @@ index d9a355d3bc9..d08ee74a3a0 100644 + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_5, + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_6, + VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_7, ++ /** ++ * Point size. ++ * ++ * When this parameter is provided to a vertex, tessellation, or geometry ++ * shader, and the source shader does not write point size, it specifies a ++ * uniform value which will be written to point size. ++ * If the source shader writes point size, this parameter is ignored. ++ * ++ * This parameter can be used to implement fixed function point size, as ++ * present in Direct3D versions 8 and 9, if the target environment does not ++ * support point size as part of its own fixed-function API (as Vulkan and ++ * core OpenGL). ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. ++ * ++ * \since 1.14 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, ++ /** ++ * Minimum point size. ++ * ++ * When this parameter is provided to a vertex, tessellation, or geometry ++ * shader, and the source shader writes point size or uses the ++ * VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE parameter, the point size will ++ * be clamped to the provided minimum value. ++ * If point size is not written in one of these ways, ++ * this parameter is ignored. ++ * If this parameter is not provided, the point size will not be clamped ++ * to a minimum size by vkd3d-shader. ++ * ++ * This parameter can be used to implement fixed function point size, as ++ * present in Direct3D versions 8 and 9, if the target environment does not ++ * support point size as part of its own fixed-function API (as Vulkan and ++ * core OpenGL). ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. 
++ * ++ * \since 1.14 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, ++ /** ++ * Maximum point size. ++ * ++ * This parameter has identical behaviour to ++ * VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, except that it provides ++ * the maximum size rather than the minimum. ++ * ++ * \since 1.14 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), }; -@@ -625,6 +695,13 @@ struct vkd3d_shader_parameter_immediate_constant1 +@@ -625,6 +747,13 @@ struct vkd3d_shader_parameter_immediate_constant1 * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. */ float f32; @@ -191,7 +245,7 @@ index d9a355d3bc9..d08ee74a3a0 100644 void *_pointer_pad; uint32_t _pad[4]; } u; -@@ -636,7 +713,13 @@ struct vkd3d_shader_parameter_immediate_constant1 +@@ -636,7 +765,13 @@ struct vkd3d_shader_parameter_immediate_constant1 */ struct vkd3d_shader_parameter_specialization_constant { @@ -206,7 +260,19 @@ index d9a355d3bc9..d08ee74a3a0 100644 uint32_t id; }; -@@ -1087,6 +1170,10 @@ enum vkd3d_shader_target_type +@@ -1046,6 +1181,11 @@ enum vkd3d_shader_source_type + * the format used for Direct3D shader model 6 shaders. \since 1.9 + */ + VKD3D_SHADER_SOURCE_DXBC_DXIL, ++ /** ++ * Binary format used by Direct3D 9/10.x/11 effects. ++ * Input is a raw FX section without container. \since 1.14 ++ */ ++ VKD3D_SHADER_SOURCE_FX, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), + }; +@@ -1087,6 +1227,10 @@ enum vkd3d_shader_target_type * Output is a raw FX section without container. \since 1.11 */ VKD3D_SHADER_TARGET_FX, @@ -217,7 +283,7 @@ index d9a355d3bc9..d08ee74a3a0 100644 VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_TARGET_TYPE), }; -@@ -1292,7 +1379,8 @@ typedef int (*PFN_vkd3d_shader_open_include)(const char *filename, bool local, +@@ -1292,7 +1436,8 @@ typedef int (*PFN_vkd3d_shader_open_include)(const char *filename, bool local, * vkd3d_shader_preprocess_info. 
* * \param code Contents of the included file, which were allocated by the @@ -227,7 +293,7 @@ index d9a355d3bc9..d08ee74a3a0 100644 * * \param context The user-defined pointer passed to struct * vkd3d_shader_preprocess_info. -@@ -1319,8 +1407,8 @@ struct vkd3d_shader_preprocess_info +@@ -1319,8 +1464,8 @@ struct vkd3d_shader_preprocess_info /** * Pointer to an array of predefined macros. Each macro in this array will @@ -238,7 +304,7 @@ index d9a355d3bc9..d08ee74a3a0 100644 * * If the same macro is specified multiple times, only the last value is * used. -@@ -2798,7 +2886,7 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_ +@@ -2798,7 +2943,7 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_ * \param input_signature The input signature of the second shader. * * \param count On output, contains the number of entries written into @@ -356,7 +422,7 @@ index d9560628c77..45de1c92513 100644 memcpy(checksum, ctx.digest, sizeof(ctx.digest)); } diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 77e9711300f..9fe4b74486a 100644 +index 77e9711300f..38d566d9fe0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -79,7 +79,7 @@ static const char * const shader_opcode_names[] = @@ -438,14 +504,106 @@ index 77e9711300f..9fe4b74486a 100644 else if (reg->type != VKD3DSPR_RASTOUT && reg->type != VKD3DSPR_MISCTYPE && reg->type != VKD3DSPR_NULL -@@ -2491,10 +2501,11 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, +@@ -2258,7 +2268,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic + } + } - void vkd3d_shader_trace(const struct vsir_program *program) +-static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, ++static enum vkd3d_result dump_dxbc_signature(struct vkd3d_d3d_asm_compiler *compiler, + const char *name, const char *register_name, 
const struct shader_signature *signature) + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; +@@ -2325,21 +2335,21 @@ static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, + return VKD3D_OK; + } + +-static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler, ++static enum vkd3d_result dump_dxbc_signatures(struct vkd3d_d3d_asm_compiler *compiler, + const struct vsir_program *program) + { + enum vkd3d_result ret; + +- if ((ret = dump_signature(compiler, ".input", ++ if ((ret = dump_dxbc_signature(compiler, ".input", + program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v", + &program->input_signature)) < 0) + return ret; + +- if ((ret = dump_signature(compiler, ".output", "o", ++ if ((ret = dump_dxbc_signature(compiler, ".output", "o", + &program->output_signature)) < 0) + return ret; + +- if ((ret = dump_signature(compiler, ".patch_constant", ++ if ((ret = dump_dxbc_signature(compiler, ".patch_constant", + program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o", + &program->patch_constant_signature)) < 0) + return ret; +@@ -2427,7 +2437,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, + * doesn't even have an explicit concept of signature. */ + if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4) + { +- if ((result = dump_signatures(&compiler, program)) < 0) ++ if ((result = dump_dxbc_signatures(&compiler, program)) < 0) + { + vkd3d_string_buffer_cleanup(buffer); + return result; +@@ -2489,12 +2499,58 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, + return result; + } + +-void vkd3d_shader_trace(const struct vsir_program *program) ++/* This is meant exclusively for development use. Therefore, differently from ++ * dump_dxbc_signature(), it doesn't try particularly hard to make the output ++ * nice or easily parsable, and it dumps all fields, not just the DXBC ones. 
++ * This format isn't meant to be stable. */ ++static void trace_signature(const struct shader_signature *signature, const char *signature_type) { - const char *p, *q, *end; ++ struct vkd3d_string_buffer buffer; ++ unsigned int i; ++ ++ TRACE("%s signature:%s\n", signature_type, signature->element_count == 0 ? " empty" : ""); ++ ++ vkd3d_string_buffer_init(&buffer); ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ const struct signature_element *element = &signature->elements[i]; ++ ++ vkd3d_string_buffer_clear(&buffer); ++ ++ vkd3d_string_buffer_printf(&buffer, "Element %u: %s %u-%u %s", i, ++ get_component_type_name(element->component_type), ++ element->register_index, element->register_index + element->register_count, ++ element->semantic_name); ++ if (element->semantic_index != -1) ++ vkd3d_string_buffer_printf(&buffer, "%u", element->semantic_index); ++ vkd3d_string_buffer_printf(&buffer, ++ " mask %#x used_mask %#x sysval %s min_precision %s interpolation %u stream %u", ++ element->mask, element->used_mask, get_sysval_semantic_name(element->sysval_semantic), ++ get_minimum_precision_name(element->min_precision), element->interpolation_mode, ++ element->stream_index); ++ if (element->target_location != -1) ++ vkd3d_string_buffer_printf(&buffer, " target %u", element->target_location); ++ else ++ vkd3d_string_buffer_printf(&buffer, " unused"); ++ ++ TRACE("%s\n", buffer.buffer); ++ } ++ ++ vkd3d_string_buffer_cleanup(&buffer); ++} ++ ++void vsir_program_trace(const struct vsir_program *program) ++{ + const unsigned int flags = VSIR_ASM_FLAG_DUMP_TYPES | VSIR_ASM_FLAG_DUMP_ALL_INDICES; struct vkd3d_shader_code code; + const char *p, *q, *end; ++ ++ trace_signature(&program->input_signature, "Input"); ++ trace_signature(&program->output_signature, "Output"); ++ trace_signature(&program->patch_constant_signature, "Patch-constant"); - if (d3d_asm_compile(program, NULL, &code, VSIR_ASM_FLAG_DUMP_TYPES) != VKD3D_OK) + if (d3d_asm_compile(program, NULL, 
&code, flags) != VKD3D_OK) @@ -453,10 +611,10 @@ index 77e9711300f..9fe4b74486a 100644 end = (const char *)code.code + code.size; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index d05394c3ab7..267cf410cbe 100644 +index d05394c3ab7..ae8e864c179 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -104,6 +104,19 @@ enum vkd3d_sm1_resource_type +@@ -104,6 +104,12 @@ enum vkd3d_sm1_resource_type VKD3D_SM1_RESOURCE_TEXTURE_3D = 0x4, }; @@ -465,18 +623,11 @@ index d05394c3ab7..267cf410cbe 100644 + VKD3D_SM1_MISC_POSITION = 0x0, + VKD3D_SM1_MISC_FACE = 0x1, +}; -+ -+enum vkd3d_sm1_rastout_register -+{ -+ VKD3D_SM1_RASTOUT_POSITION = 0x0, -+ VKD3D_SM1_RASTOUT_FOG = 0x1, -+ VKD3D_SM1_RASTOUT_POINT_SIZE = 0x2, -+}; + enum vkd3d_sm1_opcode { VKD3D_SM1_OP_NOP = 0x00, -@@ -444,17 +457,36 @@ static uint32_t swizzle_from_sm1(uint32_t swizzle) +@@ -444,17 +450,36 @@ static uint32_t swizzle_from_sm1(uint32_t swizzle) shader_sm1_get_swizzle_component(swizzle, 3)); } @@ -516,7 +667,7 @@ index d05394c3ab7..267cf410cbe 100644 if (src->reg.type == VKD3DSPR_SAMPLER) src->reg.dimension = VSIR_DIMENSION_NONE; else if (src->reg.type == VKD3DSPR_DEPTHOUT) -@@ -470,12 +502,16 @@ static void shader_sm1_parse_dst_param(uint32_t param, struct vkd3d_shader_src_p +@@ -470,12 +495,16 @@ static void shader_sm1_parse_dst_param(uint32_t param, struct vkd3d_shader_src_p { enum vkd3d_shader_register_type reg_type = ((param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT) | ((param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2); @@ -536,7 +687,47 @@ index d05394c3ab7..267cf410cbe 100644 if (dst->reg.type == VKD3DSPR_SAMPLER) dst->reg.dimension = VSIR_DIMENSION_NONE; else if (dst->reg.type == VKD3DSPR_DEPTHOUT) -@@ -601,7 +637,7 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * +@@ -532,6 +561,21 @@ static struct signature_element 
*find_signature_element_by_register_index( + return NULL; + } + ++/* Add missing bits to a mask to make it contiguous. */ ++static unsigned int make_mask_contiguous(unsigned int mask) ++{ ++ static const unsigned int table[] = ++ { ++ 0x0, 0x1, 0x2, 0x3, ++ 0x4, 0x7, 0x6, 0x7, ++ 0x8, 0xf, 0xe, 0xf, ++ 0xc, 0xf, 0xe, 0xf, ++ }; ++ ++ VKD3D_ASSERT(mask < ARRAY_SIZE(table)); ++ return table[mask]; ++} ++ + static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, + const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, + unsigned int register_index, bool is_dcl, unsigned int mask) +@@ -547,7 +591,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp + + if ((element = find_signature_element(signature, name, index))) + { +- element->mask |= mask; ++ element->mask = make_mask_contiguous(element->mask | mask); + if (!is_dcl) + element->used_mask |= mask; + return true; +@@ -567,7 +611,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp + element->register_index = register_index; + element->target_location = register_index; + element->register_count = 1; +- element->mask = mask; ++ element->mask = make_mask_contiguous(mask); + element->used_mask = is_dcl ? 
0 : mask; + if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) + element->interpolation_mode = VKD3DSIM_LINEAR; +@@ -601,7 +645,7 @@ static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser * const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) { const struct vkd3d_shader_version *version = &sm1->p.program->shader_version; @@ -545,17 +736,27 @@ index d05394c3ab7..267cf410cbe 100644 switch (reg->type) { -@@ -1272,7 +1308,8 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st +@@ -921,6 +965,9 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const + shader_sm1_parse_src_param(addr_token, NULL, dst_rel_addr); + } + shader_sm1_parse_dst_param(token, dst_rel_addr, dst_param); ++ ++ if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) ++ sm1->p.program->has_point_size = true; + } + + static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, +@@ -1272,7 +1319,8 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st sm1->end = &code[token_count]; /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) + if (!vsir_program_init(program, compile_info, &version, -+ code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED)) ++ code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, false)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); -@@ -1338,9 +1375,6 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c +@@ -1338,9 +1386,6 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) program->flat_constant_count[i] = get_external_constant_count(&sm1, i); @@ -565,7 +766,7 @@ index d05394c3ab7..267cf410cbe 100644 if (sm1.p.failed && ret >= 0) ret = VKD3D_ERROR_INVALID_SHADER; -@@ -1351,7 +1385,18 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c +@@ -1351,10 +1396,21 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c return ret; } @@ -575,7 +776,7 @@ index d05394c3ab7..267cf410cbe 100644 + WARN("Failed to validate shader after parsing, ret %d.\n", ret); + + if (TRACE_ON()) -+ vkd3d_shader_trace(program); ++ vsir_program_trace(program); + + vsir_program_cleanup(program); + return ret; @@ -584,8 +785,12 @@ index d05394c3ab7..267cf410cbe 100644 + return VKD3D_OK; } - bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, -@@ -1384,22 +1429,22 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, +-bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, ++bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, + unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) + { + unsigned int i; +@@ -1384,22 +1440,22 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, 
VKD3DSPR_DEPTHOUT}, {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, @@ -601,10 +806,10 @@ index d05394c3ab7..267cf410cbe 100644 - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, ++ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_FOG}, ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, ++ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POINT_SIZE}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, @@ -612,18 +817,20 @@ index d05394c3ab7..267cf410cbe 100644 - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, ++ 
{"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_FOG}, ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, ++ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POINT_SIZE}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, }; -@@ -1423,32 +1468,32 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, +@@ -1422,33 +1478,33 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, + return false; } - bool hlsl_sm1_usage_from_semantic(const char *semantic_name, +-bool hlsl_sm1_usage_from_semantic(const char *semantic_name, - uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) ++bool sm1_usage_from_semantic_name(const char *semantic_name, + uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx) { static const struct @@ -671,7 +878,7 @@ index d05394c3ab7..267cf410cbe 100644 }; unsigned int i; -@@ -1468,21 +1513,17 @@ bool hlsl_sm1_usage_from_semantic(const char *semantic_name, +@@ -1468,21 +1524,17 @@ bool hlsl_sm1_usage_from_semantic(const char *semantic_name, struct d3dbc_compiler { @@ -697,7 +904,7 @@ index d05394c3ab7..267cf410cbe 100644 } D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) -@@ -1512,6 +1553,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) +@@ -1512,6 +1564,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -705,7 +912,7 @@ index d05394c3ab7..267cf410cbe 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: -@@ -1617,6 +1659,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) +@@ -1617,6 +1670,7 @@ D3DXPARAMETER_TYPE 
hlsl_sm1_base_type(const struct hlsl_type *type) case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -713,7 +920,7 @@ index d05394c3ab7..267cf410cbe 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: -@@ -1709,7 +1752,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) +@@ -1709,7 +1763,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) { @@ -722,7 +929,12 @@ index d05394c3ab7..267cf410cbe 100644 unsigned int uniform_count = 0; struct hlsl_ir_var *var; -@@ -1745,11 +1788,12 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff +@@ -1741,15 +1795,16 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff + size_offset = put_u32(buffer, 0); + ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); + +- ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); ++ ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. 
*/ creator_offset = put_u32(buffer, 0); put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); put_u32(buffer, uniform_count); @@ -736,7 +948,7 @@ index d05394c3ab7..267cf410cbe 100644 LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { -@@ -1825,8 +1869,10 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff +@@ -1825,8 +1880,10 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff switch (comp_type->e.numeric.type) { case HLSL_TYPE_DOUBLE: @@ -749,7 +961,7 @@ index d05394c3ab7..267cf410cbe 100644 break; case HLSL_TYPE_INT: -@@ -1860,24 +1906,24 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff +@@ -1860,24 +1917,24 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff set_u32(buffer, creator_offset, offset - ctab_start); ctab_end = bytecode_align(buffer); @@ -779,7 +991,7 @@ index d05394c3ab7..267cf410cbe 100644 unsigned int writemask; uint32_t reg; } dst; -@@ -1885,7 +1931,7 @@ struct sm1_instruction +@@ -1885,7 +1942,7 @@ struct sm1_instruction struct sm1_src_register { enum vkd3d_shader_register_type type; @@ -788,7 +1000,7 @@ index d05394c3ab7..267cf410cbe 100644 unsigned int swizzle; uint32_t reg; } srcs[4]; -@@ -1900,11 +1946,11 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) +@@ -1900,11 +1957,11 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) const struct sm1_dst_register *dst = &instr->dst; unsigned int i; @@ -803,7 +1015,7 @@ index d05394c3ab7..267cf410cbe 100644 return false; if (src->type != dst->type) return false; -@@ -1923,13 +1969,19 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) +@@ -1923,13 +1980,19 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const 
struct sm1_dst_register *reg) { VKD3D_ASSERT(reg->writemask); @@ -825,7 +1037,7 @@ index d05394c3ab7..267cf410cbe 100644 } static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) -@@ -1945,7 +1997,7 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s +@@ -1945,7 +2008,7 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); if (version->major > 1) @@ -834,7 +1046,7 @@ index d05394c3ab7..267cf410cbe 100644 put_u32(buffer, token); if (instr->has_dst) -@@ -1955,346 +2007,112 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s +@@ -1955,346 +2018,112 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s write_sm1_src_register(buffer, &instr->srcs[i]); }; @@ -1011,14 +1223,14 @@ index d05394c3ab7..267cf410cbe 100644 - const struct hlsl_type *dst_type = expr->node.data_type; - const struct hlsl_type *src_type = arg1->data_type; - struct hlsl_ctx *ctx = d3dbc->ctx; -- -- /* Narrowing casts were already lowered. */ -- VKD3D_ASSERT(src_type->dimx == dst_type->dimx); + src->mod = param->modifiers; + src->reg = param->reg.idx[0].offset; + src->type = param->reg.type; + src->swizzle = swizzle_from_vsir(param->swizzle); +- /* Narrowing casts were already lowered. 
*/ +- VKD3D_ASSERT(src_type->dimx == dst_type->dimx); +- - switch (dst_type->e.numeric.type) + if (param->reg.idx[0].rel_addr) { @@ -1140,13 +1352,13 @@ index d05394c3ab7..267cf410cbe 100644 - if (hlsl_sm1_register_from_semantic(version, element->semantic_name, - element->semantic_index, output, ®.type, ®.reg)) -+ const struct sm1_dst_register reg = - { +- { - usage = 0; - usage_idx = 0; - } - else -- { ++ const struct sm1_dst_register reg = + { - ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); - VKD3D_ASSERT(ret); - reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; @@ -1244,7 +1456,7 @@ index d05394c3ab7..267cf410cbe 100644 token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; put_u32(buffer, token); -@@ -2305,618 +2123,283 @@ static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, +@@ -2305,618 +2134,283 @@ static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, write_sm1_dst_register(buffer, ®); } @@ -1428,20 +1640,20 @@ index d05394c3ab7..267cf410cbe 100644 - - case HLSL_OP1_NEG: - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); -- break; -- -- case HLSL_OP1_SAT: -- d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + case VKD3D_SHADER_RESOURCE_TEXTURE_2D: + d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_2D); break; -- case HLSL_OP1_RCP: -- d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); +- case HLSL_OP1_SAT: +- d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: + d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_CUBE); break; +- case HLSL_OP1_RCP: +- d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); +- break; +- - case HLSL_OP1_RSQ: - d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); - break; @@ -1830,7 +2042,7 @@ index d05394c3ab7..267cf410cbe 100644 + 
bool ret; - if (store->lhs.var->is_output_semantic) -+ if (hlsl_sm1_register_from_semantic(version, element->semantic_name, ++ if (sm1_register_from_semantic_name(version, element->semantic_name, + element->semantic_index, output, ®.type, ®.reg)) { - if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) @@ -1864,7 +2076,7 @@ index d05394c3ab7..267cf410cbe 100644 - struct sm1_instruction sm1_instr = { - .opcode = D3DSIO_MOV, -+ ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); ++ ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx); + VKD3D_ASSERT(ret); + reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + reg.reg = element->register_index; @@ -2047,7 +2259,7 @@ index d05394c3ab7..267cf410cbe 100644 { out->code = buffer->data; out->size = buffer->size; -@@ -2925,5 +2408,5 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, +@@ -2925,5 +2419,5 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, { vkd3d_free(buffer->data); } @@ -2055,7 +2267,7 @@ index d05394c3ab7..267cf410cbe 100644 + return result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index 184788dc57e..93fc993e0d1 100644 +index 184788dc57e..f6ac8e0829e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -20,6 +20,19 @@ @@ -2096,7 +2308,16 @@ index 184788dc57e..93fc993e0d1 100644 if (memcmp(checksum, calculated_checksum, sizeof(checksum))) { WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " -@@ -1488,7 +1501,7 @@ int vkd3d_shader_serialize_root_signature(const struct vkd3d_shader_versioned_ro +@@ -406,8 +419,6 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s + const char *name; + uint32_t mask; + +- e[i].sort_index = i; +- + if (has_stream_index) + e[i].stream_index = read_u32(&ptr); + else +@@ -1488,7 
+1499,7 @@ int vkd3d_shader_serialize_root_signature(const struct vkd3d_shader_versioned_ro dxbc->code = context.buffer.data; dxbc->size = total_size; @@ -2106,7 +2327,7 @@ index 184788dc57e..93fc993e0d1 100644 set_u32(&context.buffer, (i + 1) * sizeof(uint32_t), checksum[i]); diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 4a17c62292b..d4296ef4bc5 100644 +index 4a17c62292b..f9f44f34bcf 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -3888,7 +3888,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade @@ -2128,7 +2349,42 @@ index 4a17c62292b..d4296ef4bc5 100644 dst_params[1].reg.idx[1].rel_addr = NULL; dst_params[1].reg.idx[1].offset = ~0u; dst_params[1].reg.idx_count = 1; -@@ -4865,8 +4864,10 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr +@@ -4175,6 +4174,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty + const struct sm6_type *type_b, struct sm6_parser *sm6) + { + bool is_int = sm6_type_is_bool_i16_i32_i64(type_a); ++ bool is_double = sm6_type_is_double(type_a); + bool is_bool = sm6_type_is_bool(type_a); + enum vkd3d_shader_opcode op; + bool is_valid; +@@ -4199,7 +4199,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty + case BINOP_ADD: + case BINOP_SUB: + /* NEG is applied later for subtraction. */ +- op = is_int ? VKD3DSIH_IADD : VKD3DSIH_ADD; ++ op = is_int ? VKD3DSIH_IADD : (is_double ? VKD3DSIH_DADD : VKD3DSIH_ADD); + is_valid = !is_bool; + break; + case BINOP_AND: +@@ -4215,7 +4215,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty + is_valid = is_int && !is_bool; + break; + case BINOP_MUL: +- op = is_int ? VKD3DSIH_UMUL : VKD3DSIH_MUL; ++ op = is_int ? VKD3DSIH_UMUL : (is_double ? 
VKD3DSIH_DMUL : VKD3DSIH_MUL); + is_valid = !is_bool; + break; + case BINOP_OR: +@@ -4223,7 +4223,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty + is_valid = is_int; + break; + case BINOP_SDIV: +- op = is_int ? VKD3DSIH_IDIV : VKD3DSIH_DIV; ++ op = is_int ? VKD3DSIH_IDIV : (is_double ? VKD3DSIH_DDIV : VKD3DSIH_DIV); + is_valid = !is_bool; + break; + case BINOP_SREM: +@@ -4865,8 +4865,10 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; src_param_init_vector_from_reg(src_param, &buffer->u.handle.reg); @@ -2140,7 +2396,7 @@ index 4a17c62292b..d4296ef4bc5 100644 type = sm6_type_get_scalar_type(dst->type, 0); VKD3D_ASSERT(type); -@@ -4965,8 +4966,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int +@@ -4965,8 +4967,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int dst->u.handle.d = d; reg = &dst->u.handle.reg; @@ -2150,7 +2406,16 @@ index 4a17c62292b..d4296ef4bc5 100644 reg->dimension = VSIR_DIMENSION_VEC4; reg->idx[0].offset = id; register_index_address_init(®->idx[1], operands[2], sm6); -@@ -6861,7 +6861,6 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re +@@ -5871,6 +5872,8 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr + return; + } + e = &signature->elements[row_index]; ++ if (!e->sysval_semantic) ++ column_index += vsir_write_mask_get_component_idx(e->mask); + + if (column_index >= VKD3D_VEC4_SIZE) + { +@@ -6861,7 +6864,6 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re struct vkd3d_shader_dst_param *dst_params; struct vkd3d_shader_src_param *src_params; const struct sm6_value *ptr, *cmp, *new; @@ -2158,7 +2423,7 @@ index 4a17c62292b..d4296ef4bc5 100644 unsigned int i = 0; bool is_volatile; uint64_t code; -@@ -6887,9 +6886,10 @@ static void 
sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re +@@ -6887,9 +6889,10 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re return; } @@ -2172,7 +2437,7 @@ index 4a17c62292b..d4296ef4bc5 100644 if (!cmp || !new) return; -@@ -7287,7 +7287,6 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco +@@ -7287,7 +7290,6 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco unsigned int i = 0, alignment, operand_count; struct vkd3d_shader_src_param *src_params; struct vkd3d_shader_dst_param *dst_param; @@ -2180,7 +2445,7 @@ index 4a17c62292b..d4296ef4bc5 100644 const struct sm6_value *ptr, *src; uint64_t alignment_code; -@@ -7299,13 +7298,14 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco +@@ -7299,13 +7301,14 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco return; } @@ -2198,7 +2463,7 @@ index 4a17c62292b..d4296ef4bc5 100644 { WARN("Type mismatch.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, -@@ -8908,7 +8908,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, +@@ -8908,7 +8911,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, d->resource_type = ins->resource_type; d->kind = kind; d->reg_type = VKD3DSPR_RESOURCE; @@ -2207,7 +2472,7 @@ index 4a17c62292b..d4296ef4bc5 100644 d->resource_data_type = (ins->opcode == VKD3DSIH_DCL) ? 
ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; -@@ -8982,7 +8982,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, +@@ -8982,7 +8985,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, d->resource_type = ins->resource_type; d->kind = values[0]; d->reg_type = VKD3DSPR_UAV; @@ -2216,7 +2481,7 @@ index 4a17c62292b..d4296ef4bc5 100644 d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED) ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; -@@ -9346,7 +9346,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9346,7 +9349,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Signature element is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element is not a metadata node."); @@ -2225,7 +2490,7 @@ index 4a17c62292b..d4296ef4bc5 100644 } element_node = m->u.node; -@@ -9355,7 +9355,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9355,7 +9358,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Invalid operand count %u.\n", element_node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Invalid signature element operand count %u.", element_node->operand_count); @@ -2234,7 +2499,7 @@ index 4a17c62292b..d4296ef4bc5 100644 } if (element_node->operand_count > 11) { -@@ -9374,7 +9374,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9374,7 +9377,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Failed to load uint value at index %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element value at index %u is not an integer.", j); @@ -2243,7 +2508,7 @@ index 
4a17c62292b..d4296ef4bc5 100644 } } -@@ -9385,7 +9385,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9385,7 +9388,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const FIXME("Unsupported element id %u not equal to its index %u.\n", values[0], i); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A non-sequential and non-zero-based element id is not supported."); @@ -2252,7 +2517,7 @@ index 4a17c62292b..d4296ef4bc5 100644 } if (!sm6_metadata_value_is_string(element_node->operands[1])) -@@ -9393,7 +9393,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9393,7 +9396,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Element name is not a string.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element name is not a metadata string."); @@ -2261,7 +2526,7 @@ index 4a17c62292b..d4296ef4bc5 100644 } e->semantic_name = element_node->operands[1]->u.string_value; -@@ -9407,7 +9407,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9407,7 +9410,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Unhandled semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "DXIL semantic kind %u is unhandled.", j); @@ -2270,7 +2535,7 @@ index 4a17c62292b..d4296ef4bc5 100644 } if ((e->interpolation_mode = values[5]) >= VKD3DSIM_COUNT) -@@ -9415,7 +9415,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9415,7 +9418,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Unhandled interpolation mode %u.\n", e->interpolation_mode); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Interpolation mode %u is unhandled.", 
e->interpolation_mode); @@ -2279,7 +2544,7 @@ index 4a17c62292b..d4296ef4bc5 100644 } e->register_count = values[6]; -@@ -9430,7 +9430,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9430,7 +9433,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Unhandled I/O register semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "DXIL semantic kind %u is unhandled for an I/O register.", j); @@ -2288,7 +2553,7 @@ index 4a17c62292b..d4296ef4bc5 100644 } } else if (e->register_index > MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index) -@@ -9439,7 +9439,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9439,7 +9442,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A signature element starting row of %u with count %u is invalid.", e->register_index, e->register_count); @@ -2297,7 +2562,7 @@ index 4a17c62292b..d4296ef4bc5 100644 } index = values[9]; -@@ -9448,7 +9448,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9448,7 +9451,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Invalid column start %u with count %u.\n", index, column_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A signature element starting column %u with count %u is invalid.", index, column_count); @@ -2306,7 +2571,7 @@ index 4a17c62292b..d4296ef4bc5 100644 } e->mask = vkd3d_write_mask_from_component_count(column_count); -@@ -9471,7 +9471,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9471,7 +9474,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Semantic index list is not a node.\n"); 
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element semantic index list is not a metadata node."); @@ -2315,7 +2580,7 @@ index 4a17c62292b..d4296ef4bc5 100644 } element_node = m->u.node; -@@ -9516,6 +9516,10 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9516,6 +9519,10 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const s->element_count = operand_count; return VKD3D_OK; @@ -2326,16 +2591,45 @@ index 4a17c62292b..d4296ef4bc5 100644 } static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m, -@@ -10303,7 +10307,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro +@@ -9633,6 +9640,7 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co + ins->declaration.thread_group_size.x = group_sizes[0]; + ins->declaration.thread_group_size.y = group_sizes[1]; + ins->declaration.thread_group_size.z = group_sizes[2]; ++ sm6->p.program->thread_group_size = ins->declaration.thread_group_size; + + return VKD3D_OK; + } +@@ -10303,12 +10311,28 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro /* Estimate instruction count to avoid reallocation in most shaders. 
*/ count = max(token_count, 400) - 400; - if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10)) -+ if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS)) ++ if (!vsir_program_init(program, compile_info, &version, ++ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, true)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); sm6->ptr = &sm6->start[1]; -@@ -10526,9 +10530,16 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro + sm6->bitpos = 2; + ++ switch (program->shader_version.type) ++ { ++ case VKD3D_SHADER_TYPE_HULL: ++ case VKD3D_SHADER_TYPE_DOMAIN: ++ break; ++ ++ default: ++ if (program->patch_constant_signature.element_count != 0) ++ { ++ WARN("The patch constant signature only makes sense for Hull and Domain Shaders, ignoring it.\n"); ++ shader_signature_cleanup(&program->patch_constant_signature); ++ } ++ break; ++ } ++ + input_signature = &program->input_signature; + output_signature = &program->output_signature; + patch_constant_signature = &program->patch_constant_signature; +@@ -10526,9 +10550,16 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro dxil_block_destroy(&sm6->root_block); @@ -2352,7 +2646,7 @@ index 4a17c62292b..d4296ef4bc5 100644 vsir_program_cleanup(program); return ret; } -@@ -10570,18 +10581,25 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co +@@ -10570,18 +10601,25 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co free_dxbc_shader_desc(&dxbc_desc); vkd3d_free(byte_code); @@ -2375,7 +2669,7 @@ index 4a17c62292b..d4296ef4bc5 100644 + WARN("Failed to validate shader after parsing, ret %d.\n", ret); + + if (TRACE_ON()) -+ vkd3d_shader_trace(program); ++ vsir_program_trace(program); + + sm6_parser_cleanup(&sm6); + vsir_program_cleanup(program); @@ 
-2387,10 +2681,28 @@ index 4a17c62292b..d4296ef4bc5 100644 + return VKD3D_OK; } diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index a1d1fd6572f..cc18857a010 100644 +index a1d1fd6572f..f3f7a2c765c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -38,6 +38,7 @@ struct type_entry +@@ -25,6 +25,17 @@ static inline size_t put_u32_unaligned(struct vkd3d_bytecode_buffer *buffer, uin + return bytecode_put_bytes_unaligned(buffer, &value, sizeof(value)); + } + ++struct fx_4_binary_type ++{ ++ uint32_t name; ++ uint32_t class; ++ uint32_t element_count; ++ uint32_t unpacked_size; ++ uint32_t stride; ++ uint32_t packed_size; ++ uint32_t typeinfo; ++}; ++ + struct string_entry + { + struct rb_entry entry; +@@ -38,6 +49,7 @@ struct type_entry struct list entry; const char *name; uint32_t elements_count; @@ -2398,7 +2710,7 @@ index a1d1fd6572f..cc18857a010 100644 uint32_t offset; }; -@@ -181,6 +182,7 @@ struct fx_write_context +@@ -181,6 +193,7 @@ struct fx_write_context struct vkd3d_bytecode_buffer unstructured; struct vkd3d_bytecode_buffer structured; @@ -2406,7 +2718,7 @@ index a1d1fd6572f..cc18857a010 100644 struct rb_tree strings; struct list types; -@@ -223,11 +225,6 @@ static void set_status(struct fx_write_context *fx, int status) +@@ -223,11 +236,6 @@ static void set_status(struct fx_write_context *fx, int status) fx->status = status; } @@ -2418,7 +2730,7 @@ index a1d1fd6572f..cc18857a010 100644 static uint32_t write_string(const char *string, struct fx_write_context *fx) { return fx->ops->write_string(string, fx); -@@ -278,9 +275,9 @@ static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_i +@@ -278,9 +286,9 @@ static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_i static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) { @@ -2429,7 +2741,7 @@ index a1d1fd6572f..cc18857a010 100644 const char *name; 
VKD3D_ASSERT(fx->ctx->profile->major_version >= 4); -@@ -297,6 +294,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context +@@ -297,6 +305,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context } name = get_fx_4_type_name(element_type); @@ -2437,7 +2749,7 @@ index a1d1fd6572f..cc18857a010 100644 LIST_FOR_EACH_ENTRY(type_entry, &fx->types, struct type_entry, entry) { -@@ -306,6 +304,9 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context +@@ -306,6 +315,9 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context if (type_entry->elements_count != elements_count) continue; @@ -2447,7 +2759,7 @@ index a1d1fd6572f..cc18857a010 100644 return type_entry->offset; } -@@ -315,6 +316,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context +@@ -315,6 +327,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context type_entry->offset = write_fx_4_type(type, fx); type_entry->name = name; type_entry->elements_count = elements_count; @@ -2455,7 +2767,7 @@ index a1d1fd6572f..cc18857a010 100644 list_add_tail(&fx->types, &type_entry->entry); -@@ -429,17 +431,26 @@ static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx +@@ -429,17 +442,26 @@ static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx write_fx_4_state_block(var, 0, count_offset, fx); } @@ -2485,15 +2797,68 @@ index a1d1fd6572f..cc18857a010 100644 /* TODO: assignments */ if (var->state_block_count && var->state_blocks[0]->count) -@@ -461,6 +472,7 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type) +@@ -459,25 +481,48 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type) + return type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float) * elements_count; + } - static const uint32_t fx_4_numeric_base_type[] = +-static const uint32_t fx_4_numeric_base_type[] = ++enum 
fx_4_type_constants { -+ [HLSL_TYPE_HALF] = 1, - [HLSL_TYPE_FLOAT] = 1, - [HLSL_TYPE_INT ] = 2, - [HLSL_TYPE_UINT ] = 3, -@@ -497,6 +509,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, +- [HLSL_TYPE_FLOAT] = 1, +- [HLSL_TYPE_INT ] = 2, +- [HLSL_TYPE_UINT ] = 3, +- [HLSL_TYPE_BOOL ] = 4, ++ /* Numeric types encoding */ ++ FX_4_NUMERIC_TYPE_FLOAT = 1, ++ FX_4_NUMERIC_TYPE_INT = 2, ++ FX_4_NUMERIC_TYPE_UINT = 3, ++ FX_4_NUMERIC_TYPE_BOOL = 4, ++ ++ FX_4_NUMERIC_CLASS_SCALAR = 1, ++ FX_4_NUMERIC_CLASS_VECTOR = 2, ++ FX_4_NUMERIC_CLASS_MATRIX = 3, ++ ++ FX_4_NUMERIC_BASE_TYPE_SHIFT = 3, ++ FX_4_NUMERIC_ROWS_SHIFT = 8, ++ FX_4_NUMERIC_COLUMNS_SHIFT = 11, ++ FX_4_NUMERIC_COLUMN_MAJOR_MASK = 0x4000, ++ ++ /* Object types */ ++ FX_4_OBJECT_TYPE_STRING = 1, ++ ++ /* Types */ ++ FX_4_TYPE_CLASS_NUMERIC = 1, ++ FX_4_TYPE_CLASS_OBJECT = 2, ++ FX_4_TYPE_CLASS_STRUCT = 3, ++}; ++ ++static const uint32_t fx_4_numeric_base_types[] = ++{ ++ [HLSL_TYPE_HALF ] = FX_4_NUMERIC_TYPE_FLOAT, ++ [HLSL_TYPE_FLOAT] = FX_4_NUMERIC_TYPE_FLOAT, ++ [HLSL_TYPE_INT ] = FX_4_NUMERIC_TYPE_INT, ++ [HLSL_TYPE_UINT ] = FX_4_NUMERIC_TYPE_UINT, ++ [HLSL_TYPE_BOOL ] = FX_4_NUMERIC_TYPE_BOOL, + }; + + static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, struct fx_write_context *fx) + { +- static const unsigned int NUMERIC_BASE_TYPE_SHIFT = 3; +- static const unsigned int NUMERIC_ROWS_SHIFT = 8; +- static const unsigned int NUMERIC_COLUMNS_SHIFT = 11; +- static const unsigned int NUMERIC_COLUMN_MAJOR_MASK = 0x4000; + static const uint32_t numeric_type_class[] = + { +- [HLSL_CLASS_SCALAR] = 1, +- [HLSL_CLASS_VECTOR] = 2, +- [HLSL_CLASS_MATRIX] = 3, ++ [HLSL_CLASS_SCALAR] = FX_4_NUMERIC_CLASS_SCALAR, ++ [HLSL_CLASS_VECTOR] = FX_4_NUMERIC_CLASS_VECTOR, ++ [HLSL_CLASS_MATRIX] = FX_4_NUMERIC_CLASS_MATRIX, + }; + struct hlsl_ctx *ctx = fx->ctx; + uint32_t value = 0; +@@ -497,20 +542,21 @@ static uint32_t get_fx_4_numeric_type_description(const struct 
hlsl_type *type, switch (type->e.numeric.type) { case HLSL_TYPE_FLOAT: @@ -2501,7 +2866,25 @@ index a1d1fd6572f..cc18857a010 100644 case HLSL_TYPE_INT: case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: -@@ -564,17 +577,32 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) +- value |= (fx_4_numeric_base_type[type->e.numeric.type] << NUMERIC_BASE_TYPE_SHIFT); ++ value |= (fx_4_numeric_base_types[type->e.numeric.type] << FX_4_NUMERIC_BASE_TYPE_SHIFT); + break; + default: + hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->e.numeric.type); + return 0; + } + +- value |= (type->dimy & 0x7) << NUMERIC_ROWS_SHIFT; +- value |= (type->dimx & 0x7) << NUMERIC_COLUMNS_SHIFT; ++ value |= (type->dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT; ++ value |= (type->dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT; + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) +- value |= NUMERIC_COLUMN_MAJOR_MASK; ++ value |= FX_4_NUMERIC_COLUMN_MAJOR_MASK; + + return value; + } +@@ -564,17 +610,32 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) case HLSL_CLASS_VERTEX_SHADER: return "VertexShader"; @@ -2534,7 +2917,7 @@ index a1d1fd6572f..cc18857a010 100644 static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) { struct field_offsets -@@ -584,43 +612,41 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -584,48 +645,46 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co uint32_t offset; uint32_t type; }; @@ -2588,7 +2971,24 @@ index a1d1fd6572f..cc18857a010 100644 { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: -@@ -652,6 +678,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + case HLSL_CLASS_MATRIX: +- put_u32_unaligned(buffer, 1); ++ put_u32_unaligned(buffer, FX_4_TYPE_CLASS_NUMERIC); + break; + + case HLSL_CLASS_DEPTH_STENCIL_STATE: +@@ -643,15 +702,16 @@ static uint32_t write_fx_4_type(const struct 
hlsl_type *type, struct fx_write_co + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STRING: +- put_u32_unaligned(buffer, 2); ++ put_u32_unaligned(buffer, FX_4_TYPE_CLASS_OBJECT); + break; + + case HLSL_CLASS_STRUCT: +- put_u32_unaligned(buffer, 3); ++ put_u32_unaligned(buffer, FX_4_TYPE_CLASS_STRUCT); + break; case HLSL_CLASS_ARRAY: case HLSL_CLASS_EFFECT_GROUP: @@ -2596,7 +2996,7 @@ index a1d1fd6572f..cc18857a010 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: -@@ -659,32 +686,32 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -659,32 +719,32 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co vkd3d_unreachable(); case HLSL_CLASS_VOID: @@ -2640,7 +3040,7 @@ index a1d1fd6572f..cc18857a010 100644 { const struct field_offsets *field = &field_offsets[i]; -@@ -700,7 +727,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -700,7 +760,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co put_u32_unaligned(buffer, 0); /* Interface count */ } } @@ -2649,7 +3049,7 @@ index a1d1fd6572f..cc18857a010 100644 { static const uint32_t texture_type[] = { -@@ -716,13 +743,13 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -716,13 +776,13 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, }; @@ -2666,7 +3066,7 @@ index a1d1fd6572f..cc18857a010 100644 { static const uint32_t uav_type[] = { -@@ -735,60 +762,60 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -735,60 +795,60 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, }; @@ -2711,7 +3111,8 @@ index a1d1fd6572f..cc18857a010 100644 - else if (type->class == HLSL_CLASS_STRING) + else if 
(element_type->class == HLSL_CLASS_STRING) { - put_u32_unaligned(buffer, 1); +- put_u32_unaligned(buffer, 1); ++ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_STRING); } - else if (hlsl_is_numeric_type(type)) + else if (hlsl_is_numeric_type(element_type)) @@ -2742,7 +3143,7 @@ index a1d1fd6572f..cc18857a010 100644 set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); } -@@ -963,16 +990,16 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n +@@ -963,16 +1023,16 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_context *fx) { @@ -2763,7 +3164,7 @@ index a1d1fd6572f..cc18857a010 100644 LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry) { -@@ -980,47 +1007,128 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex +@@ -980,47 +1040,128 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex ++count; } @@ -2883,10 +3284,10 @@ index a1d1fd6572f..cc18857a010 100644 + vkd3d_unreachable(); + } + id = fx->object_variable_count++; -+ -+ put_u32(&fx->unstructured, id); - return size; ++ put_u32(&fx->unstructured, id); ++ + put_u32(buffer, id); + put_u32(buffer, size); + if (size) @@ -2917,7 +3318,7 @@ index a1d1fd6572f..cc18857a010 100644 /* Note that struct fields must all be numeric; * this was validated in check_invalid_object_fields(). 
*/ -@@ -1030,21 +1138,20 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f +@@ -1030,21 +1171,20 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f case HLSL_CLASS_VECTOR: case HLSL_CLASS_MATRIX: case HLSL_CLASS_STRUCT: @@ -2949,7 +3350,7 @@ index a1d1fd6572f..cc18857a010 100644 break; } -@@ -1070,6 +1177,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type +@@ -1070,6 +1210,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type return is_type_supported_fx_2(ctx, type->e.array.type, loc); case HLSL_CLASS_TEXTURE: @@ -2957,7 +3358,7 @@ index a1d1fd6572f..cc18857a010 100644 switch (type->sampler_dim) { case HLSL_SAMPLER_DIM_1D: -@@ -1083,9 +1191,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type +@@ -1083,9 +1224,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type } break; @@ -2970,7 +3371,7 @@ index a1d1fd6572f..cc18857a010 100644 case HLSL_CLASS_VERTEX_SHADER: hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); return false; -@@ -1104,6 +1213,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type +@@ -1104,6 +1246,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type return false; case HLSL_CLASS_EFFECT_GROUP: @@ -2978,7 +3379,7 @@ index a1d1fd6572f..cc18857a010 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: -@@ -1117,8 +1227,8 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type +@@ -1117,8 +1260,8 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type static void write_fx_2_parameters(struct fx_write_context *fx) { @@ -2988,7 +3389,7 @@ index a1d1fd6572f..cc18857a010 100644 struct hlsl_ctx *ctx = fx->ctx; struct hlsl_ir_var *var; enum fx_2_parameter_flags -@@ -1138,23 +1248,35 @@ static void 
write_fx_2_parameters(struct fx_write_context *fx) +@@ -1138,23 +1281,35 @@ static void write_fx_2_parameters(struct fx_write_context *fx) if (var->storage_modifiers & HLSL_STORAGE_SHARED) flags |= IS_SHARED; @@ -3030,7 +3431,7 @@ index a1d1fd6572f..cc18857a010 100644 }; static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) -@@ -1180,19 +1302,18 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -1180,19 +1335,18 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) object_count = put_u32(structured, 0); write_fx_2_parameters(&fx); @@ -3057,7 +3458,7 @@ index a1d1fd6572f..cc18857a010 100644 size = align(fx.unstructured.size, 4); set_u32(&buffer, offset, size); -@@ -1201,6 +1322,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -1201,6 +1355,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) vkd3d_free(fx.unstructured.data); vkd3d_free(fx.structured.data); @@ -3065,7 +3466,7 @@ index a1d1fd6572f..cc18857a010 100644 if (!fx.technique_count) hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE, "No techniques found."); -@@ -1252,6 +1374,7 @@ static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hl +@@ -1252,6 +1407,7 @@ static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hl switch (type->e.numeric.type) { case HLSL_TYPE_FLOAT: @@ -3073,7 +3474,16 @@ index a1d1fd6572f..cc18857a010 100644 case HLSL_TYPE_INT: case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: -@@ -1438,11 +1561,14 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s +@@ -1420,7 +1576,7 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: +- type = fx_4_numeric_base_type[data_type->e.numeric.type]; ++ type = 
fx_4_numeric_base_types[data_type->e.numeric.type]; + break; + default: + type = 0; +@@ -1438,11 +1594,14 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, struct fx_write_context *fx) { @@ -3091,7 +3501,7 @@ index a1d1fd6572f..cc18857a010 100644 put_u32(buffer, entry->name_id); put_u32(buffer, entry->lhs_index); -@@ -1453,7 +1579,7 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl +@@ -1453,7 +1612,7 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl { case HLSL_IR_CONSTANT: { @@ -3100,7 +3510,7 @@ index a1d1fd6572f..cc18857a010 100644 value_offset = write_fx_4_state_numeric_value(c, fx); assignment_type = 1; -@@ -1461,15 +1587,71 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl +@@ -1461,15 +1620,71 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl } case HLSL_IR_LOAD: { @@ -3175,7 +3585,7 @@ index a1d1fd6572f..cc18857a010 100644 default: hlsl_fixme(ctx, &var->loc, "Unsupported assignment type for state %s.", entry->name); } -@@ -2118,7 +2300,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, +@@ -2118,7 +2333,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, const struct function_component *comp = &components[i]; unsigned int arg_index = (i + 1) % entry->args_count; block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, comp->name, @@ -3184,7 +3594,7 @@ index a1d1fd6572f..cc18857a010 100644 } hlsl_free_state_block_entry(entry); -@@ -2126,7 +2308,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, +@@ -2126,7 +2341,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, } /* For some states assignment sets all of the elements. 
This behaviour is limited to certain states of BlendState @@ -3193,7 +3603,7 @@ index a1d1fd6572f..cc18857a010 100644 static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *var, struct hlsl_state_block *block, unsigned int entry_index, struct fx_write_context *fx) { -@@ -2140,7 +2322,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * +@@ -2140,7 +2355,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * if (type->class != HLSL_CLASS_BLEND_STATE) return 1; @@ -3202,7 +3612,7 @@ index a1d1fd6572f..cc18857a010 100644 return 1; if (entry->lhs_has_index) return 1; -@@ -2164,7 +2346,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * +@@ -2164,7 +2379,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * for (i = 1; i < array_size; ++i) { block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, @@ -3211,7 +3621,7 @@ index a1d1fd6572f..cc18857a010 100644 } return array_size; -@@ -2401,6 +2583,9 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx +@@ -2401,6 +2616,9 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx size = 0; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -3221,8 +3631,515 @@ index a1d1fd6572f..cc18857a010 100644 if (var->buffer != b) continue; +@@ -2629,3 +2847,506 @@ int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + vkd3d_unreachable(); + } + } ++ ++struct fx_parser ++{ ++ const uint8_t *ptr, *start, *end; ++ struct vkd3d_shader_message_context *message_context; ++ struct vkd3d_string_buffer buffer; ++ unsigned int indent; ++ struct ++ { ++ const uint8_t *ptr; ++ const uint8_t *end; ++ uint32_t size; ++ } unstructured; ++ uint32_t buffer_count; ++ uint32_t object_count; ++ bool failed; ++}; ++ ++static uint32_t fx_parser_read_u32(struct fx_parser *parser) 
++{ ++ uint32_t ret; ++ ++ if ((parser->end - parser->ptr) < sizeof(uint32_t)) ++ { ++ parser->failed = true; ++ return 0; ++ } ++ ++ ret = *(uint32_t *)parser->ptr; ++ parser->ptr += sizeof(uint32_t); ++ ++ return ret; ++} ++ ++static void fx_parser_read_u32s(struct fx_parser *parser, void *dst, size_t size) ++{ ++ uint32_t *ptr = dst; ++ size_t i; ++ ++ for (i = 0; i < size / sizeof(uint32_t); ++i) ++ ptr[i] = fx_parser_read_u32(parser); ++} ++ ++static void fx_parser_skip(struct fx_parser *parser, size_t size) ++{ ++ if ((parser->end - parser->ptr) < size) ++ { ++ parser->ptr = parser->end; ++ parser->failed = true; ++ return; ++ } ++ parser->ptr += size; ++} ++ ++static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, enum vkd3d_shader_error error, ++ const char *format, ...) ++{ ++ va_list args; ++ ++ va_start(args, format); ++ vkd3d_shader_verror(parser->message_context, NULL, error, format, args); ++ va_end(args); ++ ++ parser->failed = true; ++} ++ ++static int fx_2_parse(struct fx_parser *parser) ++{ ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n"); ++ ++ return -1; ++} ++ ++static void fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size) ++{ ++ const uint8_t *ptr = parser->unstructured.ptr; ++ ++ memset(dst, 0, size); ++ if (offset >= parser->unstructured.size ++ || size > parser->unstructured.size - offset) ++ { ++ parser->failed = true; ++ return; ++ } ++ ++ ptr += offset; ++ memcpy(dst, ptr, size); ++} ++ ++static const char *fx_4_get_string(struct fx_parser *parser, uint32_t offset) ++{ ++ const uint8_t *ptr = parser->unstructured.ptr; ++ const uint8_t *end = parser->unstructured.end; ++ ++ if (offset >= parser->unstructured.size) ++ { ++ parser->failed = true; ++ return ""; ++ } ++ ++ ptr += offset; ++ ++ while (ptr < end && *ptr) ++ ++ptr; ++ ++ if (*ptr) ++ { ++ parser->failed = true; ++ return ""; ++ } ++ ++ return 
(const char *)(parser->unstructured.ptr + offset); ++} ++ ++static void parse_fx_start_indent(struct fx_parser *parser) ++{ ++ ++parser->indent; ++} ++ ++static void parse_fx_end_indent(struct fx_parser *parser) ++{ ++ --parser->indent; ++} ++ ++static void parse_fx_print_indent(struct fx_parser *parser) ++{ ++ vkd3d_string_buffer_printf(&parser->buffer, "%*s", 4 * parser->indent, ""); ++} ++ ++static void parse_fx_4_numeric_value(struct fx_parser *parser, uint32_t offset, ++ const struct fx_4_binary_type *type) ++{ ++ unsigned int base_type, comp_count; ++ size_t i; ++ ++ base_type = (type->typeinfo >> FX_4_NUMERIC_BASE_TYPE_SHIFT) & 0xf; ++ ++ comp_count = type->packed_size / sizeof(uint32_t); ++ for (i = 0; i < comp_count; ++i) ++ { ++ union hlsl_constant_value_component value; ++ ++ fx_parser_read_unstructured(parser, &value, offset, sizeof(uint32_t)); ++ ++ if (base_type == FX_4_NUMERIC_TYPE_FLOAT) ++ vkd3d_string_buffer_printf(&parser->buffer, "%f", value.f); ++ else if (base_type == FX_4_NUMERIC_TYPE_INT) ++ vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i); ++ else if (base_type == FX_4_NUMERIC_TYPE_UINT) ++ vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); ++ else if (base_type == FX_4_NUMERIC_TYPE_BOOL) ++ vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? 
"true" : "false" ); ++ else ++ vkd3d_string_buffer_printf(&parser->buffer, "%#x", value.u); ++ ++ if (i < comp_count - 1) ++ vkd3d_string_buffer_printf(&parser->buffer, ", "); ++ ++ offset += sizeof(uint32_t); ++ } ++} ++ ++static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t offset) ++{ ++ const char *str = fx_4_get_string(parser, offset); ++ vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str); ++} ++ ++static void fx_parse_fx_4_annotations(struct fx_parser *parser) ++{ ++ struct fx_4_annotation ++ { ++ uint32_t name; ++ uint32_t type; ++ } var; ++ struct fx_4_binary_type type; ++ const char *name, *type_name; ++ uint32_t count, i, value; ++ ++ count = fx_parser_read_u32(parser); ++ ++ if (!count) ++ return; ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n"); ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "<\n"); ++ parse_fx_start_indent(parser); ++ ++ for (i = 0; i < count; ++i) ++ { ++ fx_parser_read_u32s(parser, &var, sizeof(var)); ++ fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); ++ ++ name = fx_4_get_string(parser, var.name); ++ type_name = fx_4_get_string(parser, type.name); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name); ++ if (type.element_count) ++ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); ++ vkd3d_string_buffer_printf(&parser->buffer, " = "); ++ if (type.element_count) ++ vkd3d_string_buffer_printf(&parser->buffer, "{ "); ++ ++ if (type.class == FX_4_TYPE_CLASS_NUMERIC) ++ { ++ value = fx_parser_read_u32(parser); ++ parse_fx_4_numeric_value(parser, value, &type); ++ } ++ else if (type.class == FX_4_TYPE_CLASS_OBJECT && type.typeinfo == FX_4_OBJECT_TYPE_STRING) ++ { ++ uint32_t element_count = max(type.element_count, 1); ++ ++ for (uint32_t j = 0; j < element_count; ++j) ++ { ++ value = fx_parser_read_u32(parser); ++ fx_4_parse_string_initializer(parser, value); ++ if (j < 
element_count - 1) ++ vkd3d_string_buffer_printf(&parser->buffer, ", "); ++ } ++ } ++ else ++ { ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, ++ "Only numeric and string types are supported in annotations.\n"); ++ } ++ ++ if (type.element_count) ++ vkd3d_string_buffer_printf(&parser->buffer, " }"); ++ vkd3d_string_buffer_printf(&parser->buffer, ";\n"); ++ } ++ parse_fx_end_indent(parser); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, ">"); ++} ++ ++ ++static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t count) ++{ ++ struct fx_4_numeric_variable ++ { ++ uint32_t name; ++ uint32_t type; ++ uint32_t semantic; ++ uint32_t offset; ++ uint32_t value; ++ uint32_t flags; ++ } var; ++ const char *name, *semantic, *type_name; ++ struct fx_4_binary_type type; ++ uint32_t i; ++ ++ for (i = 0; i < count; ++i) ++ { ++ fx_parser_read_u32s(parser, &var, sizeof(var)); ++ fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); ++ ++ name = fx_4_get_string(parser, var.name); ++ type_name = fx_4_get_string(parser, type.name); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, " %s %s", type_name, name); ++ if (type.element_count) ++ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); ++ ++ if (var.semantic) ++ { ++ semantic = fx_4_get_string(parser, var.semantic); ++ vkd3d_string_buffer_printf(&parser->buffer, " : %s", semantic); ++ } ++ fx_parse_fx_4_annotations(parser); ++ ++ if (var.value) ++ { ++ vkd3d_string_buffer_printf(&parser->buffer, " = { "); ++ parse_fx_4_numeric_value(parser, var.value, &type); ++ vkd3d_string_buffer_printf(&parser->buffer, " }"); ++ } ++ vkd3d_string_buffer_printf(&parser->buffer, "; // Offset: %u, size %u.\n", var.offset, type.unpacked_size); ++ } ++} ++ ++static void fx_parse_buffers(struct fx_parser *parser) ++{ ++ struct fx_buffer ++ { ++ uint32_t name; ++ uint32_t size; ++ uint32_t flags; ++ uint32_t count; ++ uint32_t bind_point; ++ } 
buffer; ++ const char *name; ++ uint32_t i; ++ ++ if (parser->failed) ++ return; ++ ++ for (i = 0; i < parser->buffer_count; ++i) ++ { ++ fx_parser_read_u32s(parser, &buffer, sizeof(buffer)); ++ ++ name = fx_4_get_string(parser, buffer.name); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "cbuffer %s", name); ++ fx_parse_fx_4_annotations(parser); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n{\n"); ++ parse_fx_start_indent(parser); ++ fx_parse_fx_4_numeric_variables(parser, buffer.count); ++ parse_fx_end_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); ++ } ++} ++ ++static void fx_4_parse_objects(struct fx_parser *parser) ++{ ++ struct fx_4_object_variable ++ { ++ uint32_t name; ++ uint32_t type; ++ uint32_t semantic; ++ uint32_t bind_point; ++ } var; ++ uint32_t i, j, value, element_count; ++ struct fx_4_binary_type type; ++ const char *name, *type_name; ++ ++ if (parser->failed) ++ return; ++ ++ for (i = 0; i < parser->object_count; ++i) ++ { ++ fx_parser_read_u32s(parser, &var, sizeof(var)); ++ fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); ++ ++ name = fx_4_get_string(parser, var.name); ++ type_name = fx_4_get_string(parser, type.name); ++ vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name); ++ if (type.element_count) ++ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); ++ vkd3d_string_buffer_printf(&parser->buffer, " = {\n"); ++ ++ element_count = max(type.element_count, 1); ++ for (j = 0; j < element_count; ++j) ++ { ++ switch (type.typeinfo) ++ { ++ case FX_4_OBJECT_TYPE_STRING: ++ vkd3d_string_buffer_printf(&parser->buffer, " "); ++ value = fx_parser_read_u32(parser); ++ fx_4_parse_string_initializer(parser, value); ++ break; ++ default: ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, ++ "Parsing object type %u is not implemented.\n", type.typeinfo); ++ return; ++ } ++ vkd3d_string_buffer_printf(&parser->buffer, ",\n"); ++ } ++ 
vkd3d_string_buffer_printf(&parser->buffer, "};\n"); ++ } ++} ++ ++static int fx_4_parse(struct fx_parser *parser) ++{ ++ struct fx_4_header ++ { ++ uint32_t version; ++ uint32_t buffer_count; ++ uint32_t numeric_variable_count; ++ uint32_t object_count; ++ uint32_t shared_buffer_count; ++ uint32_t shared_numeric_variable_count; ++ uint32_t shared_object_count; ++ uint32_t technique_count; ++ uint32_t unstructured_size; ++ uint32_t string_count; ++ uint32_t texture_count; ++ uint32_t depth_stencil_state_count; ++ uint32_t blend_state_count; ++ uint32_t rasterizer_state_count; ++ uint32_t sampler_state_count; ++ uint32_t rtv_count; ++ uint32_t dsv_count; ++ uint32_t shader_count; ++ uint32_t inline_shader_count; ++ } header; ++ ++ fx_parser_read_u32s(parser, &header, sizeof(header)); ++ parser->buffer_count = header.buffer_count; ++ parser->object_count = header.object_count; ++ ++ if (parser->end - parser->ptr < header.unstructured_size) ++ { ++ parser->failed = true; ++ return -1; ++ } ++ ++ parser->unstructured.ptr = parser->ptr; ++ parser->unstructured.end = parser->ptr + header.unstructured_size; ++ parser->unstructured.size = header.unstructured_size; ++ fx_parser_skip(parser, header.unstructured_size); ++ ++ fx_parse_buffers(parser); ++ fx_4_parse_objects(parser); ++ ++ return parser->failed ? 
- 1 : 0; ++} ++ ++static int fx_5_parse(struct fx_parser *parser) ++{ ++ struct fx_5_header ++ { ++ uint32_t version; ++ uint32_t buffer_count; ++ uint32_t numeric_variable_count; ++ uint32_t object_count; ++ uint32_t shared_buffer_count; ++ uint32_t shared_numeric_variable_count; ++ uint32_t shared_object_count; ++ uint32_t technique_count; ++ uint32_t unstructured_size; ++ uint32_t string_count; ++ uint32_t texture_count; ++ uint32_t depth_stencil_state_count; ++ uint32_t blend_state_count; ++ uint32_t rasterizer_state_count; ++ uint32_t sampler_state_count; ++ uint32_t rtv_count; ++ uint32_t dsv_count; ++ uint32_t shader_count; ++ uint32_t inline_shader_count; ++ uint32_t group_count; ++ uint32_t uav_count; ++ uint32_t interface_variable_count; ++ uint32_t interface_variable_element_count; ++ uint32_t class_instance_element_count; ++ } header; ++ ++ fx_parser_read_u32s(parser, &header, sizeof(header)); ++ parser->buffer_count = header.buffer_count; ++ parser->object_count = header.object_count; ++ ++ if (parser->end - parser->ptr < header.unstructured_size) ++ { ++ parser->failed = true; ++ return -1; ++ } ++ ++ parser->unstructured.ptr = parser->ptr; ++ parser->unstructured.end = parser->ptr + header.unstructured_size; ++ parser->unstructured.size = header.unstructured_size; ++ fx_parser_skip(parser, header.unstructured_size); ++ ++ fx_parse_buffers(parser); ++ fx_4_parse_objects(parser); ++ ++ return parser->failed ? 
- 1 : 0; ++} ++ ++int fx_parse(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) ++{ ++ struct fx_parser parser = ++ { ++ .start = compile_info->source.code, ++ .ptr = compile_info->source.code, ++ .end = (uint8_t *)compile_info->source.code + compile_info->source.size, ++ .message_context = message_context, ++ }; ++ uint32_t version; ++ int ret; ++ ++ vkd3d_string_buffer_init(&parser.buffer); ++ ++ if (parser.end - parser.start < sizeof(version)) ++ return -1; ++ version = *(uint32_t *)parser.ptr; ++ ++ switch (version) ++ { ++ case 0xfeff0901: ++ ret = fx_2_parse(&parser); ++ break; ++ case 0xfeff1001: ++ case 0xfeff1011: ++ ret = fx_4_parse(&parser); ++ break; ++ case 0xfeff2001: ++ ret = fx_5_parse(&parser); ++ break; ++ default: ++ fx_parser_error(&parser, VKD3D_SHADER_ERROR_FX_INVALID_VERSION, ++ "Invalid effect binary version value 0x%08x.", version); ++ ret = -1; ++ } ++ ++ vkd3d_shader_code_from_string_buffer(out, &parser.buffer); ++ ++ return ret; ++} diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index d1f02ab568b..c2fb58c55e6 100644 +index d1f02ab568b..46515818d07 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -18,6 +18,13 @@ @@ -3233,13 +4150,13 @@ index d1f02ab568b..c2fb58c55e6 100644 +{ + size_t coord_size; + bool shadow; -+ const char *sampler_type; ++ const char *type_suffix; +}; + struct glsl_src { struct vkd3d_string_buffer *str; -@@ -38,7 +45,21 @@ struct vkd3d_glsl_generator +@@ -38,9 +45,26 @@ struct vkd3d_glsl_generator struct vkd3d_shader_location location; struct vkd3d_shader_message_context *message_context; unsigned int indent; @@ -3260,8 +4177,13 @@ index d1f02ab568b..c2fb58c55e6 100644 + const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; }; ++static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, struct 
vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_src_param *rel_addr, unsigned int offset); ++ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( -@@ -53,11 +74,102 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( + struct vkd3d_glsl_generator *generator, + enum vkd3d_shader_error error, const char *fmt, ...) +@@ -53,11 +77,110 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( generator->failed = true; } @@ -3290,17 +4212,17 @@ index d1f02ab568b..c2fb58c55e6 100644 +{ + static const struct glsl_resource_type_info info[] = + { -+ {0, 0, "samplerNone"}, /* VKD3D_SHADER_RESOURCE_NONE */ -+ {1, 0, "samplerBuffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ -+ {1, 1, "sampler1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ -+ {2, 1, "sampler2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ -+ {2, 0, "sampler2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ -+ {3, 0, "sampler3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ -+ {3, 1, "samplerCube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ -+ {2, 1, "sampler1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ -+ {3, 1, "sampler2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ -+ {3, 0, "sampler2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ -+ {4, 1, "samplerCubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ ++ {0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */ ++ {1, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ ++ {1, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ ++ {2, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ ++ {2, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ ++ {3, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ ++ {3, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ ++ {2, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ ++ {3, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ ++ {3, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ ++ {4, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ + 
}; + + if (!t || t >= ARRAY_SIZE(info)) @@ -3360,11 +4282,19 @@ index d1f02ab568b..c2fb58c55e6 100644 + vkd3d_string_buffer_printf(buffer, "_%u", sampler_space); + } +} ++ ++static void shader_glsl_print_image_name(struct vkd3d_string_buffer *buffer, ++ struct vkd3d_glsl_generator *gen, unsigned int idx, unsigned int space) ++{ ++ vkd3d_string_buffer_printf(buffer, "%s_image_%u", gen->prefix, idx); ++ if (space) ++ vkd3d_string_buffer_printf(buffer, "_%u", space); ++} + static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_register *reg) { -@@ -67,6 +179,90 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, +@@ -67,6 +190,95 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); break; @@ -3451,14 +4381,21 @@ index d1f02ab568b..c2fb58c55e6 100644 + vkd3d_string_buffer_printf(buffer, "%s_cb_%u[%u]", + gen->prefix, reg->idx[0].offset, reg->idx[2].offset); + break; ++ ++ case VKD3DSPR_IDXTEMP: ++ vkd3d_string_buffer_printf(buffer, "x%u", reg->idx[0].offset); ++ shader_glsl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); ++ break; + default: vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled register type %#x.", reg->type); -@@ -106,23 +302,112 @@ static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_ca +@@ -106,23 +318,118 @@ static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_ca vkd3d_string_buffer_release(cache, src->str); } +-static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, +- const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) +static void shader_glsl_print_bitcast(struct vkd3d_string_buffer *dst, struct vkd3d_glsl_generator *gen, + const char *src, enum vkd3d_data_type dst_data_type, 
enum vkd3d_data_type src_data_type, unsigned int size) +{ @@ -3512,15 +4449,15 @@ index d1f02ab568b..c2fb58c55e6 100644 + vkd3d_string_buffer_printf(dst, "%s", src); +} + - static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, - const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) ++static void shader_glsl_print_src(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_src_param *vsir_src, uint32_t mask, enum vkd3d_data_type data_type) { const struct vkd3d_shader_register *reg = &vsir_src->reg; + struct vkd3d_string_buffer *register_name, *str; + enum vkd3d_data_type src_data_type; + unsigned int size; - glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); +- glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); + register_name = vkd3d_string_buffer_get(&gen->string_buffers); if (reg->non_uniform) @@ -3539,12 +4476,12 @@ index d1f02ab568b..c2fb58c55e6 100644 + shader_glsl_print_register_name(register_name, gen, reg); + + if (!vsir_src->modifiers) -+ str = glsl_src->str; ++ str = buffer; + else + str = vkd3d_string_buffer_get(&gen->string_buffers); + + size = reg->dimension == VSIR_DIMENSION_VEC4 ? 
4 : 1; -+ shader_glsl_print_bitcast(str, gen, register_name->buffer, reg->data_type, src_data_type, size); ++ shader_glsl_print_bitcast(str, gen, register_name->buffer, data_type, src_data_type, size); if (reg->dimension == VSIR_DIMENSION_VEC4) - shader_glsl_print_swizzle(glsl_src->str, vsir_src->swizzle, mask); + shader_glsl_print_swizzle(str, vsir_src->swizzle, mask); @@ -3554,35 +4491,62 @@ index d1f02ab568b..c2fb58c55e6 100644 + case VKD3DSPSM_NONE: + break; + case VKD3DSPSM_NEG: -+ vkd3d_string_buffer_printf(glsl_src->str, "-%s", str->buffer); ++ vkd3d_string_buffer_printf(buffer, "-%s", str->buffer); + break; + case VKD3DSPSM_ABS: -+ vkd3d_string_buffer_printf(glsl_src->str, "abs(%s)", str->buffer); ++ vkd3d_string_buffer_printf(buffer, "abs(%s)", str->buffer); + break; + default: -+ vkd3d_string_buffer_printf(glsl_src->str, "(%s)", ++ vkd3d_string_buffer_printf(buffer, "(%s)", + vsir_src->modifiers, str->buffer); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); + break; + } + -+ if (str != glsl_src->str) ++ if (str != buffer) + vkd3d_string_buffer_release(&gen->string_buffers, str); + vkd3d_string_buffer_release(&gen->string_buffers, register_name); ++} ++ ++static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) ++{ ++ glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); ++ shader_glsl_print_src(glsl_src->str, gen, vsir_src, mask, vsir_src->reg.data_type); } static void glsl_dst_cleanup(struct glsl_dst *dst, struct vkd3d_string_buffer_cache *cache) -@@ -153,26 +438,69 @@ static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_gener +@@ -153,26 +460,89 @@ static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_gener return write_mask; } -static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( - struct 
vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...) -+static void VKD3D_PRINTF_FUNC(4, 0) shader_glsl_vprint_assignment(struct vkd3d_glsl_generator *gen, -+ struct glsl_dst *dst, enum vkd3d_data_type data_type, const char *format, va_list args) ++static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_src_param *rel_addr, unsigned int offset) { - va_list args; ++ struct glsl_src r; ++ ++ if (!rel_addr) ++ { ++ vkd3d_string_buffer_printf(buffer, "[%u]", offset); ++ return; ++ } ++ ++ glsl_src_init(&r, gen, rel_addr, VKD3DSP_WRITEMASK_0); ++ vkd3d_string_buffer_printf(buffer, "[%s", r.str->buffer); ++ if (offset) ++ vkd3d_string_buffer_printf(buffer, " + %u", offset); ++ else ++ vkd3d_string_buffer_printf(buffer, "]"); ++ glsl_src_cleanup(&r, &gen->string_buffers); ++} ++ ++static void VKD3D_PRINTF_FUNC(4, 0) shader_glsl_vprint_assignment(struct vkd3d_glsl_generator *gen, ++ struct glsl_dst *dst, enum vkd3d_data_type data_type, const char *format, va_list args) ++{ + struct vkd3d_string_buffer *buffer = gen->buffer; + uint32_t modifiers = dst->vsir->modifiers; + bool close = true; @@ -3652,36 +4616,52 @@ index d1f02ab568b..c2fb58c55e6 100644 } static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -@@ -183,6 +511,319 @@ static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct +@@ -183,138 +553,1671 @@ static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct "Internal compiler error: Unhandled instruction %#x.", ins->opcode); } +-static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +static void shader_glsl_binop(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, const char *op) -+{ + { +- struct glsl_src src; + struct glsl_src src[2]; -+ struct glsl_dst dst; -+ uint32_t mask; -+ -+ 
mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + struct glsl_dst dst; + uint32_t mask; + + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); +- glsl_src_init(&src, gen, &ins->src[0], mask); + glsl_src_init(&src[0], gen, &ins->src[0], mask); + glsl_src_init(&src[1], gen, &ins->src[1], mask); -+ + +- shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer); + shader_glsl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer); -+ + +- glsl_src_cleanup(&src, &gen->string_buffers); + glsl_src_cleanup(&src[1], &gen->string_buffers); + glsl_src_cleanup(&src[0], &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ + glsl_dst_cleanup(&dst, &gen->string_buffers); + } + +-static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +static void shader_glsl_dot(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, uint32_t src_mask) -+{ + { +- const struct vkd3d_shader_version *version = &gen->program->shader_version; + unsigned int component_count; + struct glsl_src src[2]; + struct glsl_dst dst; + uint32_t dst_mask; -+ + +- /* +- * TODO: Implement in_subroutine +- * TODO: shader_glsl_generate_shader_epilogue(generator); +- */ +- if (version->major >= 4) +- { +- shader_glsl_print_indent(gen->buffer, gen->indent); +- vkd3d_string_buffer_printf(gen->buffer, "return;\n"); +- } + dst_mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src[0], gen, &ins->src[0], src_mask); + glsl_src_init(&src[1], gen, &ins->src[1], src_mask); @@ -3696,40 +4676,63 @@ index d1f02ab568b..c2fb58c55e6 100644 + glsl_src_cleanup(&src[1], &gen->string_buffers); + glsl_src_cleanup(&src[0], &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ + } + +-static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, +- const struct vkd3d_shader_instruction *ins) +static void shader_glsl_intrinsic(struct 
vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, const char *op) -+{ + { +- gen->location = ins->location; + struct vkd3d_string_buffer *args; + struct glsl_src src; + struct glsl_dst dst; + unsigned int i; + uint32_t mask; -+ + +- switch (ins->opcode) + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + args = vkd3d_string_buffer_get(&gen->string_buffers); + + for (i = 0; i < ins->src_count; ++i) -+ { + { +- case VKD3DSIH_DCL_INPUT: +- case VKD3DSIH_DCL_OUTPUT: +- case VKD3DSIH_DCL_OUTPUT_SIV: +- case VKD3DSIH_NOP: +- break; +- case VKD3DSIH_MOV: +- shader_glsl_mov(gen, ins); +- break; +- case VKD3DSIH_RET: +- shader_glsl_ret(gen, ins); +- break; +- default: +- shader_glsl_unhandled(gen, ins); +- break; + glsl_src_init(&src, gen, &ins->src[i], mask); + vkd3d_string_buffer_printf(args, "%s%s", i ? ", " : "", src.str->buffer); + glsl_src_cleanup(&src, &gen->string_buffers); -+ } + } + shader_glsl_print_assignment(gen, &dst, "%s(%s)", op, args->buffer); + + vkd3d_string_buffer_release(&gen->string_buffers, args); + glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ + } + +-static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) +static void shader_glsl_relop(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, const char *scalar_op, const char *vector_op) -+{ + { +- const struct vsir_program *program = gen->program; +- struct vkd3d_string_buffer *buffer = gen->buffer; + unsigned int mask_size; + struct glsl_src src[2]; + struct glsl_dst dst; + uint32_t mask; -+ + +- if (program->temp_count) +- vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n\n", program->temp_count); + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src[0], gen, &ins->src[0], mask); + glsl_src_init(&src[1], gen, &ins->src[1], mask); @@ -3744,60 +4747,161 @@ index d1f02ab568b..c2fb58c55e6 100644 + glsl_src_cleanup(&src[1], &gen->string_buffers); + glsl_src_cleanup(&src[0], &gen->string_buffers); + 
glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ + } + +-static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) +static void shader_glsl_cast(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins, + const char *scalar_constructor, const char *vector_constructor) -+{ + { +- const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; +- struct vkd3d_string_buffer *buffer = gen->buffer; +- unsigned int i; +- void *code; +- +- MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); + unsigned int component_count; + struct glsl_src src; + struct glsl_dst dst; + uint32_t mask; -+ + +- vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src, gen, &ins->src[0], mask); -+ + +- vkd3d_string_buffer_printf(buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); + if ((component_count = vsir_write_mask_component_count(mask)) > 1) + shader_glsl_print_assignment(gen, &dst, "%s%u(%s)", + vector_constructor, component_count, src.str->buffer); + else + shader_glsl_print_assignment(gen, &dst, "%s(%s)", + scalar_constructor, src.str->buffer); -+ + +- shader_glsl_generate_declarations(gen); + glsl_src_cleanup(&src, &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} -+ + +- vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); ++static void shader_glsl_end_block(struct vkd3d_glsl_generator *gen) ++{ ++ --gen->indent; ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "}\n"); ++} + ++static void shader_glsl_begin_block(struct vkd3d_glsl_generator *gen) ++{ ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "{\n"); + ++gen->indent; +- for (i = 0; i < instructions->count; ++i) +- { +- vkd3d_glsl_handle_instruction(gen, 
&instructions->elements[i]); +- } ++} + +- vkd3d_string_buffer_printf(buffer, "}\n"); +static void shader_glsl_if(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + const char *condition; + struct glsl_src src; + +- if (TRACE_ON()) +- vkd3d_string_buffer_trace(buffer); ++ glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); + +- if (gen->failed) +- return VKD3D_ERROR_INVALID_SHADER; ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ condition = ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ ? "bool" : "!bool"; ++ vkd3d_string_buffer_printf(gen->buffer, "if (%s(%s))\n", condition, src.str->buffer); + +- if ((code = vkd3d_malloc(buffer->buffer_size))) +- { +- memcpy(code, buffer->buffer, buffer->content_size); +- out->size = buffer->content_size; +- out->code = code; +- } +- else return VKD3D_ERROR_OUT_OF_MEMORY; ++ glsl_src_cleanup(&src, &gen->string_buffers); + +- return VKD3D_OK; ++ shader_glsl_begin_block(gen); + } + +-static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) ++static void shader_glsl_else(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) + { +- vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); +- vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); ++ shader_glsl_end_block(gen); ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "else\n"); ++ shader_glsl_begin_block(gen); + } + +-static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, +- struct vsir_program *program, struct vkd3d_shader_message_context *message_context) ++static void shader_glsl_loop(struct vkd3d_glsl_generator *gen) + { +- memset(gen, 0, sizeof(*gen)); +- gen->program = program; +- vkd3d_string_buffer_cache_init(&gen->string_buffers); +- gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); +- gen->message_context = message_context; ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ 
vkd3d_string_buffer_printf(gen->buffer, "for (;;)\n"); ++ shader_glsl_begin_block(gen); + } + +-int glsl_compile(struct vsir_program *program, uint64_t config_flags, +- const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, +- struct vkd3d_shader_message_context *message_context) ++static void shader_glsl_break(struct vkd3d_glsl_generator *gen) + { +- struct vkd3d_glsl_generator generator; +- int ret; ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "break;\n"); ++} + +- if ((ret = vsir_program_normalise(program, config_flags, compile_info, message_context)) < 0) +- return ret; ++static void shader_glsl_continue(struct vkd3d_glsl_generator *gen) ++{ ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "continue;\n"); ++} ++ ++static void shader_glsl_switch(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ struct glsl_src src; + + glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); + + shader_glsl_print_indent(gen->buffer, gen->indent); -+ condition = ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ ? 
"bool" : "!bool"; -+ vkd3d_string_buffer_printf(gen->buffer, "if (%s(%s))\n", condition, src.str->buffer); ++ vkd3d_string_buffer_printf(gen->buffer, "switch (%s)\n", src.str->buffer); ++ shader_glsl_begin_block(gen); + + glsl_src_cleanup(&src, &gen->string_buffers); -+ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "{\n"); -+ ++gen->indent; +} + -+static void shader_glsl_else(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) ++static void shader_glsl_case(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ -+ unsigned int i = 4 * (gen->indent - 1); ++ struct glsl_src src; + -+ vkd3d_string_buffer_printf(gen->buffer, "%*s}\n%*selse\n%*s{\n", i, "", i, "", i, ""); ++ glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); ++ ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "case %s:\n", src.str->buffer); ++ ++ glsl_src_cleanup(&src, &gen->string_buffers); +} + -+static void shader_glsl_endif(struct vkd3d_glsl_generator *gen) ++static void shader_glsl_default(struct vkd3d_glsl_generator *gen) +{ -+ --gen->indent; + shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "}\n"); ++ vkd3d_string_buffer_printf(gen->buffer, "default:\n"); +} + +static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) @@ -3953,6 +5057,85 @@ index d1f02ab568b..c2fb58c55e6 100644 + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + ++static void shader_glsl_store_uav_typed(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ const struct glsl_resource_type_info *resource_type_info; ++ enum vkd3d_shader_component_type component_type; ++ const struct vkd3d_shader_descriptor_info1 *d; ++ enum vkd3d_shader_resource_type resource_type; ++ unsigned int uav_id, uav_idx, uav_space; ++ struct vkd3d_string_buffer 
*image_data; ++ struct glsl_src image_coord; ++ uint32_t coord_mask; ++ ++ if (ins->dst[0].reg.idx[0].rel_addr || ins->dst[0].reg.idx[1].rel_addr) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, ++ "Descriptor indexing is not supported."); ++ ++ uav_id = ins->dst[0].reg.idx[0].offset; ++ uav_idx = ins->dst[0].reg.idx[1].offset; ++ if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_id))) ++ { ++ resource_type = d->resource_type; ++ uav_space = d->register_space; ++ component_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); ++ } ++ else ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Undeclared UAV descriptor %u.", uav_id); ++ uav_space = 0; ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; ++ component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ } ++ ++ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) ++ { ++ coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); ++ } ++ else ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled UAV type %#x.", resource_type); ++ coord_mask = vkd3d_write_mask_from_component_count(2); ++ } ++ ++ glsl_src_init(&image_coord, gen, &ins->src[0], coord_mask); ++ image_data = vkd3d_string_buffer_get(&gen->string_buffers); ++ ++ if (ins->src[1].reg.dimension == VSIR_DIMENSION_SCALAR) ++ { ++ switch (component_type) ++ { ++ case VKD3D_SHADER_COMPONENT_UINT: ++ vkd3d_string_buffer_printf(image_data, "uvec4("); ++ break; ++ case VKD3D_SHADER_COMPONENT_INT: ++ vkd3d_string_buffer_printf(image_data, "ivec4("); ++ break; ++ default: ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled component type %#x.", component_type); ++ /* fall through */ ++ case VKD3D_SHADER_COMPONENT_FLOAT: ++ vkd3d_string_buffer_printf(image_data, "vec4("); ++ break; ++ 
} ++ } ++ shader_glsl_print_src(image_data, gen, &ins->src[1], VKD3DSP_WRITEMASK_ALL, ++ vkd3d_data_type_from_component_type(component_type)); ++ if (ins->src[1].reg.dimension == VSIR_DIMENSION_SCALAR) ++ vkd3d_string_buffer_printf(image_data, ", 0, 0, 0)"); ++ ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "imageStore("); ++ shader_glsl_print_image_name(gen->buffer, gen, uav_idx, uav_space); ++ vkd3d_string_buffer_printf(gen->buffer, ", %s, %s);\n", image_coord.str->buffer, image_data->buffer); ++ ++ vkd3d_string_buffer_release(&gen->string_buffers, image_data); ++ glsl_src_cleanup(&image_coord, &gen->string_buffers); ++} ++ +static void shader_glsl_unary_op(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, const char *op) +{ @@ -3969,13 +5152,21 @@ index d1f02ab568b..c2fb58c55e6 100644 + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + - static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { - struct glsl_src src; -@@ -198,16 +839,203 @@ static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d - glsl_dst_cleanup(&dst, &gen->string_buffers); - } - ++static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ struct glsl_src src; ++ struct glsl_dst dst; ++ uint32_t mask; ++ ++ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ glsl_src_init(&src, gen, &ins->src[0], mask); ++ ++ shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer); ++ ++ glsl_src_cleanup(&src, &gen->string_buffers); ++ glsl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ +static void shader_glsl_movc(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + unsigned int component_count; @@ -4040,19 +5231,27 @@ index d1f02ab568b..c2fb58c55e6 100644 + switch (sysval) + { + case VKD3D_SHADER_SV_POSITION: -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL 
|| version->type == VKD3D_SHADER_TYPE_COMPUTE) ++ if (version->type == VKD3D_SHADER_TYPE_COMPUTE) + { + vkd3d_string_buffer_printf(buffer, "", sysval); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled system value %#x.", sysval); ++ "Internal compiler error: Unhandled SV_POSITION in shader type #%x.", version->type); ++ break; + } ++ if (idx) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled SV_POSITION index %u.", idx); ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL) ++ vkd3d_string_buffer_printf(buffer, "gl_FragCoord"); + else -+ { + vkd3d_string_buffer_printf(buffer, "gl_Position"); -+ if (idx) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled SV_POSITION index %u.", idx); -+ } ++ break; ++ ++ case VKD3D_SHADER_SV_VERTEX_ID: ++ if (version->type != VKD3D_SHADER_TYPE_VERTEX) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled SV_VERTEX_ID in shader type #%x.", version->type); ++ vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_VertexID, 0, 0, 0))"); + break; + + case VKD3D_SHADER_SV_IS_FRONT_FACE: @@ -4109,7 +5308,22 @@ index d1f02ab568b..c2fb58c55e6 100644 + } + else + { -+ vkd3d_string_buffer_printf(buffer, " = shader_in_%u", i); ++ switch (e->component_type) ++ { ++ case VKD3D_SHADER_COMPONENT_UINT: ++ vkd3d_string_buffer_printf(buffer, " = uintBitsToFloat(shader_in_%u)", i); ++ break; ++ case VKD3D_SHADER_COMPONENT_INT: ++ vkd3d_string_buffer_printf(buffer, " = intBitsToFloat(shader_in_%u)", i); ++ break; ++ default: ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled input component type %#x.", e->component_type); ++ /* fall through */ ++ case VKD3D_SHADER_COMPONENT_FLOAT: ++ vkd3d_string_buffer_printf(buffer, " = shader_in_%u", i); ++ break; ++ } + } + } + else @@ 
-4126,6 +5340,7 @@ index d1f02ab568b..c2fb58c55e6 100644 +{ + const struct shader_signature *signature = &gen->program->output_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; ++ enum vkd3d_shader_component_type type; + const struct signature_element *e; + unsigned int i; + @@ -4136,11 +5351,13 @@ index d1f02ab568b..c2fb58c55e6 100644 + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + ++ type = e->component_type; + shader_glsl_print_indent(buffer, gen->indent); + if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) + { + if (gen->interstage_output) + { ++ type = VKD3D_SHADER_COMPONENT_FLOAT; + vkd3d_string_buffer_printf(buffer, "shader_out.reg_%u", e->target_location); + if (e->target_location >= gen->limits.output_count) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, @@ -4160,43 +5377,83 @@ index d1f02ab568b..c2fb58c55e6 100644 + shader_glsl_print_sysval_name(buffer, gen, e->sysval_semantic, e->semantic_index); + } + shader_glsl_print_write_mask(buffer, e->mask); -+ vkd3d_string_buffer_printf(buffer, " = %s_out[%u]", gen->prefix, e->register_index); ++ switch (type) ++ { ++ case VKD3D_SHADER_COMPONENT_UINT: ++ vkd3d_string_buffer_printf(buffer, " = floatBitsToUint(%s_out[%u])", gen->prefix, e->register_index); ++ break; ++ default: ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled output component type %#x.", e->component_type); ++ /* fall through */ ++ case VKD3D_SHADER_COMPONENT_FLOAT: ++ vkd3d_string_buffer_printf(buffer, " = %s_out[%u]", gen->prefix, e->register_index); ++ break; ++ } + shader_glsl_print_write_mask(buffer, e->mask); + vkd3d_string_buffer_printf(buffer, ";\n"); + } +} + - static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { - const struct vkd3d_shader_version *version = &gen->program->shader_version; - -- /* -- * TODO: Implement in_subroutine -- * TODO: 
shader_glsl_generate_shader_epilogue(generator); -- */ - if (version->major >= 4) - { ++static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ const struct vkd3d_shader_version *version = &gen->program->shader_version; ++ ++ if (version->major >= 4) ++ { + shader_glsl_shader_epilogue(gen); - shader_glsl_print_indent(gen->buffer, gen->indent); - vkd3d_string_buffer_printf(gen->buffer, "return;\n"); - } -@@ -220,30 +1048,582 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - - switch (ins->opcode) - { ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "return;\n"); ++ } ++} ++ ++static void shader_glsl_dcl_indexable_temp(struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_instruction *ins) ++{ ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "vec4 x%u[%u];\n", ++ ins->declaration.indexable_temp.register_idx, ++ ins->declaration.indexable_temp.register_size); ++} ++ ++static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_instruction *ins) ++{ ++ gen->location = ins->location; ++ ++ switch (ins->opcode) ++ { + case VKD3DSIH_ADD: ++ case VKD3DSIH_IADD: + shader_glsl_binop(gen, ins, "+"); + break; + case VKD3DSIH_AND: + shader_glsl_binop(gen, ins, "&"); + break; - case VKD3DSIH_DCL_INPUT: ++ case VKD3DSIH_BREAK: ++ shader_glsl_break(gen); ++ break; ++ case VKD3DSIH_CASE: ++ shader_glsl_case(gen, ins); ++ break; ++ case VKD3DSIH_CONTINUE: ++ shader_glsl_continue(gen); ++ break; ++ case VKD3DSIH_DCL_INDEXABLE_TEMP: ++ shader_glsl_dcl_indexable_temp(gen, ins); ++ break; ++ case VKD3DSIH_DCL_INPUT: + case VKD3DSIH_DCL_INPUT_PS: ++ case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_PS_SIV: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_NOP: - break; ++ case VKD3DSIH_DCL_INPUT_SGV: ++ case 
VKD3DSIH_DCL_OUTPUT: ++ case VKD3DSIH_DCL_OUTPUT_SIV: ++ case VKD3DSIH_NOP: ++ break; ++ case VKD3DSIH_DEFAULT: ++ shader_glsl_default(gen); ++ break; + case VKD3DSIH_DIV: + shader_glsl_binop(gen, ins, "/"); + break; @@ -4213,8 +5470,11 @@ index d1f02ab568b..c2fb58c55e6 100644 + shader_glsl_else(gen, ins); + break; + case VKD3DSIH_ENDIF: -+ shader_glsl_endif(gen); ++ case VKD3DSIH_ENDLOOP: ++ case VKD3DSIH_ENDSWITCH: ++ shader_glsl_end_block(gen); + break; ++ case VKD3DSIH_EQO: + case VKD3DSIH_IEQ: + shader_glsl_relop(gen, ins, "==", "equal"); + break; @@ -4242,11 +5502,26 @@ index d1f02ab568b..c2fb58c55e6 100644 + break; + case VKD3DSIH_ILT: + case VKD3DSIH_LTO: ++ case VKD3DSIH_ULT: + shader_glsl_relop(gen, ins, "<", "lessThan"); + break; ++ case VKD3DSIH_IMAX: ++ case VKD3DSIH_MAX: ++ shader_glsl_intrinsic(gen, ins, "max"); ++ break; ++ case VKD3DSIH_MIN: ++ shader_glsl_intrinsic(gen, ins, "min"); ++ break; + case VKD3DSIH_IMUL: + shader_glsl_mul_extended(gen, ins); + break; ++ case VKD3DSIH_INE: ++ case VKD3DSIH_NEU: ++ shader_glsl_relop(gen, ins, "!=", "notEqual"); ++ break; ++ case VKD3DSIH_INEG: ++ shader_glsl_unary_op(gen, ins, "-"); ++ break; + case VKD3DSIH_ISHL: + shader_glsl_binop(gen, ins, "<<"); + break; @@ -4254,16 +5529,6 @@ index d1f02ab568b..c2fb58c55e6 100644 + case VKD3DSIH_USHR: + shader_glsl_binop(gen, ins, ">>"); + break; -+ case VKD3DSIH_MAX: -+ shader_glsl_intrinsic(gen, ins, "max"); -+ break; -+ case VKD3DSIH_MIN: -+ shader_glsl_intrinsic(gen, ins, "min"); -+ break; -+ case VKD3DSIH_INE: -+ case VKD3DSIH_NEU: -+ shader_glsl_relop(gen, ins, "!=", "notEqual"); -+ break; + case VKD3DSIH_ITOF: + case VKD3DSIH_UTOF: + shader_glsl_cast(gen, ins, "float", "vec"); @@ -4274,9 +5539,12 @@ index d1f02ab568b..c2fb58c55e6 100644 + case VKD3DSIH_LOG: + shader_glsl_intrinsic(gen, ins, "log2"); + break; - case VKD3DSIH_MOV: - shader_glsl_mov(gen, ins); - break; ++ case VKD3DSIH_LOOP: ++ shader_glsl_loop(gen); ++ break; ++ case VKD3DSIH_MOV: ++ 
shader_glsl_mov(gen, ins); ++ break; + case VKD3DSIH_MOVC: + shader_glsl_movc(gen, ins); + break; @@ -4289,9 +5557,9 @@ index d1f02ab568b..c2fb58c55e6 100644 + case VKD3DSIH_OR: + shader_glsl_binop(gen, ins, "|"); + break; - case VKD3DSIH_RET: - shader_glsl_ret(gen, ins); - break; ++ case VKD3DSIH_RET: ++ shader_glsl_ret(gen, ins); ++ break; + case VKD3DSIH_ROUND_NE: + shader_glsl_intrinsic(gen, ins, "roundEven"); + break; @@ -4313,12 +5581,18 @@ index d1f02ab568b..c2fb58c55e6 100644 + case VKD3DSIH_SQRT: + shader_glsl_intrinsic(gen, ins, "sqrt"); + break; - default: - shader_glsl_unhandled(gen, ins); - break; - } - } - ++ case VKD3DSIH_STORE_UAV_TYPED: ++ shader_glsl_store_uav_typed(gen, ins); ++ break; ++ case VKD3DSIH_SWITCH: ++ shader_glsl_switch(gen, ins); ++ break; ++ default: ++ shader_glsl_unhandled(gen, ins); ++ break; ++ } ++} ++ +static bool shader_glsl_check_shader_visibility(const struct vkd3d_glsl_generator *gen, + enum vkd3d_shader_visibility visibility) +{ @@ -4346,6 +5620,137 @@ index d1f02ab568b..c2fb58c55e6 100644 + } +} + ++static bool shader_glsl_get_uav_binding(const struct vkd3d_glsl_generator *gen, unsigned int register_space, ++ unsigned int register_idx, enum vkd3d_shader_resource_type resource_type, unsigned int *binding_idx) ++{ ++ const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; ++ const struct vkd3d_shader_resource_binding *binding; ++ enum vkd3d_shader_binding_flag resource_type_flag; ++ unsigned int i; ++ ++ if (!interface_info) ++ return false; ++ ++ resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER ++ ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; ++ ++ for (i = 0; i < interface_info->binding_count; ++i) ++ { ++ binding = &interface_info->bindings[i]; ++ ++ if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) ++ continue; ++ if (binding->register_space != register_space) ++ continue; ++ if (binding->register_index != register_idx) ++ continue; ++ if (!shader_glsl_check_shader_visibility(gen, binding->shader_visibility)) ++ continue; ++ if (!(binding->flags & resource_type_flag)) ++ continue; ++ *binding_idx = i; ++ return true; ++ } ++ ++ return false; ++} ++ ++static void shader_glsl_generate_uav_declaration(struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_descriptor_info1 *uav) ++{ ++ const struct glsl_resource_type_info *resource_type_info; ++ const char *image_type_prefix, *image_type, *read_format; ++ const struct vkd3d_shader_descriptor_binding *binding; ++ const struct vkd3d_shader_descriptor_offset *offset; ++ struct vkd3d_string_buffer *buffer = gen->buffer; ++ enum vkd3d_shader_component_type component_type; ++ unsigned int binding_idx; ++ ++ if (uav->count != 1) ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, ++ "UAV %u has unsupported descriptor array size %u.", uav->register_id, uav->count); ++ return; ++ } ++ ++ if (!shader_glsl_get_uav_binding(gen, uav->register_space, ++ uav->register_index, uav->resource_type, &binding_idx)) ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, ++ "No descriptor binding specified for UAV %u.", uav->register_id); ++ return; ++ } ++ ++ binding = &gen->interface_info->bindings[binding_idx].binding; ++ ++ if (binding->set != 0) ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, ++ "Unsupported binding set %u specified for UAV %u.", binding->set, uav->register_id); ++ return; ++ } ++ ++ if (binding->count != 1) ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, ++ 
"Unsupported binding count %u specified for UAV %u.", binding->count, uav->register_id); ++ return; ++ } ++ ++ if (gen->offset_info && gen->offset_info->binding_offsets) ++ { ++ offset = &gen->offset_info->binding_offsets[binding_idx]; ++ if (offset->static_offset || offset->dynamic_offset_index != ~0u) ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled descriptor offset specified for UAV %u.", ++ uav->register_id); ++ return; ++ } ++ } ++ ++ if ((resource_type_info = shader_glsl_get_resource_type_info(uav->resource_type))) ++ { ++ image_type = resource_type_info->type_suffix; ++ } ++ else ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled UAV type %#x.", uav->resource_type); ++ image_type = ""; ++ } ++ ++ switch ((component_type = vkd3d_component_type_from_resource_data_type(uav->resource_data_type))) ++ { ++ case VKD3D_SHADER_COMPONENT_UINT: ++ image_type_prefix = "u"; ++ read_format = "r32ui"; ++ break; ++ case VKD3D_SHADER_COMPONENT_INT: ++ image_type_prefix = "i"; ++ read_format = "r32i"; ++ break; ++ default: ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled component type %#x for UAV %u.", ++ component_type, uav->register_id); ++ /* fall through */ ++ case VKD3D_SHADER_COMPONENT_FLOAT: ++ image_type_prefix = ""; ++ read_format = "r32f"; ++ break; ++ } ++ ++ vkd3d_string_buffer_printf(buffer, "layout(binding = %u", binding->binding); ++ if (uav->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ) ++ vkd3d_string_buffer_printf(buffer, ", %s) ", read_format); ++ else ++ vkd3d_string_buffer_printf(buffer, ") writeonly "); ++ vkd3d_string_buffer_printf(buffer, "uniform %simage%s ", image_type_prefix, image_type); ++ shader_glsl_print_image_name(buffer, gen, uav->register_index, uav->register_space); ++ vkd3d_string_buffer_printf(buffer, ";\n"); ++} ++ +static bool 
shader_glsl_get_cbv_binding(const struct vkd3d_glsl_generator *gen, + unsigned int register_space, unsigned int register_idx, unsigned int *binding_idx) +{ @@ -4389,7 +5794,7 @@ index d1f02ab568b..c2fb58c55e6 100644 + + if (cbv->count != 1) + { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, + "Constant buffer %u has unsupported descriptor array size %u.", cbv->register_id, cbv->count); + return; + } @@ -4514,7 +5919,7 @@ index d1f02ab568b..c2fb58c55e6 100644 + + if ((resource_type_info = shader_glsl_get_resource_type_info(srv->resource_type))) + { -+ sampler_type = resource_type_info->sampler_type; ++ sampler_type = resource_type_info->type_suffix; + if (shadow && !resource_type_info->shadow) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, + "Comparison samplers are not supported with resource type %#x.", srv->resource_type); @@ -4577,7 +5982,7 @@ index d1f02ab568b..c2fb58c55e6 100644 + return; + } + -+ vkd3d_string_buffer_printf(buffer, "layout(binding = %u) uniform %s%s%s ", ++ vkd3d_string_buffer_printf(buffer, "layout(binding = %u) uniform %ssampler%s%s ", + binding->binding, sampler_type_prefix, sampler_type, shadow ? 
"Shadow" : ""); + shader_glsl_print_combined_sampler_name(buffer, gen, crs->resource_index, + crs->resource_space, crs->sampler_index, crs->sampler_space); @@ -4602,6 +6007,10 @@ index d1f02ab568b..c2fb58c55e6 100644 + /* GLSL uses combined resource/sampler descriptors.*/ + break; + ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: ++ shader_glsl_generate_uav_declaration(gen, descriptor); ++ break; ++ + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + shader_glsl_generate_cbv_declaration(gen, descriptor); + break; @@ -4621,15 +6030,62 @@ index d1f02ab568b..c2fb58c55e6 100644 + vkd3d_string_buffer_printf(gen->buffer, "\n"); +} + -+static void shader_glsl_generate_interface_block(struct vkd3d_string_buffer *buffer, -+ const char *type, unsigned int count) ++static const struct signature_element *signature_get_element_by_location( ++ const struct shader_signature *signature, unsigned int location) +{ ++ const struct signature_element *e; ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ e = &signature->elements[i]; ++ ++ if (e->target_location != location) ++ continue; ++ ++ return e; ++ } ++ ++ return NULL; ++} ++ ++static const char *shader_glsl_get_interpolation(struct vkd3d_glsl_generator *gen, ++ const struct shader_signature *signature, const char *type, unsigned int location) ++{ ++ enum vkd3d_shader_interpolation_mode m; ++ const struct signature_element *e; ++ ++ if ((e = signature_get_element_by_location(signature, location))) ++ m = e->interpolation_mode; ++ else ++ m = VKD3DSIM_NONE; ++ ++ switch (m) ++ { ++ case VKD3DSIM_NONE: ++ case VKD3DSIM_LINEAR: ++ return ""; ++ case VKD3DSIM_CONSTANT: ++ return "flat "; ++ default: ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled interpolation mode %#x for %s location %u.", m, type, location); ++ return ""; ++ } ++} ++ ++static void shader_glsl_generate_interface_block(struct vkd3d_glsl_generator *gen, ++ const struct shader_signature 
*signature, const char *type, unsigned int count) ++{ ++ struct vkd3d_string_buffer *buffer = gen->buffer; ++ const char *interpolation; + unsigned int i; + + vkd3d_string_buffer_printf(buffer, "%s shader_in_out\n{\n", type); + for (i = 0; i < count; ++i) + { -+ vkd3d_string_buffer_printf(buffer, " vec4 reg_%u;\n", i); ++ interpolation = shader_glsl_get_interpolation(gen, signature, type, i); ++ vkd3d_string_buffer_printf(buffer, " %svec4 reg_%u;\n", interpolation, i); + } + vkd3d_string_buffer_printf(buffer, "} shader_%s;\n", type); +} @@ -4639,31 +6095,17 @@ index d1f02ab568b..c2fb58c55e6 100644 + const struct shader_signature *signature = &gen->program->input_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; -+ unsigned int i; ++ unsigned int i, count; + + if (!gen->interstage_input) + { -+ for (i = 0; i < signature->element_count; ++i) ++ for (i = 0, count = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) ++ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED || e->sysval_semantic) + continue; + -+ if (e->sysval_semantic) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); -+ continue; -+ } -+ -+ if (e->component_type != VKD3D_SHADER_COMPONENT_FLOAT) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled component type %#x.", e->component_type); -+ continue; -+ } -+ + if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, @@ -4678,15 +6120,35 @@ index d1f02ab568b..c2fb58c55e6 100644 + continue; + } + -+ vkd3d_string_buffer_printf(buffer, -+ "layout(location = %u) in vec4 shader_in_%u;\n", e->target_location, i); ++ vkd3d_string_buffer_printf(buffer, "layout(location = %u) in ", 
e->target_location); ++ switch (e->component_type) ++ { ++ case VKD3D_SHADER_COMPONENT_UINT: ++ vkd3d_string_buffer_printf(buffer, "uvec4"); ++ break; ++ case VKD3D_SHADER_COMPONENT_INT: ++ vkd3d_string_buffer_printf(buffer, "ivec4"); ++ break; ++ case VKD3D_SHADER_COMPONENT_FLOAT: ++ vkd3d_string_buffer_printf(buffer, "vec4"); ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, "", e->component_type); ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled input component type %#x.", e->component_type); ++ break; ++ } ++ vkd3d_string_buffer_printf(buffer, " shader_in_%u;\n", i); ++ ++count; + } ++ if (count) ++ vkd3d_string_buffer_printf(buffer, "\n"); + } + else if (gen->limits.input_count) + { -+ shader_glsl_generate_interface_block(buffer, "in", gen->limits.input_count); ++ shader_glsl_generate_interface_block(gen, signature, "in", gen->limits.input_count); ++ vkd3d_string_buffer_printf(buffer, "\n"); + } -+ vkd3d_string_buffer_printf(buffer, "\n"); +} + +static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator *gen) @@ -4694,11 +6156,11 @@ index d1f02ab568b..c2fb58c55e6 100644 + const struct shader_signature *signature = &gen->program->output_signature; + struct vkd3d_string_buffer *buffer = gen->buffer; + const struct signature_element *e; -+ unsigned int i; ++ unsigned int i, count; + + if (!gen->interstage_output) + { -+ for (i = 0; i < signature->element_count; ++i) ++ for (i = 0, count = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + @@ -4712,13 +6174,6 @@ index d1f02ab568b..c2fb58c55e6 100644 + continue; + } + -+ if (e->component_type != VKD3D_SHADER_COMPONENT_FLOAT) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled component type %#x.", e->component_type); -+ continue; -+ } -+ + if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) + { + vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_INTERNAL, @@ -4733,22 +6188,47 @@ index d1f02ab568b..c2fb58c55e6 100644 + continue; + } + -+ vkd3d_string_buffer_printf(buffer, -+ "layout(location = %u) out vec4 shader_out_%u;\n", e->target_location, i); ++ vkd3d_string_buffer_printf(buffer, "layout(location = %u) out ", e->target_location); ++ switch (e->component_type) ++ { ++ case VKD3D_SHADER_COMPONENT_UINT: ++ vkd3d_string_buffer_printf(buffer, "uvec4"); ++ break; ++ case VKD3D_SHADER_COMPONENT_FLOAT: ++ vkd3d_string_buffer_printf(buffer, "vec4"); ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, "", e->component_type); ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled output component type %#x.", e->component_type); ++ break; ++ } ++ vkd3d_string_buffer_printf(buffer, " shader_out_%u;\n", e->semantic_index); ++ ++count; + } ++ if (count) ++ vkd3d_string_buffer_printf(buffer, "\n"); + } + else if (gen->limits.output_count) + { -+ shader_glsl_generate_interface_block(buffer, "out", gen->limits.output_count); ++ shader_glsl_generate_interface_block(gen, signature, "out", gen->limits.output_count); ++ vkd3d_string_buffer_printf(buffer, "\n"); + } -+ vkd3d_string_buffer_printf(buffer, "\n"); +} + - static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) - { - const struct vsir_program *program = gen->program; - struct vkd3d_string_buffer *buffer = gen->buffer; - ++static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) ++{ ++ const struct vsir_program *program = gen->program; ++ struct vkd3d_string_buffer *buffer = gen->buffer; ++ const struct vsir_thread_group_size *group_size; ++ ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) ++ { ++ group_size = &program->thread_group_size; ++ vkd3d_string_buffer_printf(buffer, "layout(local_size_x = %u, local_size_y = %u, local_size_z = %u) in;\n\n", ++ group_size->x, group_size->y, group_size->z); ++ } ++ + 
shader_glsl_generate_descriptor_declarations(gen); + shader_glsl_generate_input_declarations(gen); + shader_glsl_generate_output_declarations(gen); @@ -4757,25 +6237,60 @@ index d1f02ab568b..c2fb58c55e6 100644 + vkd3d_string_buffer_printf(buffer, "vec4 %s_in[%u];\n", gen->prefix, gen->limits.input_count); + if (gen->limits.output_count) + vkd3d_string_buffer_printf(buffer, "vec4 %s_out[%u];\n", gen->prefix, gen->limits.output_count); - if (program->temp_count) -- vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n\n", program->temp_count); ++ if (program->temp_count) + vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n", program->temp_count); + vkd3d_string_buffer_printf(buffer, "\n"); - } - - static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) -@@ -264,6 +1644,7 @@ static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struc - vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); - - ++gen->indent; ++} ++ ++static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) ++{ ++ const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; ++ struct vkd3d_string_buffer *buffer = gen->buffer; ++ unsigned int i; ++ void *code; ++ ++ MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); ++ ++ vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); ++ ++ vkd3d_string_buffer_printf(buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); ++ ++ shader_glsl_generate_declarations(gen); ++ ++ vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); ++ ++ ++gen->indent; + shader_glsl_shader_prologue(gen); - for (i = 0; i < instructions->count; ++i) - { - vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]); -@@ -294,27 +1675,77 @@ static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) - vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); - } - ++ for (i = 0; i < instructions->count; ++i) ++ { ++ vkd3d_glsl_handle_instruction(gen, &instructions->elements[i]); ++ } ++ ++ vkd3d_string_buffer_printf(buffer, "}\n"); ++ ++ if (TRACE_ON()) ++ vkd3d_string_buffer_trace(buffer); ++ ++ if (gen->failed) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ if ((code = vkd3d_malloc(buffer->buffer_size))) ++ { ++ memcpy(code, buffer->buffer, buffer->content_size); ++ out->size = buffer->content_size; ++ out->code = code; ++ } ++ else return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ return VKD3D_OK; ++} ++ ++static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) ++{ ++ vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); ++ vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); ++} ++ +static void shader_glsl_init_limits(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_version *version) +{ + struct shader_limits *limits = &gen->limits; @@ -4794,6 +6309,10 @@ index d1f02ab568b..c2fb58c55e6 100644 + limits->input_count = 32; + limits->output_count = 8; + break; ++ case VKD3D_SHADER_TYPE_COMPUTE: ++ limits->input_count = 0; ++ limits->output_count = 0; ++ break; + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled shader type %#x.", version->type); @@ -4803,21 +6322,20 @@ index d1f02ab568b..c2fb58c55e6 100644 + } +} + - static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, -- struct vsir_program *program, struct 
vkd3d_shader_message_context *message_context) ++static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, + struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, + struct vkd3d_shader_message_context *message_context) - { ++{ + enum vkd3d_shader_type type = program->shader_version.type; + - memset(gen, 0, sizeof(*gen)); - gen->program = program; - vkd3d_string_buffer_cache_init(&gen->string_buffers); - gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); ++ memset(gen, 0, sizeof(*gen)); ++ gen->program = program; ++ vkd3d_string_buffer_cache_init(&gen->string_buffers); ++ gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); + gen->location.source_name = compile_info->source_name; - gen->message_context = message_context; ++ gen->message_context = message_context; + if (!(gen->prefix = shader_glsl_get_prefix(type))) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, @@ -4825,29 +6343,29 @@ index d1f02ab568b..c2fb58c55e6 100644 + gen->prefix = "unknown"; + } + shader_glsl_init_limits(gen, &program->shader_version); -+ gen->interstage_input = type != VKD3D_SHADER_TYPE_VERTEX; -+ gen->interstage_output = type != VKD3D_SHADER_TYPE_PIXEL; ++ gen->interstage_input = type != VKD3D_SHADER_TYPE_VERTEX && type != VKD3D_SHADER_TYPE_COMPUTE; ++ gen->interstage_output = type != VKD3D_SHADER_TYPE_PIXEL && type != VKD3D_SHADER_TYPE_COMPUTE; + + gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); + gen->offset_info = vkd3d_find_struct(compile_info->next, DESCRIPTOR_OFFSET_INFO); + gen->descriptor_info = descriptor_info; + gen->combined_sampler_info = combined_sampler_info; - } - - int glsl_compile(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_compile_info *compile_info, struct 
vkd3d_shader_code *out, -- struct vkd3d_shader_message_context *message_context) ++} ++ ++int glsl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_glsl_generator generator; - int ret; - -- if ((ret = vsir_program_normalise(program, config_flags, compile_info, message_context)) < 0) ++{ ++ struct vkd3d_glsl_generator generator; ++ int ret; ++ + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) - return ret; ++ return ret; ++ ++ VKD3D_ASSERT(program->normalised_io); ++ VKD3D_ASSERT(program->normalised_hull_cp_io); - vkd3d_glsl_generator_init(&generator, program, message_context); + vkd3d_glsl_generator_init(&generator, program, compile_info, @@ -4856,7 +6374,7 @@ index d1f02ab568b..c2fb58c55e6 100644 vkd3d_glsl_generator_cleanup(&generator); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index bd5baacd83d..9ace1930c1b 100644 +index bd5baacd83d..6ad0117fd5c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -254,6 +254,46 @@ bool hlsl_type_is_resource(const struct hlsl_type *type) @@ -4954,7 +6472,15 @@ index bd5baacd83d..9ace1930c1b 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_RASTERIZER_STATE: -@@ -1589,6 +1635,16 @@ struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex +@@ -1575,7 +1621,6 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; + +- VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); + return hlsl_new_expr(ctx, op, operands, 
arg1->data_type, &arg1->loc); + } + +@@ -1589,6 +1634,16 @@ struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } @@ -4971,7 +6497,7 @@ index bd5baacd83d..9ace1930c1b 100644 struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) { -@@ -1792,6 +1848,118 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned +@@ -1792,6 +1847,118 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned return &swizzle->node; } @@ -5090,7 +6616,7 @@ index bd5baacd83d..9ace1930c1b 100644 struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, struct vkd3d_shader_location *loc) { -@@ -2142,6 +2310,51 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr +@@ -2142,6 +2309,51 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr return dst; } @@ -5142,7 +6668,7 @@ index bd5baacd83d..9ace1930c1b 100644 static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant) { -@@ -2149,8 +2362,8 @@ static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, +@@ -2149,8 +2361,8 @@ static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, } struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, @@ -5153,7 +6679,7 @@ index bd5baacd83d..9ace1930c1b 100644 { struct hlsl_state_block_entry *entry; struct clone_instr_map map = { 0 }; -@@ -2166,7 +2379,11 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, +@@ -2166,7 +2378,11 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, return NULL; } @@ -5166,7 +6692,7 @@ index 
bd5baacd83d..9ace1930c1b 100644 if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) { hlsl_free_state_block_entry(entry); -@@ -2179,7 +2396,16 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, +@@ -2179,7 +2395,16 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, hlsl_free_state_block_entry(entry); return NULL; } @@ -5184,7 +6710,7 @@ index bd5baacd83d..9ace1930c1b 100644 vkd3d_free(map.instrs); return entry; -@@ -2284,6 +2510,12 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, +@@ -2284,6 +2509,12 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_SWIZZLE: return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); @@ -5197,6 +6723,14 @@ index bd5baacd83d..9ace1930c1b 100644 case HLSL_IR_STATEBLOCK_CONSTANT: return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); } +@@ -2314,6 +2545,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, + decl->return_type = return_type; + decl->parameters = *parameters; + decl->loc = *loc; ++ list_init(&decl->extern_vars); + + if (!hlsl_types_are_equal(return_type, ctx->builtin_types.Void)) + { @@ -2570,6 +2802,10 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru } return string; @@ -5403,7 +6937,7 @@ index bd5baacd83d..9ace1930c1b 100644 * refer to them. */ LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 22e25b23988..4082b14fe04 100644 +index 22e25b23988..efe3aec024b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -22,7 +22,6 @@ @@ -5457,7 +6991,16 @@ index 22e25b23988..4082b14fe04 100644 }; /* A reference to an instruction node (struct hlsl_ir_node), usable as a field in other structs. 
-@@ -591,6 +605,8 @@ struct hlsl_ir_function_decl +@@ -474,6 +488,8 @@ struct hlsl_ir_var + * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 + * means function entry. */ + unsigned int first_write, last_read; ++ /* Whether the variable is read in any entry function. */ ++ bool is_read; + /* Offset where the variable's value is stored within its buffer in numeric register components. + * This in case the variable is uniform. */ + unsigned int buffer_offset; +@@ -591,10 +607,18 @@ struct hlsl_ir_function_decl unsigned int attr_count; const struct hlsl_attribute *const *attrs; @@ -5466,7 +7009,17 @@ index 22e25b23988..4082b14fe04 100644 /* Synthetic boolean variable marking whether a return statement has been * executed. Needed to deal with return statements in non-uniform control * flow, since some backends can't handle them. */ -@@ -646,6 +662,7 @@ struct hlsl_ir_switch + struct hlsl_ir_var *early_return_var; ++ ++ /* List of all the extern semantic variables; linked by the ++ * hlsl_ir_var.extern_entry fields. This exists as a convenience because ++ * it is often necessary to iterate all extern variables and these can be ++ * declared in as function parameters, or as the function return value. */ ++ struct list extern_vars; + }; + + struct hlsl_ir_call +@@ -646,6 +670,7 @@ struct hlsl_ir_switch enum hlsl_ir_expr_op { @@ -5474,7 +7027,7 @@ index 22e25b23988..4082b14fe04 100644 HLSL_OP0_VOID, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, -@@ -703,7 +720,7 @@ enum hlsl_ir_expr_op +@@ -703,7 +728,7 @@ enum hlsl_ir_expr_op HLSL_OP2_SLT, /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, @@ -5483,7 +7036,7 @@ index 22e25b23988..4082b14fe04 100644 HLSL_OP3_DP2ADD, /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean. * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. 
*/ -@@ -854,6 +871,43 @@ struct hlsl_ir_string_constant +@@ -854,6 +879,43 @@ struct hlsl_ir_string_constant char *string; }; @@ -5527,7 +7080,23 @@ index 22e25b23988..4082b14fe04 100644 /* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ struct hlsl_ir_stateblock_constant -@@ -1003,8 +1057,12 @@ struct hlsl_ctx +@@ -965,10 +1027,11 @@ struct hlsl_ctx + struct hlsl_scope *dummy_scope; + /* List of all the scopes in the program; linked by the hlsl_scope.entry fields. */ + struct list scopes; +- /* List of all the extern variables; linked by the hlsl_ir_var.extern_entry fields. +- * This exists as a convenience because it is often necessary to iterate all extern variables +- * and these can be declared in global scope, as function parameters, or as the function +- * return value. */ ++ ++ /* List of all the extern variables, excluding semantic variables; linked ++ * by the hlsl_ir_var.extern_entry fields. This exists as a convenience ++ * because it is often necessary to iterate all extern variables declared ++ * in the global scope or as function parameters. */ + struct list extern_vars; + + /* List containing both the built-in HLSL buffers ($Globals and $Params) and the ones declared +@@ -1003,8 +1066,12 @@ struct hlsl_ctx struct hlsl_type *string; struct hlsl_type *Void; struct hlsl_type *null; @@ -5540,7 +7109,7 @@ index 22e25b23988..4082b14fe04 100644 /* List of the instruction nodes for initializing static variables. */ struct hlsl_block static_initializers; -@@ -1016,6 +1074,7 @@ struct hlsl_ctx +@@ -1016,19 +1083,23 @@ struct hlsl_ctx { uint32_t index; struct hlsl_vec4 value; @@ -5548,7 +7117,13 @@ index 22e25b23988..4082b14fe04 100644 } *regs; size_t count, size; } constant_defs; -@@ -1029,6 +1088,12 @@ struct hlsl_ctx + /* 'c' registers where the constants expected by SM2 sincos are stored. 
*/ + struct hlsl_reg d3dsincosconst1, d3dsincosconst2; +- /* Number of temp. registers required for the shader to run, i.e. the largest temp register +- * index that will be used in the output bytecode (+1). */ +- uint32_t temp_count; + + /* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ uint32_t thread_count[3]; @@ -5561,7 +7136,7 @@ index 22e25b23988..4082b14fe04 100644 /* In some cases we generate opcodes by parsing an HLSL function and then * invoking it. If not NULL, this field is the name of the function that we * are currently parsing, "mangled" with an internal prefix to avoid -@@ -1044,6 +1109,7 @@ struct hlsl_ctx +@@ -1044,6 +1115,7 @@ struct hlsl_ctx bool child_effect; bool include_empty_buffers; bool warn_implicit_truncation; @@ -5569,7 +7144,7 @@ index 22e25b23988..4082b14fe04 100644 }; static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -@@ -1149,6 +1215,18 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n +@@ -1149,6 +1221,18 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); } @@ -5588,7 +7163,7 @@ index 22e25b23988..4082b14fe04 100644 static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) { VKD3D_ASSERT(node->type == HLSL_IR_STATEBLOCK_CONSTANT); -@@ -1158,16 +1236,19 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co +@@ -1158,16 +1242,19 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); @@ -5608,7 +7183,7 @@ index 22e25b23988..4082b14fe04 100644 } static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node 
*node) -@@ -1330,12 +1411,15 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const +@@ -1330,13 +1417,19 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); @@ -5624,9 +7199,13 @@ index 22e25b23988..4082b14fe04 100644 +void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body); void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); ++uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); ++void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); ++void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); -@@ -1428,6 +1512,9 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); + int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); +@@ -1428,6 +1521,9 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); @@ -5636,7 +7215,7 @@ index 22e25b23988..4082b14fe04 100644 struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -@@ -1440,6 +1527,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, +@@ -1440,6 +1536,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count); struct hlsl_ir_node *hlsl_new_swizzle(struct 
hlsl_ctx *ctx, uint32_t s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); @@ -5645,7 +7224,7 @@ index 22e25b23988..4082b14fe04 100644 struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, -@@ -1493,6 +1582,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type); +@@ -1493,6 +1591,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type); unsigned int hlsl_type_major_size(const struct hlsl_type *type); unsigned int hlsl_type_element_count(const struct hlsl_type *type); bool hlsl_type_is_resource(const struct hlsl_type *type); @@ -5653,27 +7232,33 @@ index 22e25b23988..4082b14fe04 100644 unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); -@@ -1528,16 +1618,15 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); - bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, - unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); - bool hlsl_sm1_usage_from_semantic(const char *semantic_name, +@@ -1525,23 +1624,16 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, + + D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); + D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); +-bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, +- unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); +-bool hlsl_sm1_usage_from_semantic(const char *semantic_name, - uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); -+ uint32_t semantic_index, enum 
vkd3d_decl_usage *usage, uint32_t *usage_idx); void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); ++ ++int tpf_compile(struct vsir_program *program, uint64_t config_flags, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); -bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, - const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); -+bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, -+ struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output); - bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_shader_register_type *type, bool *has_idx); - int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); +-bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, enum vkd3d_shader_register_type *type, bool *has_idx); +-int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); +- + struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); + + int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 0c02b27817e..97d8b13772b 100644 --- 
a/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -5697,10 +7282,18 @@ index 0c02b27817e..97d8b13772b 100644 export {return KW_EXPORT; } extern {return KW_EXTERN; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 3f319dea0d8..ef37eb75f03 100644 +index 3f319dea0d8..213cec79c3d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -147,7 +147,7 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha +@@ -40,6 +40,7 @@ struct parse_initializer + unsigned int args_count; + struct hlsl_block *instrs; + bool braces; ++ struct vkd3d_shader_location loc; + }; + + struct parse_parameter +@@ -147,7 +148,7 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) { @@ -5709,7 +7302,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) -@@ -437,6 +437,9 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct +@@ -437,6 +438,9 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct if (hlsl_types_are_equal(src_type, dst_type)) return node; @@ -5719,7 +7312,48 @@ index 3f319dea0d8..ef37eb75f03 100644 if (!implicit_compatible_data_types(ctx, src_type, dst_type)) { struct vkd3d_string_buffer *src_string, *dst_string; -@@ -489,9 +492,10 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co +@@ -458,6 +462,40 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct + return add_cast(ctx, block, node, dst_type, loc); + } + ++static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_type *dst_type, const struct parse_array_sizes *arrays, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *instr = node_from_block(block); ++ struct hlsl_type *src_type = 
instr->data_type; ++ unsigned int i; ++ ++ for (i = 0; i < arrays->count; ++i) ++ { ++ if (arrays->sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in casts."); ++ dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i]); ++ } ++ ++ if (instr->data_type->class == HLSL_CLASS_ERROR) ++ return true; ++ ++ if (!explicit_compatible_data_types(ctx, src_type, dst_type)) ++ { ++ struct vkd3d_string_buffer *src_string, *dst_string; ++ ++ src_string = hlsl_type_to_string(ctx, src_type); ++ dst_string = hlsl_type_to_string(ctx, dst_type); ++ if (src_string && dst_string) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Can't cast from %s to %s.", ++ src_string->buffer, dst_string->buffer); ++ hlsl_release_string_buffer(ctx, src_string); ++ hlsl_release_string_buffer(ctx, dst_string); ++ return false; ++ } ++ ++ return add_cast(ctx, block, instr, dst_type, loc); ++} ++ + static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t mod, + const struct vkd3d_shader_location *loc) + { +@@ -489,9 +527,10 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co check_condition_type(ctx, condition); bool_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); @@ -5732,7 +7366,7 @@ index 3f319dea0d8..ef37eb75f03 100644 if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, cast, &condition->loc))) return false; -@@ -516,7 +520,7 @@ enum loop_type +@@ -516,7 +555,7 @@ enum loop_type LOOP_DO_WHILE }; @@ -5741,7 +7375,7 @@ index 3f319dea0d8..ef37eb75f03 100644 { unsigned int i, j; -@@ -525,11 +529,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att +@@ -525,11 +564,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att for (j = i + 1; j < attrs->count; ++j) { if (!strcmp(attrs->attrs[i]->name, attrs->attrs[j]->name)) @@ -5755,7 +7389,7 @@ index 
3f319dea0d8..ef37eb75f03 100644 } static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type, -@@ -610,8 +613,10 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx +@@ -610,8 +648,10 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx { switch (node->type) { @@ -5766,7 +7400,7 @@ index 3f319dea0d8..ef37eb75f03 100644 case HLSL_IR_STRING_CONSTANT: case HLSL_IR_SWIZZLE: case HLSL_IR_LOAD: -@@ -639,14 +644,15 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx +@@ -639,14 +679,15 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx return ret; hlsl_block_add_block(&expr, block); @@ -5784,7 +7418,7 @@ index 3f319dea0d8..ef37eb75f03 100644 hlsl_run_const_passes(ctx, &expr); node = src.node; hlsl_src_remove(&src); -@@ -697,9 +703,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +@@ -697,9 +738,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, unsigned int i, unroll_limit = 0; struct hlsl_ir_node *loop; @@ -5795,7 +7429,53 @@ index 3f319dea0d8..ef37eb75f03 100644 check_loop_attributes(ctx, attributes, loc); /* Ignore unroll(0) attribute, and any invalid attribute. 
*/ -@@ -1227,7 +1231,8 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, +@@ -974,6 +1013,12 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str + const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; + struct hlsl_ir_node *return_index, *cast; + ++ if (array->data_type->class == HLSL_CLASS_ERROR || index->data_type->class == HLSL_CLASS_ERROR) ++ { ++ block->value = ctx->error_instr; ++ return true; ++ } ++ + if ((expr_type->class == HLSL_CLASS_TEXTURE || expr_type->class == HLSL_CLASS_UAV) + && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) + { +@@ -1164,6 +1209,32 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + return true; + } + ++static bool add_record_access_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const char *name, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *record = node_from_block(block); ++ const struct hlsl_type *type = record->data_type; ++ const struct hlsl_struct_field *field, *base; ++ ++ if ((field = get_struct_field(type->e.record.fields, type->e.record.field_count, name))) ++ { ++ unsigned int field_idx = field - type->e.record.fields; ++ ++ return add_record_access(ctx, block, record, field_idx, loc); ++ } ++ else if ((base = get_struct_field(type->e.record.fields, type->e.record.field_count, "$super"))) ++ { ++ unsigned int base_idx = base - type->e.record.fields; ++ ++ if (!add_record_access(ctx, block, record, base_idx, loc)) ++ return false; ++ return add_record_access_recurse(ctx, block, name, loc); ++ } ++ ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", name); ++ return false; ++} ++ + static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, struct list *list) + { + struct parse_variable_def *v, *v_next; +@@ -1227,7 +1298,8 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type 
*const orig_type, } static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, @@ -5805,7 +7485,7 @@ index 3f319dea0d8..ef37eb75f03 100644 static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, struct parse_parameter *param, const struct vkd3d_shader_location *loc) -@@ -1285,7 +1290,8 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters +@@ -1285,7 +1357,8 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters for (i = 0; i < param->initializer.args_count; ++i) { @@ -5815,7 +7495,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } free_parse_initializer(&param->initializer); -@@ -1673,25 +1679,36 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl +@@ -1673,25 +1746,36 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl return expr; } @@ -5864,7 +7544,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -1699,12 +1716,18 @@ static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, stru +@@ -1699,12 +1783,18 @@ static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, stru { struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; @@ -5883,7 +7563,7 @@ index 3f319dea0d8..ef37eb75f03 100644 check_integer_type(ctx, arg); return add_unary_arithmetic_expr(ctx, block, op, arg, loc); -@@ -1716,6 +1739,9 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct +@@ -1716,6 +1806,9 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *bool_type; @@ -5893,7 +7573,7 @@ index 3f319dea0d8..ef37eb75f03 100644 bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, arg->data_type->dimx, arg->data_type->dimy); -@@ -1745,7 +1771,11
@@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str +@@ -1745,7 +1838,11 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *common_type; @@ -5906,7 +7586,7 @@ index 3f319dea0d8..ef37eb75f03 100644 if (!(args[0] = add_implicit_conversion(ctx, block, arg1, common_type, loc))) return NULL; -@@ -1942,6 +1972,12 @@ static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hls +@@ -1942,6 +2039,12 @@ static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hls hlsl_block_add_block(block1, block2); destroy_block(block2); @@ -5919,7 +7599,7 @@ index 3f319dea0d8..ef37eb75f03 100644 if (add_binary_expr(ctx, block1, op, arg1, arg2, loc) == NULL) return NULL; -@@ -2048,18 +2084,23 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un +@@ -2048,18 +2151,23 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un return true; } @@ -5946,7 +7626,7 @@ index 3f319dea0d8..ef37eb75f03 100644 assign_op = ASSIGN_OP_ADD; } if (assign_op != ASSIGN_OP_ASSIGN) -@@ -2068,7 +2109,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2068,7 +2176,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo VKD3D_ASSERT(op); if (!(rhs = add_binary_expr(ctx, block, op, lhs, rhs, &rhs->loc))) @@ -5955,7 +7635,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } if (hlsl_is_numeric_type(lhs_type)) -@@ -2078,14 +2119,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2078,14 +2186,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo } if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) @@ -5972,7 +7652,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } else if (lhs->type == HLSL_IR_SWIZZLE) { -@@ -2100,25 +2141,23 @@ static 
struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2100,25 +2208,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) { hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); @@ -6002,7 +7682,7 @@ index 3f319dea0d8..ef37eb75f03 100644 hlsl_block_add_instr(block, new_swizzle); lhs = swizzle->val.node; -@@ -2127,7 +2166,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2127,7 +2233,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo else { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid lvalue."); @@ -6011,7 +7691,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } } -@@ -2142,11 +2181,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2142,11 +2248,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs))) { hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource store."); @@ -6025,7 +7705,7 @@ index 3f319dea0d8..ef37eb75f03 100644 resource_type = hlsl_deref_get_type(ctx, &resource_deref); VKD3D_ASSERT(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); -@@ -2168,7 +2207,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2168,7 +2274,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) { hlsl_cleanup_deref(&resource_deref); @@ -6034,7 +7714,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&resource_deref); -@@ -2195,13 +2234,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2195,13 +2301,13 @@ static 
struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) { hlsl_cleanup_deref(&deref); @@ -6050,7 +7730,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } hlsl_block_add_block(block, &store_block); } -@@ -2226,23 +2265,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2226,23 +2332,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo continue; if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) @@ -6079,7 +7759,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&deref); -@@ -2254,24 +2293,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2254,24 +2360,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo struct hlsl_deref deref; if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) @@ -6108,7 +7788,17 @@ index 3f319dea0d8..ef37eb75f03 100644 } static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post, -@@ -2307,57 +2341,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d +@@ -2280,6 +2381,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d + struct hlsl_ir_node *lhs = node_from_block(block); + struct hlsl_ir_node *one; + ++ if (lhs->data_type->class == HLSL_CLASS_ERROR) ++ return true; ++ + if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, + "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? 
"de" : "in"); +@@ -2307,57 +2411,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d return true; } @@ -6168,7 +7858,7 @@ index 3f319dea0d8..ef37eb75f03 100644 { unsigned int src_comp_count = hlsl_type_component_count(src->data_type); struct hlsl_deref dst_deref; -@@ -2376,32 +2362,75 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i +@@ -2376,38 +2432,107 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); @@ -6177,18 +7867,16 @@ index 3f319dea0d8..ef37eb75f03 100644 { struct hlsl_default_value default_value = {0}; - unsigned int dst_index; - +- - if (!hlsl_clone_block(ctx, &block, instrs)) - return; - default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); -- + - if (dst->is_param) - dst_index = *store_index; -- else -- dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); -+ if (hlsl_is_numeric_type(dst_comp_type)) ++ if (src->type == HLSL_IR_COMPILE || src->type == HLSL_IR_SAMPLER_STATE) + { -+ if (src->type == HLSL_IR_COMPILE || src->type == HLSL_IR_SAMPLER_STATE) ++ if (hlsl_is_numeric_type(dst_comp_type)) + { + /* Default values are discarded if they contain an object + * literal expression for a numeric component. 
*/ @@ -6201,19 +7889,20 @@ index 3f319dea0d8..ef37eb75f03 100644 + dst->default_values = NULL; + } + } -+ else -+ { -+ if (!hlsl_clone_block(ctx, &block, instrs)) -+ return; -+ default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); ++ } + else +- dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); ++ { ++ if (!hlsl_clone_block(ctx, &block, instrs)) ++ return; ++ default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); - dst->default_values[dst_index] = default_value; -+ if (dst->default_values) -+ dst->default_values[*store_index] = default_value; ++ if (dst->default_values) ++ dst->default_values[*store_index] = default_value; - hlsl_block_cleanup(&block); -+ hlsl_block_cleanup(&block); -+ } ++ hlsl_block_cleanup(&block); + } } else @@ -6225,14 +7914,14 @@ index 3f319dea0d8..ef37eb75f03 100644 + /* Sampler states end up in the variable's state_blocks instead of + * being used to initialize its value. 
*/ + struct hlsl_ir_sampler_state *sampler_state = hlsl_ir_sampler_state(src); -+ -+ if (dst_comp_type->class != HLSL_CLASS_SAMPLER) -+ { -+ struct vkd3d_string_buffer *dst_string; - if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) - return; - hlsl_block_add_block(instrs, &block); ++ if (dst_comp_type->class != HLSL_CLASS_SAMPLER) ++ { ++ struct vkd3d_string_buffer *dst_string; ++ + dst_string = hlsl_type_to_string(ctx, dst_comp_type); + if (dst_string) + hlsl_error(ctx, &src->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, @@ -6261,14 +7950,45 @@ index 3f319dea0d8..ef37eb75f03 100644 } ++*store_index; -@@ -2733,13 +2762,17 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + } + } + ++static void initialize_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *dst, ++ const struct parse_initializer *initializer, bool is_default_values_initializer) ++{ ++ unsigned int store_index = 0; ++ ++ /* If any of the elements has an error type, then initializer_size() is not ++ * meaningful. 
*/ ++ for (unsigned int i = 0; i < initializer->args_count; ++i) ++ { ++ if (initializer->args[i]->data_type->class == HLSL_CLASS_ERROR) ++ return; ++ } ++ ++ if (initializer_size(initializer) != hlsl_type_component_count(dst->data_type)) ++ { ++ hlsl_error(ctx, &initializer->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Expected %u components in initializer, but got %u.", ++ hlsl_type_component_count(dst->data_type), initializer_size(initializer)); ++ return; ++ } ++ ++ for (unsigned int i = 0; i < initializer->args_count; ++i) ++ initialize_var_components(ctx, initializer->instrs, dst, &store_index, ++ initializer->args[i], is_default_values_initializer); ++} ++ + static bool type_has_object_components(const struct hlsl_type *type) + { + if (type->class == HLSL_CLASS_ARRAY) +@@ -2733,13 +2858,15 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var if (v->initializer.args_count) { - unsigned int store_index = 0; bool is_default_values_initializer; -+ unsigned int store_index = 0; - unsigned int size, k; +- unsigned int size, k; is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) @@ -6280,17 +8000,28 @@ index 3f319dea0d8..ef37eb75f03 100644 if (is_default_values_initializer) { -@@ -2780,7 +2813,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - - for (k = 0; k < v->initializer.args_count; ++k) - { -- initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]); -+ initialize_var_components(ctx, v->initializer.instrs, var, -+ &store_index, v->initializer.args[k], is_default_values_initializer); +@@ -2769,19 +2896,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + v->initializer.args[0] = node_from_block(v->initializer.instrs); } +- size = initializer_size(&v->initializer); +- if (component_count != size) +- { +- hlsl_error(ctx, &v->loc, 
VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Expected %u components in initializer, but got %u.", component_count, size); +- free_parse_variable_def(v); +- continue; +- } +- +- for (k = 0; k < v->initializer.args_count; ++k) +- { +- initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]); +- } ++ initialize_var(ctx, var, &v->initializer, is_default_values_initializer); + if (is_default_values_initializer) -@@ -2795,6 +2829,9 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + { +@@ -2795,6 +2910,9 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var { hlsl_block_add_block(initializers, v->initializer.instrs); } @@ -6300,7 +8031,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { -@@ -2835,28 +2872,36 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var +@@ -2835,28 +2953,36 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var return initializers; } @@ -6310,11 +8041,11 @@ index 3f319dea0d8..ef37eb75f03 100644 + bool is_compile, const struct parse_initializer *args) { - unsigned int i; -- -- if (decl->parameters.count < args->args_count) -- return false; + unsigned int i, k; +- if (decl->parameters.count < args->args_count) +- return false; +- - for (i = 0; i < args->args_count; ++i) + k = 0; + for (i = 0; i < decl->parameters.count; ++i) @@ -6349,7 +8080,7 @@ index 3f319dea0d8..ef37eb75f03 100644 const struct vkd3d_shader_location *loc) { struct hlsl_ir_function_decl *decl, *compatible_match = NULL; -@@ -2869,7 +2914,7 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, +@@ -2869,7 +2995,7 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) { @@ -6358,7 +8089,7 @@ index 3f319dea0d8..ef37eb75f03 100644 { if 
(compatible_match) { -@@ -2890,26 +2935,35 @@ static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struc +@@ -2890,26 +3016,35 @@ static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struc return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); } @@ -6401,7 +8132,7 @@ index 3f319dea0d8..ef37eb75f03 100644 arg = cast; } -@@ -2918,13 +2972,15 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu +@@ -2918,13 +3053,15 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu struct hlsl_ir_node *store; if (!(store = hlsl_new_simple_store(ctx, param, arg))) @@ -6419,7 +8150,7 @@ index 3f319dea0d8..ef37eb75f03 100644 { struct hlsl_ir_var *param = func->parameters.vars[i]; unsigned int comp_count = hlsl_type_component_count(param->data_type); -@@ -2932,6 +2988,9 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu +@@ -2932,6 +3069,9 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu VKD3D_ASSERT(param->default_values); @@ -6429,7 +8160,7 @@ index 3f319dea0d8..ef37eb75f03 100644 hlsl_init_simple_deref_from_var(¶m_deref, param); for (j = 0; j < comp_count; ++j) -@@ -2945,20 +3004,23 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu +@@ -2945,20 +3085,23 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu { value.u[0] = param->default_values[j].number; if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) @@ -6456,7 +8187,7 @@ index 3f319dea0d8..ef37eb75f03 100644 for (i = 0; i < args->args_count; ++i) { struct hlsl_ir_var *param = func->parameters.vars[i]; -@@ -2973,11 +3035,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu +@@ -2973,11 +3116,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu "Output argument to \"%s\" is const.", 
func->func->name); if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) @@ -6470,7 +8201,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } } -@@ -2998,7 +3060,7 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu +@@ -2998,7 +3141,7 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu hlsl_block_add_instr(args->instrs, expr); } @@ -6479,7 +8210,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, -@@ -3006,7 +3068,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, +@@ -3006,7 +3149,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, { struct hlsl_type *type = arg->data_type; @@ -6488,7 +8219,7 @@ index 3f319dea0d8..ef37eb75f03 100644 return arg; type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -@@ -3094,14 +3156,12 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, +@@ -3094,14 +3237,12 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -6505,7 +8236,7 @@ index 3f319dea0d8..ef37eb75f03 100644 return convert_args(ctx, params, type, loc); } -@@ -3129,6 +3189,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, +@@ -3129,6 +3270,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode) { struct hlsl_ir_function_decl *func; @@ -6513,7 +8244,7 @@ index 3f319dea0d8..ef37eb75f03 100644 struct hlsl_type *type; char *body; -@@ -3152,8 +3213,9 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, +@@ -3152,8 +3294,9 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, const char *fn_name = asin_mode ? 
fn_name_asin : fn_name_acos; @@ -6525,7 +8256,7 @@ index 3f319dea0d8..ef37eb75f03 100644 if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, fn_name, type->name, -@@ -3165,7 +3227,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, +@@ -3165,7 +3308,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, if (!func) return false; @@ -6534,7 +8265,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static bool intrinsic_acos(struct hlsl_ctx *ctx, -@@ -3282,9 +3344,9 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, +@@ -3282,9 +3425,9 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, " : poly_approx;\n" "}"; @@ -6546,7 +8277,7 @@ index 3f319dea0d8..ef37eb75f03 100644 if (!(buf = hlsl_get_string_buffer(ctx))) return false; -@@ -3314,7 +3376,7 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, +@@ -3314,7 +3457,7 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, if (!func) return false; @@ -6555,7 +8286,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static bool intrinsic_atan(struct hlsl_ctx *ctx, -@@ -3507,7 +3569,7 @@ static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, +@@ -3507,7 +3650,7 @@ static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, if (!func) return false; @@ -6564,7 +8295,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static bool intrinsic_cosh(struct hlsl_ctx *ctx, -@@ -3525,9 +3587,8 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, +@@ -3525,9 +3668,8 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, struct hlsl_type *cast_type; enum hlsl_base_type base; @@ -6576,7 +8307,7 @@ index 3f319dea0d8..ef37eb75f03 100644 base = HLSL_TYPE_FLOAT; cast_type = hlsl_get_vector_type(ctx, base, 3); -@@ -3698,15 +3759,14 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, +@@ -3698,15 +3840,14 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, return false; } @@ -6596,7 +8327,7 @@ index 3f319dea0d8..ef37eb75f03 100644 template = templates[dim]; switch (dim) -@@ -3734,7 +3794,7 @@ static bool 
intrinsic_determinant(struct hlsl_ctx *ctx, +@@ -3734,7 +3875,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, if (!func) return false; @@ -6605,10 +8336,26 @@ index 3f319dea0d8..ef37eb75f03 100644 } static bool intrinsic_distance(struct hlsl_ctx *ctx, -@@ -3766,6 +3826,50 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, - return !!add_binary_dot_expr(ctx, params->instrs, params->args[0], params->args[1], loc); - } +@@ -3751,19 +3892,63 @@ static bool intrinsic_distance(struct hlsl_ctx *ctx, + if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, arg2, loc))) + return false; +- if (!(add = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, arg1, neg, loc))) ++ if (!(add = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, arg1, neg, loc))) ++ return false; ++ ++ if (!(dot = add_binary_dot_expr(ctx, params->instrs, add, add, loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SQRT, dot, loc); ++} ++ ++static bool intrinsic_dot(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return !!add_binary_dot_expr(ctx, params->instrs, params->args[0], params->args[1], loc); ++} ++ +static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc) +{ @@ -6644,19 +8391,26 @@ index 3f319dea0d8..ef37eb75f03 100644 + vec4_type->name, type->name, type->name, + vec4_type->name, + vec4_type->name))) -+ return false; + return false; +- +- if (!(dot = add_binary_dot_expr(ctx, params->instrs, add, add, loc))) + func = hlsl_compile_internal_function(ctx, "dst", body); + vkd3d_free(body); + if (!func) -+ return false; -+ + return false; + +- return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SQRT, dot, loc); +-} +- +-static bool intrinsic_dot(struct hlsl_ctx *ctx, +- const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +-{ +- 
return !!add_binary_dot_expr(ctx, params->instrs, params->args[0], params->args[1], loc); + return !!add_user_call(ctx, func, params, false, loc); -+} -+ + } + static bool intrinsic_exp(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3809,9 +3913,9 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, +@@ -3809,9 +3994,9 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, " return dot(i, ng) < 0 ? n : -n;\n" "}\n"; @@ -6668,7 +8422,7 @@ index 3f319dea0d8..ef37eb75f03 100644 if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name))) -@@ -3821,7 +3925,7 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, +@@ -3821,7 +4006,7 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, if (!func) return false; @@ -6677,7 +8431,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, -@@ -3926,7 +4030,7 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx, +@@ -3926,7 +4111,7 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx, if (!func) return false; @@ -6686,7 +8440,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static bool intrinsic_ldexp(struct hlsl_ctx *ctx, -@@ -4029,7 +4133,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, +@@ -4029,7 +4214,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, if (!(func = hlsl_compile_internal_function(ctx, "lit", body))) return false; @@ -6695,7 +8449,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static bool intrinsic_log(struct hlsl_ctx *ctx, -@@ -4081,6 +4185,20 @@ static bool intrinsic_log2(struct hlsl_ctx *ctx, +@@ -4081,6 +4266,20 @@ static bool intrinsic_log2(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); } @@ -6716,7 +8470,43 @@ index 3f319dea0d8..ef37eb75f03 100644 static bool intrinsic_max(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ 
-4285,13 +4403,9 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, +@@ -4099,6 +4298,35 @@ static bool intrinsic_min(struct hlsl_ctx *ctx, + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, params->args[0], params->args[1], loc); + } + ++static bool intrinsic_modf(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_function_decl *func; ++ struct hlsl_type *type; ++ char *body; ++ ++ static const char template[] = ++ "%s modf(%s x, out %s ip)\n" ++ "{\n" ++ " ip = trunc(x);\n" ++ " return x - ip;\n" ++ "}"; ++ ++ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) ++ return false; ++ type = params->args[0]->data_type; ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, ++ type->name, type->name, type->name))) ++ return false; ++ func = hlsl_compile_internal_function(ctx, "modf", body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return !!add_user_call(ctx, func, params, false, loc); ++} ++ + static bool intrinsic_mul(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -4285,13 +4513,9 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, static bool intrinsic_refract(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -6732,7 +8522,7 @@ index 3f319dea0d8..ef37eb75f03 100644 char *body; static const char template[] = -@@ -4303,28 +4417,34 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, +@@ -4303,28 +4527,34 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, " return t >= 0.0 ? 
i.x * r - (i.x * d + sqrt(t)) * n : 0;\n" "}"; @@ -6781,7 +8571,7 @@ index 3f319dea0d8..ef37eb75f03 100644 return false; func = hlsl_compile_internal_function(ctx, "refract", body); -@@ -4332,7 +4452,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, +@@ -4332,7 +4562,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, if (!func) return false; @@ -6790,7 +8580,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static bool intrinsic_round(struct hlsl_ctx *ctx, -@@ -4415,6 +4535,35 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, +@@ -4415,6 +4645,35 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); } @@ -6826,7 +8616,7 @@ index 3f319dea0d8..ef37eb75f03 100644 static bool intrinsic_sinh(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -4436,9 +4585,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, +@@ -4436,9 +4695,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, " return (p * p) * (3 - 2 * p);\n" "}"; @@ -6838,7 +8628,7 @@ index 3f319dea0d8..ef37eb75f03 100644 if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name, type->name))) return false; -@@ -4447,7 +4596,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, +@@ -4447,7 +4706,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!func) return false; @@ -6847,7 +8637,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static bool intrinsic_sqrt(struct hlsl_ctx *ctx, -@@ -4469,13 +4618,12 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, +@@ -4469,13 +4728,12 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; @@ -6862,7 +8652,7 @@ index 3f319dea0d8..ef37eb75f03 100644 return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); } -@@ -4523,7 +4671,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, +@@ 
-4523,7 +4781,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, if (!func) return false; @@ -6871,7 +8661,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, -@@ -4661,17 +4809,17 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -4661,17 +4919,17 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * if (!(var = hlsl_new_synthetic_var(ctx, "coords", hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 2), loc))) return false; @@ -6892,7 +8682,7 @@ index 3f319dea0d8..ef37eb75f03 100644 if (!(load = hlsl_new_var_load(ctx, var, loc))) return false; -@@ -4937,6 +5085,7 @@ intrinsic_functions[] = +@@ -4937,6 +5195,7 @@ intrinsic_functions[] = {"determinant", 1, true, intrinsic_determinant}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, @@ -6900,15 +8690,18 @@ index 3f319dea0d8..ef37eb75f03 100644 {"exp", 1, true, intrinsic_exp}, {"exp2", 1, true, intrinsic_exp2}, {"f16tof32", 1, true, intrinsic_f16tof32}, -@@ -4952,6 +5101,7 @@ intrinsic_functions[] = +@@ -4952,8 +5211,10 @@ intrinsic_functions[] = {"log", 1, true, intrinsic_log}, {"log10", 1, true, intrinsic_log10}, {"log2", 1, true, intrinsic_log2}, + {"mad", 3, true, intrinsic_mad}, {"max", 2, true, intrinsic_max}, {"min", 2, true, intrinsic_min}, ++ {"modf", 2, true, intrinsic_modf}, {"mul", 2, true, intrinsic_mul}, -@@ -4966,6 +5116,7 @@ intrinsic_functions[] = + {"normalize", 1, true, intrinsic_normalize}, + {"pow", 2, true, intrinsic_pow}, +@@ -4966,6 +5227,7 @@ intrinsic_functions[] = {"saturate", 1, true, intrinsic_saturate}, {"sign", 1, true, intrinsic_sign}, {"sin", 1, true, intrinsic_sin}, @@ -6916,7 +8709,7 @@ index 3f319dea0d8..ef37eb75f03 100644 {"sinh", 1, true, intrinsic_sinh}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, -@@ -5002,9 +5153,18 @@ static struct hlsl_block *add_call(struct hlsl_ctx 
*ctx, const char *name, +@@ -5002,9 +5264,18 @@ static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, struct intrinsic_function *intrinsic; struct hlsl_ir_function_decl *decl; @@ -6937,7 +8730,7 @@ index 3f319dea0d8..ef37eb75f03 100644 goto fail; } else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), -@@ -5060,6 +5220,84 @@ fail: +@@ -5060,18 +5331,94 @@ fail: return NULL; } @@ -7022,16 +8815,35 @@ index 3f319dea0d8..ef37eb75f03 100644 static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type, struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -5071,7 +5309,7 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; +- unsigned int i, idx = 0; + + if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) return NULL; - for (i = 0; i < params->args_count; ++i) +- for (i = 0; i < params->args_count; ++i) - initialize_var_components(ctx, params->instrs, var, &idx, params->args[i]); -+ initialize_var_components(ctx, params->instrs, var, &idx, params->args[i], false); ++ initialize_var(ctx, var, params, false); if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; -@@ -5113,11 +5351,6 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -5088,6 +5435,14 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_type *cond_type = cond->data_type; + struct hlsl_type *common_type; + ++ if (cond->data_type->class == HLSL_CLASS_ERROR ++ || first->data_type->class == HLSL_CLASS_ERROR ++ || second->data_type->class == HLSL_CLASS_ERROR) ++ { ++ block->value = ctx->error_instr; ++ return true; ++ } ++ + if (cond_type->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; +@@ -5113,11 +5468,6 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, } else { @@ -7043,7 
+8855,7 @@ index 3f319dea0d8..ef37eb75f03 100644 if (common_type->dimx == 1 && common_type->dimy == 1) { common_type = hlsl_get_numeric_type(ctx, cond_type->class, -@@ -5139,6 +5372,11 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -5139,6 +5489,11 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_release_string_buffer(ctx, cond_string); hlsl_release_string_buffer(ctx, value_string); } @@ -7055,7 +8867,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) -@@ -5854,6 +6092,21 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru +@@ -5854,6 +6209,21 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru const struct hlsl_type *object_type = object->data_type; const struct method_function *method; @@ -7077,7 +8889,7 @@ index 3f319dea0d8..ef37eb75f03 100644 if (object_type->class != HLSL_CLASS_TEXTURE || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) { struct vkd3d_string_buffer *string; -@@ -5995,16 +6248,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, +@@ -5995,16 +6365,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, hlsl_release_string_buffer(ctx, string); } @@ -7094,7 +8906,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } %locations -@@ -6058,8 +6301,10 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h +@@ -6058,8 +6418,10 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %token KW_CENTROID %token KW_COLUMN_MAJOR %token KW_COMPILE @@ -7105,7 +8917,7 @@ index 3f319dea0d8..ef37eb75f03 100644 %token KW_CONTINUE %token KW_DEFAULT %token KW_DEPTHSTENCILSTATE -@@ -6067,7 +6312,6 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h +@@ -6067,7 +6429,6 @@ static bool state_block_add_entry(struct 
hlsl_state_block *state_block, struct h %token KW_DISCARD %token KW_DO %token KW_DOMAINSHADER @@ -7113,7 +8925,70 @@ index 3f319dea0d8..ef37eb75f03 100644 %token KW_ELSE %token KW_EXPORT %token KW_EXTERN -@@ -6827,6 +7071,8 @@ func_prototype: +@@ -6273,6 +6634,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + + %type switch_case + ++%type base_optional + %type field_type + %type named_struct_spec + %type unnamed_struct_spec +@@ -6487,11 +6849,28 @@ struct_spec: + | unnamed_struct_spec + + named_struct_spec: +- KW_STRUCT any_identifier '{' fields_list '}' ++ KW_STRUCT any_identifier base_optional '{' fields_list '}' + { + bool ret; + +- $$ = hlsl_new_struct_type(ctx, $2, $4.fields, $4.count); ++ if ($3) ++ { ++ char *name; ++ ++ if (!(name = hlsl_strdup(ctx, "$super"))) ++ YYABORT; ++ if (!hlsl_array_reserve(ctx, (void **)&$5.fields, &$5.capacity, 1 + $5.count, sizeof(*$5.fields))) ++ YYABORT; ++ memmove(&$5.fields[1], $5.fields, $5.count * sizeof(*$5.fields)); ++ ++$5.count; ++ ++ memset(&$5.fields[0], 0, sizeof($5.fields[0])); ++ $5.fields[0].type = $3; ++ $5.fields[0].loc = @3; ++ $5.fields[0].name = name; ++ } ++ ++ $$ = hlsl_new_struct_type(ctx, $2, $5.fields, $5.count); + + if (hlsl_get_var(ctx->cur_scope, $2)) + { +@@ -6518,6 +6897,23 @@ any_identifier: + | TYPE_IDENTIFIER + | NEW_IDENTIFIER + ++/* TODO: Multiple inheritance support for interfaces. 
*/ ++base_optional: ++ %empty ++ { ++ $$ = NULL; ++ } ++ | ':' TYPE_IDENTIFIER ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); ++ if ($$->class != HLSL_CLASS_STRUCT) ++ { ++ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Base type \"%s\" is not a struct.", $2); ++ YYABORT; ++ } ++ vkd3d_free($2); ++ } ++ + fields_list: + %empty + { +@@ -6827,6 +7223,8 @@ func_prototype: func_prototype_no_attrs | attribute_list func_prototype_no_attrs { @@ -7122,7 +8997,7 @@ index 3f319dea0d8..ef37eb75f03 100644 if ($2.first) { $2.decl->attr_count = $1.count; -@@ -7614,11 +7860,21 @@ stateblock_lhs_identifier: +@@ -7614,11 +8012,21 @@ stateblock_lhs_identifier: if (!($$ = hlsl_strdup(ctx, "pixelshader"))) YYABORT; } @@ -7144,7 +9019,7 @@ index 3f319dea0d8..ef37eb75f03 100644 state_block_index_opt: %empty -@@ -7666,7 +7922,7 @@ state_block: +@@ -7666,7 +8074,7 @@ state_block: vkd3d_free($5.args); $$ = $1; @@ -7153,7 +9028,7 @@ index 3f319dea0d8..ef37eb75f03 100644 } | state_block any_identifier '(' func_arguments ')' ';' { -@@ -7694,7 +7950,7 @@ state_block: +@@ -7694,7 +8102,7 @@ state_block: hlsl_validate_state_block_entry(ctx, entry, &@4); $$ = $1; @@ -7162,7 +9037,31 @@ index 3f319dea0d8..ef37eb75f03 100644 } state_block_list: -@@ -8092,8 +8348,7 @@ selection_statement: +@@ -7931,6 +8339,7 @@ complex_initializer: + $$.args[0] = node_from_block($1); + $$.instrs = $1; + $$.braces = false; ++ $$.loc = @$; + } + | '{' complex_initializer_list '}' + { +@@ -7962,6 +8371,7 @@ complex_initializer_list: + $$.args[$$.args_count++] = $3.args[i]; + hlsl_block_add_block($$.instrs, $3.instrs); + free_parse_initializer(&$3); ++ $$.loc = @$; + } + + initializer_expr: +@@ -7979,6 +8389,7 @@ initializer_expr_list: + $$.args[0] = node_from_block($1); + $$.instrs = $1; + $$.braces = false; ++ $$.loc = @$; + } + | initializer_expr_list ',' initializer_expr + { +@@ -8092,8 +8503,7 @@ selection_statement: struct hlsl_ir_node *instr; unsigned int i; @@ -7172,7 +9071,15 @@ index 
3f319dea0d8..ef37eb75f03 100644 for (i = 0; i < attributes->count; ++i) { -@@ -8391,6 +8646,34 @@ primary_expr: +@@ -8298,6 +8708,7 @@ func_arguments: + if (!($$.instrs = make_empty_block(ctx))) + YYABORT; + $$.braces = false; ++ $$.loc = @$; + } + | initializer_expr_list + +@@ -8391,6 +8802,34 @@ primary_expr: { $$ = $2; } @@ -7207,7 +9114,7 @@ index 3f319dea0d8..ef37eb75f03 100644 | var_identifier '(' func_arguments ')' { if (!($$ = add_call(ctx, $1, &$3, &@1))) -@@ -8400,6 +8683,25 @@ primary_expr: +@@ -8400,6 +8839,25 @@ primary_expr: } vkd3d_free($1); } @@ -7233,7 +9140,7 @@ index 3f319dea0d8..ef37eb75f03 100644 | NEW_IDENTIFIER { if (ctx->in_state_block) -@@ -8416,7 +8718,11 @@ primary_expr: +@@ -8416,7 +8874,11 @@ primary_expr: else { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Identifier \"%s\" is not declared.", $1); @@ -7246,11 +9153,300 @@ index 3f319dea0d8..ef37eb75f03 100644 } } +@@ -8446,25 +8908,12 @@ postfix_expr: + + if (node->data_type->class == HLSL_CLASS_STRUCT) + { +- struct hlsl_type *type = node->data_type; +- const struct hlsl_struct_field *field; +- unsigned int field_idx = 0; +- +- if (!(field = get_struct_field(type->e.record.fields, type->e.record.field_count, $3))) +- { +- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", $3); +- vkd3d_free($3); +- YYABORT; +- } +- +- field_idx = field - type->e.record.fields; +- if (!add_record_access(ctx, $1, node, field_idx, &@2)) ++ if (!add_record_access_recurse(ctx, $1, $3, &@2)) + { + vkd3d_free($3); + YYABORT; + } + vkd3d_free($3); +- $$ = $1; + } + else if (hlsl_is_numeric_type(node->data_type)) + { +@@ -8478,14 +8927,14 @@ postfix_expr: + } + hlsl_block_add_instr($1, swizzle); + vkd3d_free($3); +- $$ = $1; + } +- else ++ else if (node->data_type->class != HLSL_CLASS_ERROR) + { + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid subscript \"%s\".", $3); + vkd3d_free($3); + YYABORT; + } ++ $$ = $1; + } + | postfix_expr 
'[' expr ']' + { +@@ -8523,14 +8972,6 @@ postfix_expr: + free_parse_initializer(&$4); + YYABORT; + } +- if ($2->dimx * $2->dimy != initializer_size(&$4)) +- { +- hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Expected %u components in constructor, but got %u.", +- $2->dimx * $2->dimy, initializer_size(&$4)); +- free_parse_initializer(&$4); +- YYABORT; +- } + + if (!($$ = add_constructor(ctx, $2, &$4, &@2))) + { +@@ -8597,10 +9038,6 @@ unary_expr: + /* var_modifiers is necessary to avoid shift/reduce conflicts. */ + | '(' var_modifiers type arrays ')' unary_expr + { +- struct hlsl_type *src_type = node_from_block($6)->data_type; +- struct hlsl_type *dst_type; +- unsigned int i; +- + if ($2) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, +@@ -8608,36 +9045,13 @@ unary_expr: + YYABORT; + } + +- dst_type = $3; +- for (i = 0; i < $4.count; ++i) +- { +- if ($4.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) +- { +- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Implicit size arrays not allowed in casts."); +- } +- dst_type = hlsl_new_array_type(ctx, dst_type, $4.sizes[i]); +- } +- +- if (!explicit_compatible_data_types(ctx, src_type, dst_type)) +- { +- struct vkd3d_string_buffer *src_string, *dst_string; +- +- src_string = hlsl_type_to_string(ctx, src_type); +- dst_string = hlsl_type_to_string(ctx, dst_type); +- if (src_string && dst_string) +- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Can't cast from %s to %s.", +- src_string->buffer, dst_string->buffer); +- hlsl_release_string_buffer(ctx, src_string); +- hlsl_release_string_buffer(ctx, dst_string); +- YYABORT; +- } +- +- if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3)) ++ if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) + { + destroy_block($6); ++ vkd3d_free($4.sizes); + YYABORT; + } ++ vkd3d_free($4.sizes); + $$ = $6; + } + diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c 
b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 92b5c71c43f..c5dd5e71e02 100644 +index 92b5c71c43f..88bec8610cb 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -1655,11 +1655,16 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, +@@ -276,8 +276,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls + == base_type_get_semantic_equivalent(type2->e.numeric.type); + } + +-static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, +- struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, ++static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, + uint32_t index, bool output, const struct vkd3d_shader_location *loc) + { + struct hlsl_semantic new_semantic; +@@ -287,7 +287,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? 
"output" : "input", semantic->name, index))) + return NULL; + +- LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ LIST_FOR_EACH_ENTRY(ext_var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!ascii_strcasecmp(ext_var->name, new_name)) + { +@@ -339,12 +339,12 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + ext_var->is_input_semantic = 1; + ext_var->is_param = var->is_param; + list_add_before(&var->scope_entry, &ext_var->scope_entry); +- list_add_tail(&ctx->extern_vars, &ext_var->extern_entry); ++ list_add_tail(&func->extern_vars, &ext_var->extern_entry); + + return ext_var; + } + +-static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, ++static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, + uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; +@@ -375,7 +375,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + struct hlsl_ir_var *input; + struct hlsl_ir_load *load; + +- if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, ++ if (!(input = add_semantic_var(ctx, func, var, vector_type_src, modifiers, semantic, + semantic_index + i, false, loc))) + return; + +@@ -408,8 +408,8 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + } + } + +-static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, +- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct vkd3d_shader_location *loc 
= &lhs->node.loc; + struct hlsl_type *type = lhs->node.data_type; +@@ -466,30 +466,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * + return; + list_add_after(&c->entry, &element_load->node.entry); + +- prepend_input_copy_recurse(ctx, block, element_load, element_modifiers, semantic, elem_semantic_index); ++ prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, semantic, elem_semantic_index); + } + } + else + { +- prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); ++ prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index); + } + } + + /* Split inputs into two variables representing the semantic and temp registers, + * and copy the former to the latter, so that writes to input variables work. */ +-static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) ++static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) + { + struct hlsl_ir_load *load; + + /* This redundant load is expected to be deleted later by DCE. 
*/ + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) + return; +- list_add_head(&block->instrs, &load->node.entry); ++ list_add_head(&func->body.instrs, &load->node.entry); + +- prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); + } + +-static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, ++static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, + uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct hlsl_type *type = rhs->node.data_type, *vector_type; +@@ -517,18 +517,19 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + struct hlsl_ir_var *output; + struct hlsl_ir_load *load; + +- if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) ++ if (!(output = add_semantic_var(ctx, func, var, vector_type, ++ modifiers, semantic, semantic_index + i, true, loc))) + return; + + if (type->class == HLSL_CLASS_MATRIX) + { + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; +- hlsl_block_add_instr(block, c); ++ hlsl_block_add_instr(&func->body, c); + + if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) + return; +- hlsl_block_add_instr(block, &load->node); ++ hlsl_block_add_instr(&func->body, &load->node); + } + else + { +@@ -536,17 +537,17 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + + if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) + return; +- hlsl_block_add_instr(block, &load->node); ++ hlsl_block_add_instr(&func->body, &load->node); + } + + if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) + return; +- hlsl_block_add_instr(block, store); ++ 
hlsl_block_add_instr(&func->body, store); + } + } + +-static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, +- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ struct hlsl_ir_load *rhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct vkd3d_shader_location *loc = &rhs->node.loc; + struct hlsl_type *type = rhs->node.data_type; +@@ -580,34 +581,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * + + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; +- hlsl_block_add_instr(block, c); ++ hlsl_block_add_instr(&func->body, c); + + if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) + return; +- hlsl_block_add_instr(block, &element_load->node); ++ hlsl_block_add_instr(&func->body, &element_load->node); + +- append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); ++ append_output_copy_recurse(ctx, func, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { +- append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); ++ append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index); + } + } + + /* Split outputs into two variables representing the temp and semantic + * registers, and copy the former to the latter, so that reads from output + * variables work. */ +-static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) ++static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) + { + struct hlsl_ir_load *load; + + /* This redundant load is expected to be deleted later by DCE. 
*/ + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) + return; +- hlsl_block_add_instr(block, &load->node); ++ hlsl_block_add_instr(&func->body, &load->node); + +- append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); + } + + bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), +@@ -1655,11 +1656,16 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, case HLSL_CLASS_MATRIX: case HLSL_CLASS_ARRAY: case HLSL_CLASS_STRUCT: @@ -7270,7 +9466,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: -@@ -4050,12 +4055,14 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -4050,12 +4056,14 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) switch (instr->type) { case HLSL_IR_CONSTANT: @@ -7285,7 +9481,51 @@ index 92b5c71c43f..c5dd5e71e02 100644 if (list_empty(&instr->uses)) { list_remove(&instr->entry); -@@ -4337,6 +4344,10 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -4106,7 +4114,7 @@ static void dump_function(struct rb_entry *entry, void *context) + } + } + +-static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, ++static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref, + struct hlsl_ir_node *instr) + { + if (!deref->rel_offset.node) +@@ -4119,6 +4127,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, + return true; + } + ++void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) ++{ ++ struct hlsl_scope *scope; ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY(var, 
&scope->vars, struct hlsl_ir_var, scope_entry) ++ var->indexable = false; ++ } ++ ++ transform_derefs(ctx, mark_indexable_var, &entry_func->body); ++} ++ + static char get_regset_name(enum hlsl_regset regset) + { + switch (regset) +@@ -4135,11 +4157,11 @@ static char get_regset_name(enum hlsl_regset regset) + vkd3d_unreachable(); + } + +-static void allocate_register_reservations(struct hlsl_ctx *ctx) ++static void allocate_register_reservations(struct hlsl_ctx *ctx, struct list *extern_vars) + { + struct hlsl_ir_var *var; + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ LIST_FOR_EACH_ENTRY(var, extern_vars, struct hlsl_ir_var, extern_entry) + { + const struct hlsl_reg_reservation *reservation = &var->reg_reservation; + unsigned int r; +@@ -4337,11 +4359,23 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_CONSTANT: case HLSL_IR_STRING_CONSTANT: break; @@ -7296,7 +9536,57 @@ index 92b5c71c43f..c5dd5e71e02 100644 } } } -@@ -4816,7 +4827,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, + +-static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) ++static void init_var_liveness(struct hlsl_ir_var *var) ++{ ++ if (var->is_uniform || var->is_input_semantic) ++ var->first_write = 1; ++ else if (var->is_output_semantic) ++ var->last_read = UINT_MAX; ++} ++ ++void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { + struct hlsl_scope *scope; + struct hlsl_ir_var *var; +@@ -4355,16 +4389,29 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if (var->is_uniform || var->is_input_semantic) +- var->first_write = 1; +- else if (var->is_output_semantic) +- var->last_read = UINT_MAX; +- } ++ init_var_liveness(var); ++ ++ LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct 
hlsl_ir_var, extern_entry) ++ init_var_liveness(var); + + compute_liveness_recurse(&entry_func->body, 0, 0); + } + ++static void mark_vars_usage(struct hlsl_ctx *ctx) ++{ ++ struct hlsl_scope *scope; ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (var->last_read) ++ var->is_read = true; ++ } ++ } ++} ++ + struct register_allocator + { + struct allocation +@@ -4816,7 +4863,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, } } @@ -7306,7 +9596,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 { struct hlsl_constant_defs *defs = &ctx->constant_defs; struct hlsl_constant_register *reg; -@@ -4838,6 +4850,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, +@@ -4838,6 +4886,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, memset(reg, 0, sizeof(*reg)); reg->index = component_index / 4; reg->value.f[component_index % 4] = f; @@ -7314,7 +9604,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 } static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, -@@ -4898,7 +4911,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, +@@ -4898,7 +4947,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, vkd3d_unreachable(); } @@ -7323,7 +9613,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 } break; -@@ -4991,17 +5004,17 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl +@@ -4991,17 +5040,17 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); @@ -7349,16 +9639,94 @@ index 92b5c71c43f..c5dd5e71e02 100644 return; } -@@ -5128,7 +5141,7 @@ static void allocate_semantic_register(struct 
hlsl_ctx *ctx, struct hlsl_ir_var - if (ctx->profile->major_version < 4) +@@ -5081,9 +5130,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + * index to all (simultaneously live) variables or intermediate values. Agnostic + * as to how many registers are actually available for the current backend, and + * does not handle constants. */ +-static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) ++uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { + struct register_allocator allocator = {0}; ++ struct hlsl_scope *scope; ++ struct hlsl_ir_var *var; ++ ++ /* Reset variable temp register allocations. */ ++ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform)) ++ memset(var->regs, 0, sizeof(var->regs)); ++ } ++ } + + /* ps_1_* outputs are special and go in temp register 0. 
*/ + if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) +@@ -5092,8 +5153,7 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio + + for (i = 0; i < entry_func->parameters.count; ++i) + { +- const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; +- ++ var = entry_func->parameters.vars[i]; + if (var->is_output_semantic) + { + record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); +@@ -5103,11 +5163,13 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio + } + + allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); +- ctx->temp_count = allocator.reg_count; + vkd3d_free(allocator.allocations); ++ ++ return allocator.reg_count; + } + +-static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) ++static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, ++ unsigned int *counter, bool output, bool is_patch_constant_func) + { + static const char *const shader_names[] = { - struct vkd3d_shader_version version; +@@ -5120,27 +5182,28 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + }; + + enum vkd3d_shader_register_type type; ++ struct vkd3d_shader_version version; + uint32_t reg; + bool builtin; + + VKD3D_ASSERT(var->semantic.name); + +- if (ctx->profile->major_version < 4) ++ version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ ++ if (version.major < 4) + { +- struct vkd3d_shader_version version; - D3DDECLUSAGE usage; + enum vkd3d_decl_usage usage; uint32_t usage_idx; /* ps_1_* outputs are special and go in temp register 0. 
*/ -@@ -5152,10 +5165,10 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +- if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ if (version.major == 1 && output && version.type == VKD3D_SHADER_TYPE_PIXEL) + return; + +- version.major = ctx->profile->major_version; +- version.minor = ctx->profile->minor_version; +- version.type = ctx->profile->type; +- builtin = hlsl_sm1_register_from_semantic(&version, ++ builtin = sm1_register_from_semantic_name(&version, + var->semantic.name, var->semantic.index, output, &type, ®); +- if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) ++ if (!builtin && !sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic '%s'.", var->semantic.name); +@@ -5152,22 +5215,24 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var } else { @@ -7367,11 +9735,46 @@ index 92b5c71c43f..c5dd5e71e02 100644 bool has_idx; - if (!hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage)) -+ if (!sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output)) ++ if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, ++ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); -@@ -5195,7 +5208,8 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx) + return; + } +- if ((builtin = hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, &has_idx))) ++ ++ if ((builtin = sm4_register_from_semantic_name(&version, var->semantic.name, output, &type, &has_idx))) + reg = has_idx ? 
var->semantic.index : 0; + } + + if (builtin) + { +- TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[ctx->profile->type], ++ TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[version.type], + output ? "output" : "input", var->semantic.name, var->semantic.index, type, reg); + } + else +@@ -5181,21 +5246,23 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + } + } + +-static void allocate_semantic_registers(struct hlsl_ctx *ctx) ++static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { ++ bool is_patch_constant_func = entry_func == ctx->patch_constant_func; + unsigned int input_counter = 0, output_counter = 0; + struct hlsl_ir_var *var; + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_input_semantic) +- allocate_semantic_register(ctx, var, &input_counter, false); ++ allocate_semantic_register(ctx, var, &input_counter, false, is_patch_constant_func); + if (var->is_output_semantic) +- allocate_semantic_register(ctx, var, &output_counter, true); ++ allocate_semantic_register(ctx, var, &output_counter, true, is_patch_constant_func); } } @@ -7381,7 +9784,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 { const struct hlsl_buffer *buffer; -@@ -5203,7 +5217,12 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 +@@ -5203,7 +5270,12 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 { if (buffer->reservation.reg_type == 'b' && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) @@ -7394,7 +9797,16 @@ index 92b5c71c43f..c5dd5e71e02 100644 } return NULL; } -@@ -5386,8 +5405,8 @@ static void allocate_buffers(struct hlsl_ctx *ctx) +@@ -5260,7 +5332,7 @@ static void hlsl_calculate_buffer_offset(struct 
hlsl_ctx *ctx, struct hlsl_ir_va + + TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); + buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); +- if (var->last_read) ++ if (var->is_read) + buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size); + } + +@@ -5386,8 +5458,8 @@ static void allocate_buffers(struct hlsl_ctx *ctx) if (reservation->reg_type == 'b') { @@ -7405,7 +9817,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 unsigned int max_index = get_max_cbuffer_reg_index(ctx); if (buffer->reservation.reg_index > max_index) -@@ -5395,14 +5414,14 @@ static void allocate_buffers(struct hlsl_ctx *ctx) +@@ -5395,14 +5467,14 @@ static void allocate_buffers(struct hlsl_ctx *ctx) "Buffer reservation cb%u exceeds target's maximum (cb%u).", buffer->reservation.reg_index, max_index); @@ -7423,7 +9835,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 } buffer->reg.space = reservation->reg_space; -@@ -5419,12 +5438,12 @@ static void allocate_buffers(struct hlsl_ctx *ctx) +@@ -5419,12 +5491,12 @@ static void allocate_buffers(struct hlsl_ctx *ctx) else if (!reservation->reg_type) { unsigned int max_index = get_max_cbuffer_reg_index(ctx); @@ -7438,7 +9850,26 @@ index 92b5c71c43f..c5dd5e71e02 100644 buffer->reg.space = 0; buffer->reg.index = index; -@@ -5786,6 +5805,26 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere +@@ -5491,15 +5563,15 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum + return NULL; + } + +-static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) ++static void allocate_objects(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, enum hlsl_regset regset) + { + char regset_name = get_regset_name(regset); + uint32_t min_index = 0, id = 0; + struct hlsl_ir_var *var; + +- if (regset == HLSL_REGSET_UAVS) ++ if (regset == HLSL_REGSET_UAVS && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + { +- LIST_FOR_EACH_ENTRY(var, 
&ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->semantic.name && (!ascii_strcasecmp(var->semantic.name, "color") + || !ascii_strcasecmp(var->semantic.name, "sv_target"))) +@@ -5786,6 +5858,26 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere return ret; } @@ -7465,7 +9896,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) { unsigned int i; -@@ -5834,6 +5873,263 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a +@@ -5834,6 +5926,263 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } } @@ -7729,7 +10160,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) { struct hlsl_ir_node *instr, *next; -@@ -5890,12 +6186,16 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod +@@ -5890,12 +6239,16 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod } } @@ -7747,104 +10178,217 @@ index 92b5c71c43f..c5dd5e71e02 100644 lower_ir(ctx, lower_broadcasts, body); while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); -@@ -5956,8 +6256,8 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, - if (!hlsl_sm1_register_from_semantic(&program->shader_version, - var->semantic.name, var->semantic.index, output, &type, &register_index)) - { -+ enum vkd3d_decl_usage usage; - unsigned int usage_idx; -- D3DDECLUSAGE usage; - bool ret; - - register_index = var->regs[HLSL_REGSET_NUMERIC].id; -@@ -5969,7 +6269,7 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, - * SV_Position; the closer equivalent is VPOS, which is not declared - * as a semantic. 
*/ - if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX -- && output && usage == D3DDECLUSAGE_POSITION) -+ && output && usage == VKD3D_DECL_USAGE_POSITION) - sysval = VKD3D_SHADER_SV_POSITION; - } - mask = (1 << var->data_type->dimx) - 1; -@@ -6006,101 +6306,1036 @@ static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_progra - } +@@ -5928,22 +6281,104 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + } while (progress); } --/* OBJECTIVE: Translate all the information from ctx and entry_func to the -- * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() -- * without relying on ctx and entry_func. */ --static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -- uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) -+static uint32_t sm1_generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) +-static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, +- struct vsir_program *program, bool output, struct hlsl_ir_var *var) ++static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct shader_signature *signature, bool output, struct hlsl_ir_var *var) { -- struct vkd3d_shader_version version = {0}; -- struct vkd3d_bytecode_buffer buffer = {0}; -- -- version.major = ctx->profile->major_version; -- version.minor = ctx->profile->minor_version; -- version.type = ctx->profile->type; -- if (!vsir_program_init(program, NULL, &version, 0)) -- { -- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -- return; -- } -- -- write_sm1_uniforms(ctx, &buffer); -- if (buffer.status) -- { -- vkd3d_free(buffer.data); -- ctx->result = buffer.status; -- return; -- } -- ctab->code = buffer.data; -- ctab->size = buffer.size; -+ uint32_t swizzle; + enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; ++ enum vkd3d_shader_component_type component_type; ++ unsigned int 
register_index, mask, use_mask; ++ const char *name = var->semantic.name; + enum vkd3d_shader_register_type type; +- struct shader_signature *signature; + struct signature_element *element; +- unsigned int register_index, mask; -- sm1_generate_vsir_signature(ctx, program); +- if ((!output && !var->last_read) || (output && !var->first_write)) +- return; ++ if (hlsl_version_ge(ctx, 4, 0)) ++ { ++ struct vkd3d_string_buffer *string; ++ bool has_idx, ret; + +- if (output) +- signature = &program->output_signature; ++ ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ++ ctx->semantic_compat_mapping, ctx->domain, var->semantic.name, var->semantic.index, ++ output, signature == &program->patch_constant_signature); ++ VKD3D_ASSERT(ret); ++ if (sysval == ~0u) ++ return; ++ ++ if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) ++ { ++ register_index = has_idx ? var->semantic.index : ~0u; ++ } ++ else ++ { ++ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); ++ register_index = var->regs[HLSL_REGSET_NUMERIC].id; ++ } ++ ++ /* NOTE: remember to change this to the actually allocated mask once ++ * we start optimizing interstage signatures. */ ++ mask = (1u << var->data_type->dimx) - 1; ++ use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ ++ ++ switch (var->data_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ break; ++ ++ case HLSL_TYPE_INT: ++ component_type = VKD3D_SHADER_COMPONENT_INT; ++ break; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_UINT: ++ component_type = VKD3D_SHADER_COMPONENT_UINT; ++ break; ++ ++ default: ++ if ((string = hlsl_type_to_string(ctx, var->data_type))) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Invalid data type %s for semantic variable %s.", string->buffer, var->name); ++ hlsl_release_string_buffer(ctx, string); ++ component_type = VKD3D_SHADER_COMPONENT_VOID; ++ break; ++ } ++ ++ if (sysval == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) ++ name = "SV_Target"; ++ else if (sysval == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) ++ name ="SV_Depth"; ++ else if (sysval == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) ++ name = "SV_Position"; ++ } + else +- signature = &program->input_signature; ++ { ++ if ((!output && !var->last_read) || (output && !var->first_write)) ++ return; ++ ++ if (!sm1_register_from_semantic_name(&program->shader_version, ++ var->semantic.name, var->semantic.index, output, &type, ®ister_index)) ++ { ++ enum vkd3d_decl_usage usage; ++ unsigned int usage_idx; ++ bool ret; ++ ++ register_index = var->regs[HLSL_REGSET_NUMERIC].id; ++ ++ ret = sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx); ++ VKD3D_ASSERT(ret); ++ /* With the exception of vertex POSITION output, none of these are ++ * system values. Pixel POSITION input is not equivalent to ++ * SV_Position; the closer equivalent is VPOS, which is not declared ++ * as a semantic. 
*/ ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX ++ && output && usage == VKD3D_DECL_USAGE_POSITION) ++ sysval = VKD3D_SHADER_SV_POSITION; ++ } ++ ++ mask = (1 << var->data_type->dimx) - 1; ++ use_mask = mask; /* FIXME: retrieve use mask accurately. */ ++ component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ } + + if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, + signature->element_count + 1, sizeof(*signature->elements))) +@@ -5952,73 +6387,986 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, + return; + } + element = &signature->elements[signature->element_count++]; ++ memset(element, 0, sizeof(*element)); + +- if (!hlsl_sm1_register_from_semantic(&program->shader_version, +- var->semantic.name, var->semantic.index, output, &type, ®ister_index)) ++ if (!(element->semantic_name = vkd3d_strdup(name))) + { +- unsigned int usage_idx; +- D3DDECLUSAGE usage; +- bool ret; +- +- register_index = var->regs[HLSL_REGSET_NUMERIC].id; +- +- ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); +- VKD3D_ASSERT(ret); +- /* With the exception of vertex POSITION output, none of these are +- * system values. Pixel POSITION input is not equivalent to +- * SV_Position; the closer equivalent is VPOS, which is not declared +- * as a semantic. 
*/ +- if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX +- && output && usage == D3DDECLUSAGE_POSITION) +- sysval = VKD3D_SHADER_SV_POSITION; +- } +- mask = (1 << var->data_type->dimx) - 1; ++ --signature->element_count; ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ element->semantic_index = var->semantic.index; ++ element->sysval_semantic = sysval; ++ element->component_type = component_type; ++ element->register_index = register_index; ++ element->target_location = register_index; ++ element->register_count = 1; ++ element->mask = mask; ++ element->used_mask = use_mask; ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) ++ element->interpolation_mode = VKD3DSIM_LINEAR; ++} ++ ++static void generate_vsir_signature(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_function_decl *func) ++{ ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (func == ctx->patch_constant_func) ++ { ++ generate_vsir_signature_entry(ctx, program, ++ &program->patch_constant_signature, var->is_output_semantic, var); ++ } ++ else ++ { ++ if (var->is_input_semantic) ++ generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var); ++ if (var->is_output_semantic) ++ generate_vsir_signature_entry(ctx, program, &program->output_signature, true, var); ++ } ++ } ++} ++ ++static uint32_t sm1_generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) ++{ ++ uint32_t swizzle; ++ + swizzle = hlsl_swizzle_from_writemask(src_writemask); + swizzle = hlsl_map_swizzle(swizzle, dst_writemask); + swizzle = vsir_swizzle_from_hlsl(swizzle); + return swizzle; - } - --static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -- struct hlsl_block **found_block) ++} ++ +static void sm1_generate_vsir_constant_defs(struct hlsl_ctx *ctx, struct vsir_program *program, + struct 
hlsl_block *block) - { -- struct hlsl_ir_node *node; ++{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + unsigned int i, x; - -- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) ++ + for (i = 0; i < ctx->constant_defs.count; ++i) - { -- if (node == stop_point) -- return NULL; ++ { + const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; - -- if (node->type == HLSL_IR_IF) ++ + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) - { -- struct hlsl_ir_if *iff = hlsl_ir_if(node); -- struct hlsl_ir_jump *jump = NULL; -- -- if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) -- return jump; -- if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) -- return jump; ++ { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; - } -- else if (node->type == HLSL_IR_JUMP) ++ } + + ins = &instructions->elements[instructions->count]; + if (!vsir_instruction_init_with_params(program, ins, &constant_reg->loc, VKD3DSIH_DEF, 1, 1)) - { -- struct hlsl_ir_jump *jump = hlsl_ir_jump(node); ++ { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } @@ -7881,8 +10425,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 + enum hlsl_sampler_dim sampler_dim; + struct hlsl_ir_var *var; + unsigned int i, count; - -- if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) ++ + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) @@ -7892,48 +10435,28 @@ index 92b5c71c43f..c5dd5e71e02 100644 + for (i = 0; i < count; ++i) + { + if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - { -- *found_block = block; -- return jump; -- } -- } -- } ++ { + sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; - -- 
return NULL; --} ++ + switch (sampler_dim) + { + case HLSL_SAMPLER_DIM_2D: + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; + break; - --static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) --{ -- /* Always use the explicit limit if it has been passed. */ -- if (loop->unroll_limit) -- return loop->unroll_limit; ++ + case HLSL_SAMPLER_DIM_CUBE: + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_CUBE; + break; - -- /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ -- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -- return 1024; ++ + case HLSL_SAMPLER_DIM_3D: + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_3D; + break; - -- /* SM4 limits implicit unrolling to 254 iterations. */ -- if (hlsl_version_ge(ctx, 4, 0)) -- return 254; ++ + case HLSL_SAMPLER_DIM_GENERIC: + /* These can appear in sm4-style combined sample instructions. */ + hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered."); + continue; - -- /* SM<3 implicitly unrolls up to 1024 iterations. 
*/ -- return 1024; ++ + default: + vkd3d_unreachable(); + break; @@ -7967,26 +10490,18 @@ index 92b5c71c43f..c5dd5e71e02 100644 + } + } + } - } - --static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, -- struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) ++} ++ +static struct vkd3d_shader_instruction *generate_vsir_add_program_instruction( + struct hlsl_ctx *ctx, struct vsir_program *program, + const struct vkd3d_shader_location *loc, enum vkd3d_shader_opcode opcode, + unsigned int dst_count, unsigned int src_count) - { -- unsigned int max_iterations, i; -- -- max_iterations = loop_unrolling_get_max_iterations(ctx, loop); ++{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_instruction *ins; - -- for (i = 0; i < max_iterations; ++i) ++ + if (!shader_instruction_array_reserve(instructions, instructions->count + 1)) - { -- struct hlsl_block tmp_dst, *jump_block; -- struct hlsl_ir_jump *jump = NULL; ++ { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } @@ -8409,7 +10924,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 + type = VKD3DSPR_TEMP; + register_index = 0; + } -+ else if (!hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name, ++ else if (!sm1_register_from_semantic_name(&version, deref->var->semantic.name, + deref->var->semantic.index, true, &type, ®ister_index)) + { + VKD3D_ASSERT(reg.allocated); @@ -8439,9 +10954,13 @@ index 92b5c71c43f..c5dd5e71e02 100644 + uint32_t register_index; + unsigned int writemask; + struct hlsl_reg reg; -+ + +- memset(element, 0, sizeof(*element)); +- if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) + if (hlsl_type_is_resource(deref->var->data_type)) -+ { + { +- --signature->element_count; +- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + unsigned int sampler_offset; + + type = VKD3DSPR_COMBINED_SAMPLER; @@ -8464,7 +10983,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 + version.major = 
ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; -+ if (hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name, ++ if (sm1_register_from_semantic_name(&version, deref->var->semantic.name, + deref->var->semantic.index, false, &type, ®ister_index)) + { + writemask = (1 << deref->var->data_type->dimx) - 1; @@ -8506,7 +11025,7 @@ index 92b5c71c43f..c5dd5e71e02 100644 + VKD3D_ASSERT(instr->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) -+ return; + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); @@ -8587,18 +11106,31 @@ index 92b5c71c43f..c5dd5e71e02 100644 + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = ddy->reg.id; + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddy->reg.writemask, VKD3DSP_WRITEMASK_ALL); -+ } -+} -+ + } +- element->semantic_index = var->semantic.index; +- element->sysval_semantic = sysval; +- element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; +- element->register_index = register_index; +- element->target_location = register_index; +- element->register_count = 1; +- element->mask = mask; +- element->used_mask = mask; +- if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) +- element->interpolation_mode = VKD3DSIM_LINEAR; + } + +-static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) +static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_swizzle *swizzle_instr) -+{ + { +- struct hlsl_ir_var *var; + struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + uint32_t swizzle; -+ + +- 
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + VKD3D_ASSERT(instr->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) @@ -8648,7 +11180,11 @@ index 92b5c71c43f..c5dd5e71e02 100644 + struct vkd3d_shader_instruction *ins; + + if (jump->type == HLSL_IR_JUMP_DISCARD_NEG) -+ { + { +- if (var->is_input_semantic) +- sm1_generate_vsir_signature_entry(ctx, program, false, var); +- if (var->is_output_semantic) +- sm1_generate_vsir_signature_entry(ctx, program, true, var); + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0))) + return; + @@ -8766,39 +11302,33 @@ index 92b5c71c43f..c5dd5e71e02 100644 + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + break; + } -+ } -+} -+ -+/* OBJECTIVE: Translate all the information from ctx and entry_func to the -+ * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() -+ * without relying on ctx and entry_func. */ -+static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -+ uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) -+{ -+ struct vkd3d_shader_version version = {0}; -+ struct vkd3d_bytecode_buffer buffer = {0}; + } + } + +-/* OBJECTIVE: Translate all the information from ctx and entry_func to the +- * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() +- * without relying on ctx and entry_func. 
*/ + static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) + { + struct vkd3d_shader_version version = {0}; + struct vkd3d_bytecode_buffer buffer = {0}; + struct hlsl_block block; -+ -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED)) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ write_sm1_uniforms(ctx, &buffer); -+ if (buffer.status) -+ { -+ vkd3d_free(buffer.data); -+ ctx->result = buffer.status; -+ return; -+ } -+ ctab->code = buffer.data; -+ ctab->size = buffer.size; -+ -+ sm1_generate_vsir_signature(ctx, program); + + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; +- if (!vsir_program_init(program, NULL, &version, 0)) ++ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; +@@ -6034,7 +7382,37 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + ctab->code = buffer.data; + ctab->size = buffer.size; + +- sm1_generate_vsir_signature(ctx, program); ++ generate_vsir_signature(ctx, program, entry_func); + + hlsl_block_init(&block); + sm1_generate_vsir_constant_defs(ctx, program, &block); @@ -8808,77 +11338,115 @@ index 92b5c71c43f..c5dd5e71e02 100644 + sm1_generate_vsir_block(ctx, &entry_func->body, program); +} + -+static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -+ struct hlsl_block **found_block) ++/* OBJECTIVE: Translate all the information from ctx and entry_func to the ++ * vsir_program, so it can be used as input to tpf_compile() without relying ++ * on ctx and entry_func. 
*/ ++static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, ++ uint64_t config_flags, struct vsir_program *program) +{ -+ struct hlsl_ir_node *node; ++ struct vkd3d_shader_version version = {0}; + -+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) ++ version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ ++ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) + { -+ if (node == stop_point) -+ return NULL; -+ -+ if (node->type == HLSL_IR_IF) -+ { -+ struct hlsl_ir_if *iff = hlsl_ir_if(node); -+ struct hlsl_ir_jump *jump = NULL; -+ -+ if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) -+ return jump; -+ if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) -+ return jump; -+ } -+ else if (node->type == HLSL_IR_JUMP) -+ { -+ struct hlsl_ir_jump *jump = hlsl_ir_jump(node); -+ -+ if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) -+ { -+ *found_block = block; -+ return jump; -+ } -+ } ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; + } + -+ return NULL; -+} -+ -+static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) -+{ -+ /* Always use the explicit limit if it has been passed. */ -+ if (loop->unroll_limit) -+ return loop->unroll_limit; -+ -+ /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ -+ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ return 1024; -+ -+ /* SM4 limits implicit unrolling to 254 iterations. */ -+ if (hlsl_version_ge(ctx, 4, 0)) -+ return 254; -+ -+ /* SM<3 implicitly unrolls up to 1024 iterations. 
*/ -+ return 1024; -+} -+ -+static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) -+{ -+ unsigned int max_iterations, i; -+ -+ max_iterations = loop_unrolling_get_max_iterations(ctx, loop); -+ -+ for (i = 0; i < max_iterations; ++i) -+ { -+ struct hlsl_block tmp_dst, *jump_block; -+ struct hlsl_ir_jump *jump = NULL; ++ generate_vsir_signature(ctx, program, entry_func); ++ if (version.type == VKD3D_SHADER_TYPE_HULL) ++ generate_vsir_signature(ctx, program, ctx->patch_constant_func); + } - if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) - return false; -@@ -6406,18 +7641,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - append_output_var_copy(ctx, body, entry_func->return_var); + static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, +@@ -6337,16 +7715,23 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru + return true; + } + +-int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, +- enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) ++static void process_entry_function(struct hlsl_ctx *ctx, ++ const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func) + { + const struct hlsl_profile_info *profile = ctx->profile; ++ struct hlsl_block static_initializers, global_uniforms; + struct hlsl_block *const body = &entry_func->body; + struct recursive_call_ctx recursive_call_ctx; + struct hlsl_ir_var *var; + unsigned int i; + +- list_move_head(&body->instrs, &ctx->static_initializers.instrs); ++ if (!hlsl_clone_block(ctx, &static_initializers, &ctx->static_initializers)) ++ return; ++ list_move_head(&body->instrs, &static_initializers.instrs); ++ ++ if (!hlsl_clone_block(ctx, &global_uniforms, global_uniform_block)) ++ return; ++ list_move_head(&body->instrs, 
&global_uniforms.instrs); + + memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); + hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); +@@ -6355,7 +7740,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + /* Avoid going into an infinite loop when processing call instructions. + * lower_return() recurses into inferior calls. */ + if (ctx->result) +- return ctx->result; ++ return; + + if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) + lower_ir(ctx, lower_f16tof32, body); +@@ -6367,20 +7752,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + lower_ir(ctx, lower_matrix_swizzles, body); + lower_ir(ctx, lower_index_loads, body); + +- LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) +- { +- if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) +- prepend_uniform_copy(ctx, body, var); +- } +- + for (i = 0; i < entry_func->parameters.count; ++i) + { + var = entry_func->parameters.vars[i]; + +- if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ if (hlsl_type_is_resource(var->data_type)) + { + prepend_uniform_copy(ctx, body, var); + } ++ else if ((var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ { ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL && entry_func == ctx->patch_constant_func) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Patch constant function parameter \"%s\" cannot be uniform.", var->name); ++ else ++ prepend_uniform_copy(ctx, body, var); ++ } + else + { + if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT +@@ -6392,9 +7779,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + } + + if (var->storage_modifiers & HLSL_STORAGE_IN) +- prepend_input_var_copy(ctx, body, var); ++ prepend_input_var_copy(ctx, entry_func, var); + if (var->storage_modifiers & HLSL_STORAGE_OUT) +- 
append_output_var_copy(ctx, body, var); ++ append_output_var_copy(ctx, entry_func, var); + } } + if (entry_func->return_var) +@@ -6403,24 +7790,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); +- append_output_var_copy(ctx, body, entry_func->return_var); +- } +- - for (i = 0; i < entry_func->attr_count; ++i) - { - const struct hlsl_attribute *attr = entry_func->attrs[i]; @@ -8888,19 +11456,97 @@ index 92b5c71c43f..c5dd5e71e02 100644 - else - hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE, - "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name); -- } ++ append_output_var_copy(ctx, entry_func, entry_func->return_var); + } + +- if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) +- hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, +- "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); +- + if (profile->major_version >= 4) + { + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); +@@ -6496,29 +7868,69 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + while (hlsl_transform_ir(ctx, dce, body, NULL)); + + compute_liveness(ctx, entry_func); ++ mark_vars_usage(ctx); + +- if (TRACE_ON()) +- rb_for_each_entry(&ctx->functions, dump_function, ctx); ++ calculate_resource_register_counts(ctx); + +- transform_derefs(ctx, mark_indexable_vars, body); ++ allocate_register_reservations(ctx, &ctx->extern_vars); ++ allocate_register_reservations(ctx, &entry_func->extern_vars); ++ allocate_semantic_registers(ctx, entry_func); ++} + +- calculate_resource_register_counts(ctx); ++int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, ++ enum vkd3d_shader_target_type target_type, struct 
vkd3d_shader_code *out) ++{ ++ const struct hlsl_profile_info *profile = ctx->profile; ++ struct hlsl_block global_uniform_block; ++ struct hlsl_ir_var *var; ++ + parse_entry_function_attributes(ctx, entry_func); + if (ctx->result) + return ctx->result; -- if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) +- allocate_register_reservations(ctx); + if (profile->type == VKD3D_SHADER_TYPE_HULL) + validate_hull_shader_attributes(ctx, entry_func); + else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, - "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); ++ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, ++ "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); ++ ++ hlsl_block_init(&global_uniform_block); ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) ++ prepend_uniform_copy(ctx, &global_uniform_block, var); ++ } ++ ++ process_entry_function(ctx, &global_uniform_block, entry_func); ++ if (ctx->result) ++ return ctx->result; ++ ++ if (profile->type == VKD3D_SHADER_TYPE_HULL) ++ { ++ process_entry_function(ctx, &global_uniform_block, ctx->patch_constant_func); ++ if (ctx->result) ++ return ctx->result; ++ } ++ ++ hlsl_block_cleanup(&global_uniform_block); -@@ -6540,7 +7770,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +- allocate_temp_registers(ctx, entry_func); + if (profile->major_version < 4) + { ++ mark_indexable_vars(ctx, entry_func); ++ allocate_temp_registers(ctx, entry_func); + allocate_const_registers(ctx, entry_func); + } + else + { + allocate_buffers(ctx); +- allocate_objects(ctx, HLSL_REGSET_TEXTURES); +- allocate_objects(ctx, HLSL_REGSET_UAVS); ++ allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES); ++ 
allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS); + } +- allocate_semantic_registers(ctx); +- allocate_objects(ctx, HLSL_REGSET_SAMPLERS); ++ allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); ++ ++ if (TRACE_ON()) ++ rb_for_each_entry(&ctx->functions, dump_function, ctx); + + if (ctx->result) + return ctx->result; +@@ -6540,14 +7952,29 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry return ctx->result; } @@ -8909,6 +11555,29 @@ index 92b5c71c43f..c5dd5e71e02 100644 vsir_program_cleanup(&program); vkd3d_shader_free_shader_code(&ctab); return result; + } + + case VKD3D_SHADER_TARGET_DXBC_TPF: +- return hlsl_sm4_write(ctx, entry_func, out); ++ { ++ uint32_t config_flags = vkd3d_shader_init_config_flags(); ++ struct vsir_program program; ++ int result; ++ ++ sm4_generate_vsir(ctx, entry_func, config_flags, &program); ++ if (ctx->result) ++ { ++ vsir_program_cleanup(&program); ++ return ctx->result; ++ } ++ ++ result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func); ++ vsir_program_cleanup(&program); ++ return result; ++ } + + default: + ERR("Unsupported shader target type %#x.\n", target_type); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index db4913b7c62..716adb15f08 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -8952,7 +11621,7 @@ index db4913b7c62..716adb15f08 100644 break; } diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 747238e2fee..6a74e2eb8de 100644 +index 747238e2fee..b47f12d2188 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -19,6 +19,15 @@ @@ -8971,24 +11640,27 @@ index 747238e2fee..6a74e2eb8de 100644 static int convert_parameter_info(const struct vkd3d_shader_compile_info *compile_info, unsigned int *ret_count, const struct vkd3d_shader_parameter1 **ret_parameters) { -@@ -65,7 +74,7 @@ static int 
convert_parameter_info(const struct vkd3d_shader_compile_info *compil +@@ -65,7 +74,8 @@ static int convert_parameter_info(const struct vkd3d_shader_compile_info *compil } bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_version *version, unsigned int reserve) -+ const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type) ++ const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, ++ bool normalised_io) { memset(program, 0, sizeof(*program)); -@@ -87,6 +96,7 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c +@@ -87,6 +97,9 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c } program->shader_version = *version; + program->cf_type = cf_type; ++ program->normalised_io = normalised_io; ++ program->normalised_hull_cp_io = normalised_io; return shader_instruction_array_init(&program->instructions, reserve); } -@@ -117,26 +127,145 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( +@@ -117,26 +130,156 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( return NULL; } @@ -9075,6 +11747,12 @@ index 747238e2fee..6a74e2eb8de 100644 + src->reg.idx[0].offset = idx; +} + ++static void src_param_init_ssa_float(struct vkd3d_shader_src_param *src, unsigned int idx) ++{ ++ vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); ++ src->reg.idx[0].offset = idx; ++} ++ +static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); @@ -9116,6 +11794,12 @@ index 747238e2fee..6a74e2eb8de 100644 + dst->reg.idx[0].offset = idx; +} + ++static void dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigned int idx) ++{ ++ vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); ++ dst->reg.idx[0].offset = 
idx; ++} ++ +static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); @@ -9126,7 +11810,6 @@ index 747238e2fee..6a74e2eb8de 100644 +{ + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + dst->reg.idx[0].offset = idx; -+ dst->write_mask = VKD3DSP_WRITEMASK_0; +} + +void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, @@ -9142,7 +11825,7 @@ index 747238e2fee..6a74e2eb8de 100644 struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count) { -@@ -161,6 +290,37 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, +@@ -161,6 +304,37 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, return true; } @@ -9180,7 +11863,7 @@ index 747238e2fee..6a74e2eb8de 100644 static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_data_type data_type, enum vkd3d_shader_opcode *opcode, bool *requires_swap) { -@@ -441,10 +601,58 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog +@@ -441,10 +615,58 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog return VKD3D_OK; } @@ -9240,7 +11923,16 @@ index 747238e2fee..6a74e2eb8de 100644 unsigned int tmp_idx = ~0u, i; enum vkd3d_result ret; -@@ -481,6 +689,38 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr +@@ -473,6 +695,8 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr + case VKD3DSIH_DCL_CONSTANT_BUFFER: + case VKD3DSIH_DCL_SAMPLER: + case VKD3DSIH_DCL_TEMPS: ++ case VKD3DSIH_DCL_THREAD_GROUP: ++ case VKD3DSIH_DCL_UAV_TYPED: + vkd3d_shader_instruction_make_nop(ins); + break; + +@@ -481,6 +705,38 @@ static enum vkd3d_result vsir_program_lower_instructions(struct 
vsir_program *pr return ret; break; @@ -9279,7 +11971,16 @@ index 747238e2fee..6a74e2eb8de 100644 default: break; } -@@ -541,9 +781,11 @@ static const struct vkd3d_shader_varying_map *find_varying_map( +@@ -523,7 +779,7 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i + } + + for (i = 0; i < ins->dst_count; ++i) +- shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); ++ shader_register_eliminate_phase_addressing(&ins->dst[i].reg, instance_id); + } + + static const struct vkd3d_shader_varying_map *find_varying_map( +@@ -541,9 +797,11 @@ static const struct vkd3d_shader_varying_map *find_varying_map( } static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, @@ -9293,7 +11994,7 @@ index 747238e2fee..6a74e2eb8de 100644 struct shader_signature *signature = &program->output_signature; const struct vkd3d_shader_varying_map_info *varying_map; unsigned int i; -@@ -727,147 +969,23 @@ static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normali +@@ -727,144 +985,20 @@ static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normali return VKD3D_OK; } @@ -9342,10 +12043,7 @@ index 747238e2fee..6a74e2eb8de 100644 - param->modifiers = VKD3DSPDM_NONE; - param->shift = 0; -} -+ flattener.phase = VKD3DSIH_INVALID; -+ for (i = 0, locations.count = 0; i < instructions->count; ++i) -+ flattener_eliminate_phase_related_dcls(&flattener, i, &locations); - +- -void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) -{ - vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); @@ -9446,15 +12144,13 @@ index 747238e2fee..6a74e2eb8de 100644 - flattener.phase = VKD3DSIH_INVALID; - for (i = 0, locations.count = 0; i < instructions->count; ++i) - flattener_eliminate_phase_related_dcls(&flattener, i, &locations); -- -- if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) -- 
return result; -+ if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) -+ return result; ++ flattener.phase = VKD3DSIH_INVALID; ++ for (i = 0, locations.count = 0; i < instructions->count; ++i) ++ flattener_eliminate_phase_related_dcls(&flattener, i, &locations); - if (flattener.phase != VKD3DSIH_INVALID) - { -@@ -876,7 +994,7 @@ static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd + if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) + return result; +@@ -876,7 +1010,7 @@ static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET); } @@ -9463,7 +12159,7 @@ index 747238e2fee..6a74e2eb8de 100644 return result; } -@@ -892,9 +1010,9 @@ static bool control_point_normaliser_is_in_control_point_phase(const struct cont +@@ -892,9 +1026,9 @@ static bool control_point_normaliser_is_in_control_point_phase(const struct cont return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; } @@ -9475,7 +12171,7 @@ index 747238e2fee..6a74e2eb8de 100644 struct vkd3d_shader_src_param *rel_addr; if (instructions->outpointid_param) -@@ -991,7 +1109,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p +@@ -991,7 +1125,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p } static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( @@ -9484,11 +12180,19 @@ index 747238e2fee..6a74e2eb8de 100644 { struct vkd3d_shader_instruction_array *instructions; struct control_point_normaliser normaliser; -@@ -1001,12 +1119,12 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i +@@ -1001,12 +1135,20 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i enum vkd3d_result ret; unsigned int i, j; - if (!(normaliser.outpointid_param = 
instruction_array_create_outpointid_param(src_instructions))) ++ VKD3D_ASSERT(!program->normalised_hull_cp_io); ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) ++ { ++ program->normalised_hull_cp_io = true; ++ return VKD3D_OK; ++ } ++ + if (!(normaliser.outpointid_param = vsir_program_create_outpointid_param(program))) { ERR("Failed to allocate src param.\n"); @@ -9499,12 +12203,13 @@ index 747238e2fee..6a74e2eb8de 100644 instructions = &normaliser.instructions; normaliser.phase = VKD3DSIH_INVALID; -@@ -1043,22 +1161,22 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i +@@ -1043,22 +1185,25 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i input_control_point_count = ins->declaration.count; break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; ++ program->normalised_hull_cp_io = true; return VKD3D_OK; case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: @@ -9515,6 +12220,7 @@ index 747238e2fee..6a74e2eb8de 100644 input_control_point_count, i, &location); - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; ++ program->normalised_hull_cp_io = true; return ret; default: break; @@ -9523,10 +12229,11 @@ index 747238e2fee..6a74e2eb8de 100644 - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; ++ program->normalised_hull_cp_io = true; return VKD3D_OK; } -@@ -1098,18 +1216,20 @@ static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser * +@@ -1098,36 +1243,35 @@ static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser * return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; } @@ -9535,8 +12242,9 @@ index 747238e2fee..6a74e2eb8de 100644 +static bool shader_signature_find_element_for_reg(const struct shader_signature *signature, + unsigned int reg_idx, 
unsigned int write_mask, unsigned int *element_idx) { +- unsigned int i, base_write_mask; + const struct signature_element *e; - unsigned int i, base_write_mask; ++ unsigned int i; for (i = 0; i < signature->element_count; ++i) { @@ -9552,13 +12260,14 @@ index 747238e2fee..6a74e2eb8de 100644 } } -@@ -1119,15 +1239,20 @@ static unsigned int shader_signature_find_element_for_reg(const struct shader_si - reg_idx, write_mask); - base_write_mask = 1u << vsir_write_mask_get_component_idx(write_mask); - if (base_write_mask != write_mask) +- /* Validated in the TPF reader, but failure in signature_element_range_expand_mask() +- * can land us here on an unmatched vector mask. */ +- FIXME("Failed to find signature element for register index %u, mask %#x; using scalar mask.\n", +- reg_idx, write_mask); +- base_write_mask = 1u << vsir_write_mask_get_component_idx(write_mask); +- if (base_write_mask != write_mask) - return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask); -+ return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask, element_idx); - +- - vkd3d_unreachable(); + return false; } @@ -9576,7 +12285,7 @@ index 747238e2fee..6a74e2eb8de 100644 } static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE], -@@ -1181,9 +1306,10 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, +@@ -1181,9 +1325,10 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, { const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; const struct vkd3d_shader_register *reg = &range->dst.reg; @@ -9588,7 +12297,7 @@ index 747238e2fee..6a74e2eb8de 100644 switch (reg->type) { -@@ -1215,9 +1341,8 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, +@@ -1215,9 +1360,8 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, reg_idx = reg->idx[reg->idx_count - 1].offset; write_mask = range->dst.write_mask; @@ 
-9600,7 +12309,17 @@ index 747238e2fee..6a74e2eb8de 100644 } static int signature_element_mask_compare(const void *a, const void *b) -@@ -1388,6 +1513,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map +@@ -1345,6 +1489,9 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map + return false; + memcpy(elements, s->elements, element_count * sizeof(*elements)); + ++ for (i = 0; i < element_count; ++i) ++ elements[i].sort_index = i; ++ + qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare); + + for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e) +@@ -1388,6 +1535,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map else e->interpolation_mode = f->interpolation_mode; } @@ -9609,7 +12328,7 @@ index 747238e2fee..6a74e2eb8de 100644 } } element_count = new_count; -@@ -1415,6 +1542,12 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map +@@ -1415,6 +1564,12 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count); e->register_count = register_count; e->mask = signature_element_range_expand_mask(e, register_count, range_map); @@ -9622,7 +12341,17 @@ index 747238e2fee..6a74e2eb8de 100644 } } element_count = new_count; -@@ -1530,7 +1663,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par +@@ -1518,6 +1673,9 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + break; + + case VKD3DSPR_RASTOUT: ++ /* Leave point size as a system value for the backends to consume. 
*/ ++ if (reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) ++ return true; + reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset; + signature = normaliser->output_signature; + reg->type = VKD3DSPR_OUTPUT; +@@ -1530,10 +1688,10 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par id_idx = reg->idx_count - 1; write_mask = dst_param->write_mask; @@ -9631,8 +12360,11 @@ index 747238e2fee..6a74e2eb8de 100644 + vkd3d_unreachable(); e = &signature->elements[element_idx]; - dst_param->write_mask >>= vsir_write_mask_get_component_idx(e->mask); -@@ -1653,7 +1787,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par +- dst_param->write_mask >>= vsir_write_mask_get_component_idx(e->mask); + if (is_io_dcl) + { + /* Validated in the TPF reader. */ +@@ -1653,7 +1811,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par id_idx = reg->idx_count - 1; write_mask = VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(src_param->swizzle, 0); @@ -9642,7 +12374,7 @@ index 747238e2fee..6a74e2eb8de 100644 e = &signature->elements[element_idx]; if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) -@@ -1737,13 +1872,13 @@ static bool use_flat_interpolation(const struct vsir_program *program, +@@ -1737,13 +1896,13 @@ static bool use_flat_interpolation(const struct vsir_program *program, if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, @@ -9658,7 +12390,7 @@ index 747238e2fee..6a74e2eb8de 100644 return false; } -@@ -1751,12 +1886,12 @@ static bool use_flat_interpolation(const struct vsir_program *program, +@@ -1751,12 +1910,14 @@ static bool use_flat_interpolation(const struct vsir_program *program, } static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program, @@ -9671,10 +12403,12 @@ index 747238e2fee..6a74e2eb8de 100644 - 
bool has_control_point_phase; - unsigned int i, j; + unsigned int i; ++ ++ VKD3D_ASSERT(!program->normalised_io); normaliser.phase = VKD3DSIH_INVALID; normaliser.shader_type = program->shader_version.type; -@@ -1765,7 +1900,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program +@@ -1765,7 +1926,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program normaliser.output_signature = &program->output_signature; normaliser.patch_constant_signature = &program->patch_constant_signature; @@ -9683,7 +12417,7 @@ index 747238e2fee..6a74e2eb8de 100644 { ins = &program->instructions.elements[i]; -@@ -1779,8 +1914,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program +@@ -1779,8 +1940,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program vkd3d_shader_instruction_make_nop(ins); break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: @@ -9692,7 +12426,7 @@ index 747238e2fee..6a74e2eb8de 100644 case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: normaliser.phase = ins->opcode; -@@ -1790,22 +1923,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program +@@ -1790,22 +1949,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program } } @@ -9715,7 +12449,15 @@ index 747238e2fee..6a74e2eb8de 100644 if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) -@@ -1918,7 +2035,8 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par +@@ -1832,6 +1975,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program + + program->instructions = normaliser.instructions; + program->use_vocp = normaliser.use_vocp; ++ program->normalised_io = true; + return 
VKD3D_OK; + } + +@@ -1918,7 +2062,8 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par param->reg.idx_count = 3; } @@ -9725,7 +12467,7 @@ index 747238e2fee..6a74e2eb8de 100644 { struct flat_constants_normaliser normaliser = {0}; unsigned int i, j; -@@ -1957,7 +2075,8 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ +@@ -1957,7 +2102,8 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ return VKD3D_OK; } @@ -9735,7 +12477,7 @@ index 747238e2fee..6a74e2eb8de 100644 { size_t i, depth = 0; bool dead = false; -@@ -2045,103 +2164,6 @@ static void remove_dead_code(struct vsir_program *program) +@@ -2045,103 +2191,6 @@ static void remove_dead_code(struct vsir_program *program) break; } } @@ -9839,7 +12581,7 @@ index 747238e2fee..6a74e2eb8de 100644 return VKD3D_OK; } -@@ -2789,11 +2811,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte +@@ -2789,11 +2838,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte } static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program, @@ -9855,7 +12597,7 @@ index 747238e2fee..6a74e2eb8de 100644 if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) { vkd3d_free(program->instructions.elements); -@@ -2801,6 +2826,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi +@@ -2801,6 +2853,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi program->instructions.capacity = flattener.instruction_capacity; program->instructions.count = flattener.instruction_count; program->block_count = flattener.block_id; @@ -9863,7 +12605,7 @@ index 747238e2fee..6a74e2eb8de 100644 } else { -@@ -2860,13 +2886,16 @@ static bool lower_switch_to_if_ladder_add_block_mapping(struct lower_switch_to_i +@@ -2860,13 +2913,16 @@ static bool 
lower_switch_to_if_ladder_add_block_mapping(struct lower_switch_to_i return true; } @@ -9881,7 +12623,7 @@ index 747238e2fee..6a74e2eb8de 100644 if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) goto fail; -@@ -3050,7 +3079,8 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl +@@ -3050,7 +3106,8 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl vkd3d_free(block_info); } @@ -9891,7 +12633,7 @@ index 747238e2fee..6a74e2eb8de 100644 { size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; struct ssas_to_temps_block_info *info, *block_info = NULL; -@@ -3058,6 +3088,8 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ +@@ -3058,6 +3115,8 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ struct ssas_to_temps_alloc alloc = {0}; unsigned int current_label = 0; @@ -9900,7 +12642,7 @@ index 747238e2fee..6a74e2eb8de 100644 if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) { ERR("Failed to allocate block info array.\n"); -@@ -5271,12 +5303,15 @@ out: +@@ -5271,12 +5330,15 @@ out: } static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, @@ -9917,7 +12659,7 @@ index 747238e2fee..6a74e2eb8de 100644 target.jump_target_temp_idx = program->temp_count; target.temp_count = program->temp_count + 1; -@@ -5324,6 +5359,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, +@@ -5324,6 +5386,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, program->instructions.capacity = target.ins_capacity; program->instructions.count = target.ins_count; program->temp_count = target.temp_count; @@ -9925,7 +12667,7 @@ index 747238e2fee..6a74e2eb8de 100644 return VKD3D_OK; -@@ -5451,11 +5487,14 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_f +@@ -5451,11 +5514,14 @@ static 
enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_f } static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, @@ -9941,7 +12683,7 @@ index 747238e2fee..6a74e2eb8de 100644 for (i = 0; i < program->instructions.count;) { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -@@ -5508,9 +5547,11 @@ static bool find_colour_signature_idx(const struct shader_signature *signature, +@@ -5508,9 +5574,11 @@ static bool find_colour_signature_idx(const struct shader_signature *signature, static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, @@ -9954,7 +12696,7 @@ index 747238e2fee..6a74e2eb8de 100644 size_t pos = ret - instructions->elements; struct vkd3d_shader_instruction *ins; -@@ -5565,6 +5606,11 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr +@@ -5565,6 +5633,11 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); break; @@ -9966,7 +12708,7 @@ index 747238e2fee..6a74e2eb8de 100644 default: FIXME("Unhandled parameter data type %#x.\n", ref->data_type); return VKD3D_ERROR_NOT_IMPLEMENTED; -@@ -5596,8 +5642,9 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr +@@ -5596,8 +5669,9 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr } static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *program, @@ -9977,7 +12719,7 @@ index 747238e2fee..6a74e2eb8de 100644 const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL; static const struct vkd3d_shader_location no_loc; enum vkd3d_shader_comparison_func compare_func; -@@ -5620,13 +5667,13 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro +@@ -5620,13 +5694,13 @@ static 
enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro if (func->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, @@ -9993,7 +12735,7 @@ index 747238e2fee..6a74e2eb8de 100644 return VKD3D_ERROR_INVALID_ARGUMENT; } compare_func = func->u.immediate_constant.u.u32; -@@ -5650,7 +5697,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro +@@ -5650,7 +5724,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro if (ins->opcode == VKD3DSIH_RET) { if ((ret = insert_alpha_test_before_ret(program, ins, compare_func, @@ -10002,7 +12744,7 @@ index 747238e2fee..6a74e2eb8de 100644 return ret; i = new_pos; continue; -@@ -5677,78 +5724,268 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro +@@ -5677,456 +5751,1845 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro return VKD3D_OK; } @@ -10026,55 +12768,49 @@ index 747238e2fee..6a74e2eb8de 100644 - CF_TYPE_BLOCKS, - } cf_type; - bool inside_block; -- -- struct validation_context_temp_data -- { -- enum vsir_dimension dimension; -- size_t first_seen; -- } *temps; + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; + unsigned int output_idx = 0; -- struct validation_context_ssa_data +- struct validation_context_temp_data - { - enum vsir_dimension dimension; +- size_t first_seen; +- } *temps; ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- struct validation_context_ssa_data ++ ins = &program->instructions.elements[pos]; ++ ++ for (unsigned int i = 0; i < 8; ++i) + { +- enum vsir_dimension dimension; - enum vkd3d_data_type data_type; - size_t first_seen; - uint32_t write_mask; - uint32_t 
read_mask; - size_t first_assigned; - } *ssas; -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; ++ if (!(mask & (1u << i))) ++ continue; - enum vkd3d_shader_opcode *blocks; - size_t depth; - size_t blocks_capacity; -}; -+ ins = &program->instructions.elements[pos]; - --static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, -- enum vkd3d_shader_error error, const char *format, ...) --{ -- struct vkd3d_string_buffer buf; -- va_list args; -+ for (unsigned int i = 0; i < 8; ++i) -+ { -+ if (!(mask & (1u << i))) -+ continue; - -- vkd3d_string_buffer_init(&buf); + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DP4, 1, 2); + src_param_init_temp_float4(&ins->src[0], position_temp); + src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 + i, VKD3D_DATA_FLOAT); + ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -- va_start(args, format); -- vkd3d_string_buffer_vprintf(&buf, format, args); -- va_end(args); +-static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, +- enum vkd3d_shader_error error, const char *format, ...) 
+-{ +- struct vkd3d_string_buffer buf; +- va_list args; + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); + if (output_idx < 4) + ins->dst[0].reg.idx[0].offset = low_signature_idx; @@ -10084,21 +12820,13 @@ index 747238e2fee..6a74e2eb8de 100644 + ins->dst[0].write_mask = (1u << (output_idx % 4)); + ++output_idx; -- if (ctx->invalid_instruction_idx) -- { -- vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); -- ERR("VSIR validation error: %s\n", buf.buffer); -- } -- else -- { -- const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; -- vkd3d_shader_error(ctx->message_context, &ins->location, error, -- "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); -- ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); +- vkd3d_string_buffer_init(&buf); + ++ins; - } ++ } -- vkd3d_string_buffer_cleanup(&buf); +- va_start(args, format); +- vkd3d_string_buffer_vprintf(&buf, format, args); +- va_end(args); + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = position_signature_idx; @@ -10108,31 +12836,52 @@ index 747238e2fee..6a74e2eb8de 100644 + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; -- if (!ctx->status) -- ctx->status = VKD3D_ERROR_INVALID_SHADER; +- if (ctx->invalid_instruction_idx) +- { +- vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); +- ERR("VSIR validation error: %s\n", buf.buffer); +- } +- else + *ret_pos = pos + vkd3d_popcount(mask) + 1; + return VKD3D_OK; - } - --static void vsir_validate_src_param(struct validation_context *ctx, -- const struct vkd3d_shader_src_param *src); -+static bool find_position_signature_idx(const struct shader_signature *signature, uint32_t *idx) ++} 
++ ++static bool find_sysval_signature_idx(const struct shader_signature *signature, ++ enum vkd3d_shader_sysval_semantic sysval, uint32_t *idx) +{ + for (unsigned int i = 0; i < signature->element_count; ++i) -+ { -+ if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_POSITION) + { +- const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; +- vkd3d_shader_error(ctx->message_context, &ins->location, error, +- "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); +- ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); ++ if (signature->elements[i].sysval_semantic == sysval) + { + *idx = i; + return true; + } -+ } -+ + } + +- vkd3d_string_buffer_cleanup(&buf); +- +- if (!ctx->status) +- ctx->status = VKD3D_ERROR_INVALID_SHADER; + return false; -+} -+ + } + +-static void vsir_validate_src_param(struct validation_context *ctx, +- const struct vkd3d_shader_src_param *src); +- +-static void vsir_validate_register(struct validation_context *ctx, +- const struct vkd3d_shader_register *reg) +static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *program, + struct vsir_transformation_context *ctx) -+{ + { +- unsigned int i; +- +- if (reg->type >= VKD3DSPR_COUNT) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x.", +- reg->type); + struct shader_signature *signature = &program->output_signature; + unsigned int low_signature_idx = ~0u, high_signature_idx = ~0u; + const struct vkd3d_shader_parameter1 *mask_parameter = NULL; @@ -10143,23 +12892,39 @@ index 747238e2fee..6a74e2eb8de 100644 + unsigned int plane_count; + size_t new_pos; + int ret; -+ + +- if (reg->precision >= VKD3D_SHADER_REGISTER_PRECISION_COUNT) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid register precision %#x.", +- reg->precision); + if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) + return 
VKD3D_OK; -+ + +- if (reg->data_type >= VKD3D_DATA_COUNT) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid register data type %#x.", +- reg->data_type); + for (unsigned int i = 0; i < program->parameter_count; ++i) + { + const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; -+ + +- if (reg->dimension >= VSIR_DIMENSION_COUNT) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid register dimension %#x.", +- reg->dimension); + if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_MASK) + mask_parameter = parameter; + } -+ + +- if (reg->idx_count > ARRAY_SIZE(reg->idx)) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid register index count %u.", +- reg->idx_count); + if (!mask_parameter) + return VKD3D_OK; -+ + +- for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i) + if (mask_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { + { +- const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr; +- if (reg->idx[i].rel_addr) +- vsir_validate_src_param(ctx, param); + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Unsupported clip plane mask parameter type %#x.", mask_parameter->type); + return VKD3D_ERROR_NOT_IMPLEMENTED; @@ -10169,33 +12934,51 @@ index 747238e2fee..6a74e2eb8de 100644 + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid clip plane mask parameter data type %#x.", mask_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; -+ } + } + mask = mask_parameter->u.immediate_constant.u.u32; -+ + +- switch (reg->type) + if (!mask) + return VKD3D_OK; + + for (unsigned int i = 0; i < signature->element_count; ++i) -+ { + { +- case VKD3DSPR_TEMP: + if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_CLIP_DISTANCE) -+ { + { +- struct validation_context_temp_data *data; + vkd3d_shader_error(ctx->message_context, 
&no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER, + "Clip planes cannot be used if the shader writes clip distance."); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + } -+ -+ if (!find_position_signature_idx(signature, &position_signature_idx)) + +- if (reg->idx_count != 1) +- { +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a TEMP register.", +- reg->idx_count); +- break; +- } ++ if (!find_sysval_signature_idx(signature, VKD3D_SHADER_SV_POSITION, &position_signature_idx)) + { + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, + "Shader does not write position."); + return VKD3D_ERROR_INVALID_SHADER; + } -+ + +- if (reg->idx[0].rel_addr) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register."); + /* Append the clip plane signature indices. */ -+ + +- if (reg->idx[0].offset >= ctx->program->temp_count) +- { +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.", +- reg->idx[0].offset, ctx->program->temp_count); +- break; +- } + plane_count = vkd3d_popcount(mask); -+ + +- data = &ctx->temps[reg->idx[0].offset]; + if (!(new_elements = vkd3d_realloc(signature->elements, + (signature->element_count + 2) * sizeof(*signature->elements)))) + return VKD3D_ERROR_OUT_OF_MEMORY; @@ -10227,9 +13010,29 @@ index 747238e2fee..6a74e2eb8de 100644 + + /* We're going to be reading from the output position, so we need to go + * through the whole shader and convert it to a temp. */ -+ + +- if (reg->dimension == VSIR_DIMENSION_NONE) +- { +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a TEMP register."); +- break; +- } + position_temp = program->temp_count++; -+ + +- /* TEMP registers can be scalar or vec4, provided that +- * each individual register always appears with the same +- * dimension. 
*/ +- if (data->dimension == VSIR_DIMENSION_NONE) +- { +- data->dimension = reg->dimension; +- data->first_seen = ctx->instruction_idx; +- } +- else if (data->dimension != reg->dimension) +- { +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a TEMP register: " +- "it has already been seen with dimension %#x at instruction %zu.", +- reg->dimension, data->dimension, data->first_seen); +- } +- break; + for (size_t i = 0; i < program->instructions.count; ++i) + { + ins = &program->instructions.elements[i]; @@ -10244,18 +13047,357 @@ index 747238e2fee..6a74e2eb8de 100644 + return ret; + i = new_pos; + continue; + } + +- case VKD3DSPR_SSA: ++ for (size_t j = 0; j < ins->dst_count; ++j) + { +- struct validation_context_ssa_data *data; ++ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; + +- if (reg->idx_count != 1) ++ /* Note we run after I/O normalization. */ ++ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == position_signature_idx) + { +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a SSA register.", +- reg->idx_count); +- break; ++ dst->reg.type = VKD3DSPR_TEMP; ++ dst->reg.idx[0].offset = position_temp; + } + } ++ } + +- if (reg->idx[0].rel_addr) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a SSA register."); ++ return VKD3D_OK; ++} + +- if (reg->idx[0].offset >= ctx->program->ssa_count) +- { +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, +- "SSA register index %u exceeds the maximum count %u.", +- reg->idx[0].offset, ctx->program->ssa_count); +- break; +- } ++static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *program, ++ const struct vkd3d_shader_instruction *ret, size_t *ret_pos) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ size_t pos = ret - instructions->elements; ++ struct vkd3d_shader_instruction *ins; + +- data = 
&ctx->ssas[reg->idx[0].offset]; ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- if (reg->dimension == VSIR_DIMENSION_NONE) +- { +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a SSA register."); +- break; +- } ++ ins = &program->instructions.elements[pos]; + +- /* SSA registers can be scalar or vec4, provided that each +- * individual register always appears with the same +- * dimension. */ +- if (data->dimension == VSIR_DIMENSION_NONE) +- { +- data->dimension = reg->dimension; +- data->data_type = reg->data_type; +- data->first_seen = ctx->instruction_idx; +- } +- else +- { +- if (data->dimension != reg->dimension) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a SSA register: " +- "it has already been seen with dimension %#x at instruction %zu.", +- reg->dimension, data->dimension, data->first_seen); +- +- if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type)) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a SSA register: " +- "it has already been seen with data type %#x at instruction %zu.", +- reg->data_type, data->data_type, data->first_seen); +- } +- break; +- } ++ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); ++ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); ++ ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; ++ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT); + +- case VKD3DSPR_LABEL: +- if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.", +- reg->precision); ++ *ret_pos = pos + 1; ++ return VKD3D_OK; ++} + +- if (reg->data_type != VKD3D_DATA_UNUSED) +- validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.", +- reg->data_type); ++static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ const struct vkd3d_shader_parameter1 *size_parameter = NULL; ++ static const struct vkd3d_shader_location no_loc; + +- if (reg->dimension != VSIR_DIMENSION_NONE) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a LABEL register.", +- reg->dimension); ++ if (program->has_point_size) ++ return VKD3D_OK; + +- if (reg->idx_count != 1) +- { +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a LABEL register.", +- reg->idx_count); +- break; +- } ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX ++ && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY ++ && program->shader_version.type != VKD3D_SHADER_TYPE_HULL ++ && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) ++ return VKD3D_OK; + +- if (reg->idx[0].rel_addr) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a LABEL register."); +- +- /* Index == 0 is invalid, but it is temporarily allowed +- * for intermediate stages. Once we support validation +- * dialects we can selectively check for that. 
*/ +- if (reg->idx[0].offset > ctx->program->block_count) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, +- "LABEL register index %u exceeds the maximum count %u.", +- reg->idx[0].offset, ctx->program->block_count); +- break; ++ for (unsigned int i = 0; i < program->parameter_count; ++i) ++ { ++ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; + +- case VKD3DSPR_NULL: +- if (reg->idx_count != 0) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a NULL register.", +- reg->idx_count); +- break; ++ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE) ++ size_parameter = parameter; ++ } + +- case VKD3DSPR_IMMCONST: +- if (reg->idx_count != 0) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST register.", +- reg->idx_count); +- break; ++ if (!size_parameter) ++ return VKD3D_OK; + +- case VKD3DSPR_IMMCONST64: +- if (reg->idx_count != 0) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST64 register.", +- reg->idx_count); +- break; ++ if (size_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) ++ { ++ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid point size parameter data type %#x.", size_parameter->data_type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } + +- default: +- break; ++ program->has_point_size = true; ++ ++ /* Append a point size write before each ret. 
*/ ++ for (size_t i = 0; i < program->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ ++ if (ins->opcode == VKD3DSIH_RET) ++ { ++ size_t new_pos; ++ int ret; ++ ++ if ((ret = insert_point_size_before_ret(program, ins, &new_pos)) < 0) ++ return ret; ++ i = new_pos; ++ } + } ++ ++ return VKD3D_OK; + } + +-static void vsir_validate_dst_param(struct validation_context *ctx, +- const struct vkd3d_shader_dst_param *dst) ++static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) + { +- vsir_validate_register(ctx, &dst->reg); ++ const struct vkd3d_shader_parameter1 *min_parameter = NULL, *max_parameter = NULL; ++ static const struct vkd3d_shader_location no_loc; + +- if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write mask %#x.", +- dst->write_mask); ++ if (!program->has_point_size) ++ return VKD3D_OK; + +- switch (dst->reg.dimension) +- { +- case VSIR_DIMENSION_SCALAR: +- if (dst->write_mask != VKD3DSP_WRITEMASK_0) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Scalar destination has invalid write mask %#x.", +- dst->write_mask); +- break; ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX ++ && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY ++ && program->shader_version.type != VKD3D_SHADER_TYPE_HULL ++ && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) ++ return VKD3D_OK; + +- case VSIR_DIMENSION_VEC4: +- if (dst->write_mask == 0) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Vec4 destination has empty write mask."); +- break; ++ for (unsigned int i = 0; i < program->parameter_count; ++i) ++ { ++ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; + +- default: +- if (dst->write_mask != 0) +- validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination of dimension %u has invalid write mask %#x.", +- dst->reg.dimension, dst->write_mask); +- break; ++ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN) ++ min_parameter = parameter; ++ else if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX) ++ max_parameter = parameter; + } + +- if (dst->modifiers & ~VKD3DSPDM_MASK) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.", +- dst->modifiers); ++ if (!min_parameter && !max_parameter) ++ return VKD3D_OK; + +- switch (dst->shift) ++ if (min_parameter && min_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) + { +- case 0: +- case 1: +- case 2: +- case 3: +- case 13: +- case 14: +- case 15: +- break; ++ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid minimum point size parameter data type %#x.", min_parameter->data_type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } + +- default: +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.", +- dst->shift); ++ if (max_parameter && max_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) ++ { ++ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid maximum point size parameter data type %#x.", max_parameter->data_type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; + } + +- switch (dst->reg.type) ++ /* Replace writes to the point size by inserting a clamp before each write. 
*/ ++ ++ for (size_t i = 0; i < program->instructions.count; ++i) + { +- case VKD3DSPR_SSA: +- if (dst->reg.idx[0].offset < ctx->program->ssa_count) +- { +- struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ const struct vkd3d_shader_location *loc; ++ unsigned int ssa_value; ++ bool clamp = false; + +- if (data->write_mask == 0) +- { +- data->write_mask = dst->write_mask; +- data->first_assigned = ctx->instruction_idx; +- } +- else +- { +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE, +- "SSA register is already assigned at instruction %zu.", +- data->first_assigned); +- } ++ if (vsir_instruction_is_dcl(ins)) ++ continue; + + for (size_t j = 0; j < ins->dst_count; ++j) + { + struct vkd3d_shader_dst_param *dst = &ins->dst[j]; + + /* Note we run after I/O normalization. */ -+ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == position_signature_idx) ++ if (dst->reg.type == VKD3DSPR_RASTOUT) + { -+ dst->reg.type = VKD3DSPR_TEMP; -+ dst->reg.idx[0].offset = position_temp; ++ dst_param_init_ssa_float(dst, program->ssa_count); ++ ssa_value = program->ssa_count++; ++ clamp = true; + } +- break; ++ } + +- case VKD3DSPR_IMMCONST: +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid IMMCONST register used as destination parameter."); +- break; ++ if (!clamp) ++ continue; + +- case VKD3DSPR_IMMCONST64: +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid IMMCONST64 register used as destination parameter."); +- break; ++ if (!shader_instruction_array_insert_at(&program->instructions, i + 1, !!min_parameter + !!max_parameter)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; + +- default: +- break; +- } +-} ++ loc = &program->instructions.elements[i].location; ++ ins = &program->instructions.elements[i + 1]; ++ ++ if (min_parameter) ++ { ++ vsir_instruction_init_with_params(program, ins, 
loc, VKD3DSIH_MAX, 1, 2); ++ src_param_init_ssa_float(&ins->src[0], ssa_value); ++ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, VKD3D_DATA_FLOAT); ++ if (max_parameter) ++ { ++ dst_param_init_ssa_float(&ins->dst[0], program->ssa_count); ++ ssa_value = program->ssa_count++; + } ++ else ++ { ++ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); ++ ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; ++ } ++ ++ins; ++ ++i; ++ } ++ ++ if (max_parameter) ++ { ++ vsir_instruction_init_with_params(program, ins, loc, VKD3DSIH_MIN, 1, 2); ++ src_param_init_ssa_float(&ins->src[0], ssa_value); ++ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, VKD3D_DATA_FLOAT); ++ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); ++ ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; ++ ++ ++i; + } + } + @@ -10326,233 +13468,990 @@ index 747238e2fee..6a74e2eb8de 100644 + ctx->status = VKD3D_ERROR_INVALID_SHADER; +} + ++static void vsir_validate_register_without_indices(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ if (reg->idx_count != 0) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a register of type %#x.", ++ reg->idx_count, reg->type); ++} ++ ++static void vsir_validate_io_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ const struct shader_signature *signature; ++ bool has_control_point = false; ++ ++ switch (reg->type) ++ { ++ case VKD3DSPR_INPUT: ++ signature = &ctx->program->input_signature; ++ ++ switch (ctx->program->shader_version.type) ++ { ++ case VKD3D_SHADER_TYPE_GEOMETRY: ++ case VKD3D_SHADER_TYPE_HULL: ++ case VKD3D_SHADER_TYPE_DOMAIN: ++ has_control_point = true; ++ break; ++ ++ default: ++ break; ++ } ++ break; ++ ++ case VKD3DSPR_OUTPUT: ++ switch (ctx->program->shader_version.type) ++ { ++ case 
VKD3D_SHADER_TYPE_HULL: ++ if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE) ++ { ++ signature = &ctx->program->output_signature; ++ has_control_point = ctx->program->normalised_hull_cp_io; ++ } ++ else if (ctx->program->normalised_io) ++ { ++ signature = &ctx->program->output_signature; ++ has_control_point = true; ++ } ++ else ++ { ++ signature = &ctx->program->patch_constant_signature; ++ } ++ break; ++ ++ default: ++ signature = &ctx->program->output_signature; ++ break; ++ } ++ break; ++ ++ case VKD3DSPR_INCONTROLPOINT: ++ signature = &ctx->program->input_signature; ++ has_control_point = true; ++ break; ++ ++ case VKD3DSPR_OUTCONTROLPOINT: ++ signature = &ctx->program->output_signature; ++ has_control_point = true; ++ break; ++ ++ case VKD3DSPR_PATCHCONST: ++ signature = &ctx->program->patch_constant_signature; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ if (!ctx->program->normalised_io) ++ { ++ /* Indices are [register] or [control point, register]. Both are ++ * allowed to have a relative address. */ ++ unsigned int expected_idx_count = 1 + !!has_control_point; ++ ++ if (reg->idx_count != expected_idx_count) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a register of type %#x.", ++ reg->idx_count, reg->type); ++ return; ++ } ++ } ++ else ++ { ++ struct signature_element *element; ++ unsigned int expected_idx_count; ++ unsigned int signature_idx; ++ bool is_array = false; ++ ++ /* If the signature element is not an array, indices are ++ * [signature] or [control point, signature]. If the signature ++ * element is an array, indices are [array, signature] or ++ * [control point, array, signature]. In any case `signature' is ++ * not allowed to have a relative address, while the others are. 
++ */ ++ if (reg->idx_count < 1) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a register of type %#x.", ++ reg->idx_count, reg->type); ++ return; ++ } ++ ++ if (reg->idx[reg->idx_count - 1].rel_addr) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Non-NULL relative address for the signature index of a register of type %#x.", ++ reg->type); ++ return; ++ } ++ ++ signature_idx = reg->idx[reg->idx_count - 1].offset; ++ ++ if (signature_idx >= signature->element_count) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Signature index %u exceeds the signature size %u in a register of type %#x.", ++ signature_idx, signature->element_count, reg->type); ++ return; ++ } ++ ++ element = &signature->elements[signature_idx]; ++ if (element->register_count > 1 || vsir_sysval_semantic_is_tess_factor(element->sysval_semantic)) ++ is_array = true; ++ ++ expected_idx_count = 1 + !!has_control_point + !!is_array; ++ ++ if (reg->idx_count != expected_idx_count) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a register of type %#x.", ++ reg->idx_count, reg->type); ++ return; ++ } ++ } ++} ++ ++static void vsir_validate_temp_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ struct validation_context_temp_data *data; ++ ++ if (reg->idx_count != 1) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a TEMP register.", ++ reg->idx_count); ++ return; ++ } ++ ++ if (reg->idx[0].rel_addr) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Non-NULL relative address for a TEMP register."); ++ ++ if (reg->idx[0].offset >= ctx->program->temp_count) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "TEMP register index %u exceeds the maximum count %u.", ++ reg->idx[0].offset, ctx->program->temp_count); ++ 
return; ++ } ++ ++ data = &ctx->temps[reg->idx[0].offset]; ++ ++ if (reg->dimension == VSIR_DIMENSION_NONE) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, ++ "Invalid dimension NONE for a TEMP register."); ++ return; ++ } ++ ++ /* TEMP registers can be scalar or vec4, provided that ++ * each individual register always appears with the same ++ * dimension. */ ++ if (data->dimension == VSIR_DIMENSION_NONE) ++ { ++ data->dimension = reg->dimension; ++ data->first_seen = ctx->instruction_idx; ++ } ++ else if (data->dimension != reg->dimension) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, ++ "Invalid dimension %#x for a TEMP register: " ++ "it has already been seen with dimension %#x at instruction %zu.", ++ reg->dimension, data->dimension, data->first_seen); ++ } ++} ++ ++static void vsir_validate_rastout_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ if (reg->idx_count != 1) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a RASTOUT register.", ++ reg->idx_count); ++ return; ++ } ++ ++ if (reg->idx[0].rel_addr) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Non-NULL relative address for a RASTOUT register."); ++ ++ if (reg->idx[0].offset >= 3) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Invalid offset for a RASTOUT register."); ++} ++ ++static void vsir_validate_misctype_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ if (reg->idx_count != 1) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a MISCTYPE register.", ++ reg->idx_count); ++ return; ++ } ++ ++ if (reg->idx[0].rel_addr) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Non-NULL relative address for a MISCTYPE register."); ++ ++ if (reg->idx[0].offset >= 2) ++ validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Invalid offset for a MISCTYPE register."); ++} ++ ++static void vsir_validate_label_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, ++ "Invalid precision %#x for a LABEL register.", reg->precision); ++ ++ if (reg->data_type != VKD3D_DATA_UNUSED) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid data type %#x for a LABEL register.", reg->data_type); ++ ++ if (reg->dimension != VSIR_DIMENSION_NONE) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, ++ "Invalid dimension %#x for a LABEL register.", reg->dimension); ++ ++ if (reg->idx_count != 1) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a LABEL register.", reg->idx_count); ++ return; ++ } ++ ++ if (reg->idx[0].rel_addr) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Non-NULL relative address for a LABEL register."); ++ ++ /* Index == 0 is invalid, but it is temporarily allowed ++ * for intermediate stages. Once we support validation ++ * dialects we can selectively check for that. 
*/ ++ if (reg->idx[0].offset > ctx->program->block_count) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "LABEL register index %u exceeds the maximum count %u.", ++ reg->idx[0].offset, ctx->program->block_count); ++} ++ ++static void vsir_validate_sampler_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, ++ "Invalid precision %#x for a SAMPLER register.", reg->precision); ++ ++ if (reg->data_type != VKD3D_DATA_UNUSED) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid data type %#x for a SAMPLER register.", reg->data_type); ++ ++ /* VEC4 is allowed in gather operations. */ ++ if (reg->dimension == VSIR_DIMENSION_SCALAR) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, ++ "Invalid dimension SCALAR for a SAMPLER register."); ++ ++ if (reg->idx_count != 2) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a SAMPLER register.", reg->idx_count); ++ return; ++ } ++ ++ if (reg->idx[0].rel_addr) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Non-NULL relative address for the descriptor index of a SAMPLER register."); ++} ++ ++static void vsir_validate_resource_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, ++ "Invalid precision %#x for a RESOURCE register.", reg->precision); ++ ++ if (reg->data_type != VKD3D_DATA_UNUSED) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid data type %#x for a RESOURCE register.", reg->data_type); ++ ++ if (reg->dimension != VSIR_DIMENSION_VEC4) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, ++ "Invalid dimension %#x 
for a RESOURCE register.", reg->dimension); ++ ++ if (reg->idx_count != 2) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a RESOURCE register.", reg->idx_count); ++ return; ++ } ++ ++ if (reg->idx[0].rel_addr) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Non-NULL relative address for the descriptor index of a RESOURCE register."); ++} ++ ++static void vsir_validate_uav_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, ++ "Invalid precision %#x for a UAV register.", ++ reg->precision); ++ ++ if (reg->data_type != VKD3D_DATA_UNUSED) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid data type %#x for a UAV register.", ++ reg->data_type); ++ ++ /* NONE is allowed in counter operations. */ ++ if (reg->dimension == VSIR_DIMENSION_SCALAR) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, ++ "Invalid dimension %#x for a UAV register.", ++ reg->dimension); ++ ++ if (reg->idx_count != 2) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a UAV register.", ++ reg->idx_count); ++ return; ++ } ++ ++ if (reg->idx[0].rel_addr) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Non-NULL relative address for the descriptor index of a UAV register."); ++} ++ ++static void vsir_validate_ssa_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ struct validation_context_ssa_data *data; ++ ++ if (reg->idx_count != 1) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a SSA register.", ++ reg->idx_count); ++ return; ++ } ++ ++ if (reg->idx[0].rel_addr) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Non-NULL relative 
address for a SSA register."); ++ ++ if (reg->idx[0].offset >= ctx->program->ssa_count) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "SSA register index %u exceeds the maximum count %u.", ++ reg->idx[0].offset, ctx->program->ssa_count); ++ return; ++ } ++ ++ data = &ctx->ssas[reg->idx[0].offset]; ++ ++ if (reg->dimension == VSIR_DIMENSION_NONE) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, ++ "Invalid dimension NONE for a SSA register."); ++ return; ++ } ++ ++ /* SSA registers can be scalar or vec4, provided that each ++ * individual register always appears with the same ++ * dimension. */ ++ if (data->dimension == VSIR_DIMENSION_NONE) ++ { ++ data->dimension = reg->dimension; ++ data->data_type = reg->data_type; ++ data->first_seen = ctx->instruction_idx; ++ } ++ else ++ { ++ if (data->dimension != reg->dimension) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, ++ "Invalid dimension %#x for a SSA register: " ++ "it has already been seen with dimension %#x at instruction %zu.", ++ reg->dimension, data->dimension, data->first_seen); ++ ++ if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid data type %#x for a SSA register: " ++ "it has already been seen with data type %#x at instruction %zu.", ++ reg->data_type, data->data_type, data->first_seen); ++ } ++} ++ +static void vsir_validate_src_param(struct validation_context *ctx, + const struct vkd3d_shader_src_param *src); - - static void vsir_validate_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) -@@ -5934,6 +6171,151 @@ static void vsir_validate_register(struct validation_context *ctx, - reg->idx_count); - break; - -+ case VKD3DSPR_SAMPLER: -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a 
SAMPLER register.", -+ reg->precision); + -+ if (reg->data_type != VKD3D_DATA_UNUSED) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a SAMPLER register.", -+ reg->data_type); ++static void vsir_validate_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ unsigned int i; + -+ /* VEC4 is allowed in gather operations. */ -+ if (reg->dimension == VSIR_DIMENSION_SCALAR) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension SCALAR for a SAMPLER register."); ++ if (reg->type >= VKD3DSPR_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x.", ++ reg->type); + -+ if (reg->idx_count != 2) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a SAMPLER register.", -+ reg->idx_count); -+ break; -+ } ++ if (reg->precision >= VKD3D_SHADER_REGISTER_PRECISION_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid register precision %#x.", ++ reg->precision); + -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for the descriptor index of a SAMPLER register."); ++ if (reg->data_type >= VKD3D_DATA_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid register data type %#x.", ++ reg->data_type); ++ ++ if (reg->dimension >= VSIR_DIMENSION_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid register dimension %#x.", ++ reg->dimension); ++ ++ if (reg->idx_count > ARRAY_SIZE(reg->idx)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid register index count %u.", ++ reg->idx_count); ++ ++ for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i) ++ { ++ const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr; ++ if (reg->idx[i].rel_addr) ++ 
vsir_validate_src_param(ctx, param); ++ } ++ ++ switch (reg->type) ++ { ++ case VKD3DSPR_TEMP: ++ vsir_validate_temp_register(ctx, reg); + break; + -+ case VKD3DSPR_RESOURCE: -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a RESOURCE register.", -+ reg->precision); -+ -+ if (reg->data_type != VKD3D_DATA_UNUSED) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a RESOURCE register.", -+ reg->data_type); -+ -+ if (reg->dimension != VSIR_DIMENSION_VEC4) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x for a RESOURCE register.", -+ reg->dimension); -+ -+ if (reg->idx_count != 2) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a RESOURCE register.", -+ reg->idx_count); -+ break; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for the descriptor index of a RESOURCE register."); -+ break; -+ -+ case VKD3DSPR_UAV: -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a UAV register.", -+ reg->precision); -+ -+ if (reg->data_type != VKD3D_DATA_UNUSED) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a UAV register.", -+ reg->data_type); -+ -+ /* NONE is allowed in counter operations. 
*/ -+ if (reg->dimension == VSIR_DIMENSION_SCALAR) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x for a UAV register.", -+ reg->dimension); -+ -+ if (reg->idx_count != 2) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a UAV register.", -+ reg->idx_count); -+ break; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for the descriptor index of a UAV register."); -+ break; -+ -+ case VKD3DSPR_DEPTHOUT: -+ if (reg->idx_count != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a DEPTHOUT register.", -+ reg->idx_count); -+ break; -+ -+ case VKD3DSPR_DEPTHOUTGE: -+ if (reg->idx_count != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a DEPTHOUTGE register.", -+ reg->idx_count); -+ break; -+ -+ case VKD3DSPR_DEPTHOUTLE: -+ if (reg->idx_count != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a DEPTHOUTLE register.", -+ reg->idx_count); ++ case VKD3DSPR_INPUT: ++ vsir_validate_io_register(ctx, reg); + break; + + case VKD3DSPR_RASTOUT: -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a RASTOUT register.", -+ reg->idx_count); -+ break; -+ } ++ vsir_validate_rastout_register(ctx, reg); ++ break; + -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for a RASTOUT register."); ++ case VKD3DSPR_OUTPUT: ++ vsir_validate_io_register(ctx, reg); ++ break; + -+ if (reg->idx[0].offset >= 3) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Invalid offset for a RASTOUT register."); ++ case VKD3DSPR_DEPTHOUT: ++ vsir_validate_register_without_indices(ctx, reg); + break; + + case 
VKD3DSPR_MISCTYPE: -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a MISCTYPE register.", -+ reg->idx_count); -+ break; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for a MISCTYPE register."); -+ -+ if (reg->idx[0].offset >= 2) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Invalid offset for a MISCTYPE register."); ++ vsir_validate_misctype_register(ctx, reg); ++ break; ++ ++ case VKD3DSPR_LABEL: ++ vsir_validate_label_register(ctx, reg); ++ break; ++ ++ case VKD3DSPR_IMMCONST: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_IMMCONST64: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_NULL: ++ vsir_validate_register_without_indices(ctx, reg); + break; + - default: - break; - } -@@ -6019,6 +6401,16 @@ static void vsir_validate_dst_param(struct validation_context *ctx, - "Invalid IMMCONST64 register used as destination parameter."); - break; - + case VKD3DSPR_SAMPLER: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid SAMPLER register used as destination parameter."); ++ vsir_validate_sampler_register(ctx, reg); + break; + + case VKD3DSPR_RESOURCE: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid RESOURCE register used as destination parameter."); ++ vsir_validate_resource_register(ctx, reg); + break; + - default: - break; - } -@@ -6054,6 +6446,11 @@ static void vsir_validate_src_param(struct validation_context *ctx, - } - break; ++ case VKD3DSPR_UAV: ++ vsir_validate_uav_register(ctx, reg); ++ break; ++ ++ case VKD3DSPR_INCONTROLPOINT: ++ vsir_validate_io_register(ctx, reg); ++ break; ++ ++ case VKD3DSPR_OUTCONTROLPOINT: ++ vsir_validate_io_register(ctx, reg); ++ break; ++ ++ case VKD3DSPR_PATCHCONST: ++ vsir_validate_io_register(ctx, reg); ++ 
break; ++ ++ case VKD3DSPR_DEPTHOUTGE: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_DEPTHOUTLE: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_SSA: ++ vsir_validate_ssa_register(ctx, reg); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++static void vsir_validate_dst_param(struct validation_context *ctx, ++ const struct vkd3d_shader_dst_param *dst) ++{ ++ vsir_validate_register(ctx, &dst->reg); ++ ++ if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write mask %#x.", ++ dst->write_mask); ++ ++ switch (dst->reg.dimension) ++ { ++ case VSIR_DIMENSION_SCALAR: ++ if (dst->write_mask != VKD3DSP_WRITEMASK_0) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Scalar destination has invalid write mask %#x.", ++ dst->write_mask); ++ break; ++ ++ case VSIR_DIMENSION_VEC4: ++ if (dst->write_mask == 0) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Vec4 destination has empty write mask."); ++ break; ++ ++ default: ++ if (dst->write_mask != 0) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination of dimension %u has invalid write mask %#x.", ++ dst->reg.dimension, dst->write_mask); ++ break; ++ } ++ ++ if (dst->modifiers & ~VKD3DSPDM_MASK) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.", ++ dst->modifiers); ++ ++ switch (dst->shift) ++ { ++ case 0: ++ case 1: ++ case 2: ++ case 3: ++ case 13: ++ case 14: ++ case 15: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.", ++ dst->shift); ++ } ++ ++ switch (dst->reg.type) ++ { ++ case VKD3DSPR_SSA: ++ if (dst->reg.idx[0].offset < ctx->program->ssa_count) ++ { ++ struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; ++ ++ if (data->write_mask == 0) ++ { 
++ data->write_mask = dst->write_mask; ++ data->first_assigned = ctx->instruction_idx; ++ } ++ else ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE, ++ "SSA register is already assigned at instruction %zu.", ++ data->first_assigned); ++ } ++ } ++ break; ++ ++ case VKD3DSPR_IMMCONST: ++ case VKD3DSPR_IMMCONST64: ++ case VKD3DSPR_SAMPLER: ++ case VKD3DSPR_RESOURCE: ++ case VKD3DSPR_INPUT: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid %#x register used as destination parameter.", dst->reg.type); ++ break; ++ ++ case VKD3DSPR_PATCHCONST: ++ if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "PATCHCONST register used as destination parameters are only allowed in Hull Shaders."); ++ break; ++ ++ default: ++ break; ++ } ++} + static void vsir_validate_src_param(struct validation_context *ctx, + const struct vkd3d_shader_src_param *src) + { +- vsir_validate_register(ctx, &src->reg); ++ vsir_validate_register(ctx, &src->reg); ++ ++ if (src->swizzle & ~0x03030303u) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.", ++ src->swizzle); ++ ++ if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.", ++ src->reg.dimension, src->swizzle); ++ ++ if (src->modifiers >= VKD3DSPSM_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", ++ src->modifiers); ++ ++ switch (src->reg.type) ++ { ++ case VKD3DSPR_SSA: ++ if (src->reg.idx[0].offset < ctx->program->ssa_count) ++ { ++ struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; ++ unsigned int i; ++ ++ for (i = 0; i < VKD3D_VEC4_SIZE; ++i) ++ data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, i)); ++ } ++ break; ++ + case 
VKD3DSPR_NULL: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid NULL register used as source parameter."); + break; + - default: - break; - } -@@ -6105,13 +6502,13 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, - return true; - } ++ case VKD3DSPR_OUTPUT: ++ if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL ++ || (ctx->phase != VKD3DSIH_HS_FORK_PHASE && ctx->phase != VKD3DSIH_HS_JOIN_PHASE)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid OUTPUT register used as source parameter."); ++ break; ++ ++ case VKD3DSPR_PATCHCONST: ++ if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN ++ && ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "PATCHCONST register used as source parameters are only allowed in Hull and Domain Shaders."); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++static void vsir_validate_dst_count(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction, unsigned int count) ++{ ++ if (instruction->dst_count != count) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, ++ "Invalid destination count %u for an instruction of type %#x, expected %u.", ++ instruction->dst_count, instruction->opcode, count); ++} --static const char *name_from_cf_type(enum cf_type type) +- if (src->swizzle & ~0x03030303u) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.", +- src->swizzle); ++static void vsir_validate_src_count(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction, unsigned int count) ++{ ++ if (instruction->src_count != count) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, ++ "Invalid source count %u for an instruction of type %#x, expected %u.", ++ instruction->src_count, instruction->opcode, count); ++} + +- 
if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.", +- src->reg.dimension, src->swizzle); ++static bool vsir_validate_src_min_count(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction, unsigned int count) ++{ ++ if (instruction->src_count < count) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, ++ "Invalid source count %u for an instruction of type %#x, expected at least %u.", ++ instruction->src_count, instruction->opcode, count); ++ return false; ++ } + +- if (src->modifiers >= VKD3DSPSM_COUNT) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", +- src->modifiers); ++ return true; ++} ++ ++static bool vsir_validate_src_max_count(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction, unsigned int count) ++{ ++ if (instruction->src_count > count) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, ++ "Invalid source count %u for an instruction of type %#x, expected at most %u.", ++ instruction->src_count, instruction->opcode, count); ++ return false; ++ } ++ ++ return true; ++} ++ ++enum vsir_signature_type ++{ ++ SIGNATURE_TYPE_INPUT, ++ SIGNATURE_TYPE_OUTPUT, ++ SIGNATURE_TYPE_PATCH_CONSTANT, ++}; ++ ++static const char * const signature_type_names[] = ++{ ++ [SIGNATURE_TYPE_INPUT] = "input", ++ [SIGNATURE_TYPE_OUTPUT] = "output", ++ [SIGNATURE_TYPE_PATCH_CONSTANT] = "patch constant", ++}; ++ ++#define PS_BIT (1u << VKD3D_SHADER_TYPE_PIXEL) ++#define VS_BIT (1u << VKD3D_SHADER_TYPE_VERTEX) ++#define GS_BIT (1u << VKD3D_SHADER_TYPE_GEOMETRY) ++#define HS_BIT (1u << VKD3D_SHADER_TYPE_HULL) ++#define DS_BIT (1u << VKD3D_SHADER_TYPE_DOMAIN) ++#define CS_BIT (1u << VKD3D_SHADER_TYPE_COMPUTE) ++ ++static const struct sysval_validation_data_element ++{ ++ unsigned int input; 
++ unsigned int output; ++ unsigned int patch_constant; ++ enum vkd3d_shader_component_type data_type; ++ unsigned int component_count; ++} ++sysval_validation_data[] = ++{ ++ [VKD3D_SHADER_SV_POSITION] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, ++ VKD3D_SHADER_COMPONENT_FLOAT, 4}, ++ [VKD3D_SHADER_SV_CLIP_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, ++ VKD3D_SHADER_COMPONENT_FLOAT, 4}, ++ [VKD3D_SHADER_SV_CULL_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, ++ VKD3D_SHADER_COMPONENT_FLOAT, 4}, ++ [VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, ++ [VKD3D_SHADER_SV_TESS_FACTOR_QUADINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, ++ [VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, ++ [VKD3D_SHADER_SV_TESS_FACTOR_TRIINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, ++ [VKD3D_SHADER_SV_TESS_FACTOR_LINEDET] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, ++ [VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, ++}; ++ ++static void vsir_validate_signature_element(struct validation_context *ctx, ++ const struct shader_signature *signature, enum vsir_signature_type signature_type, ++ unsigned int idx) ++{ ++ const char *signature_type_name = signature_type_names[signature_type]; ++ const struct signature_element *element = &signature->elements[idx]; ++ bool integer_type = false; ++ ++ if (element->register_count == 0) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid zero register count.", idx, signature_type_name); ++ ++ if (element->mask == 0 || (element->mask & ~0xf)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid mask %#x.", idx, 
signature_type_name, element->mask); ++ ++ if (!vkd3d_bitmask_is_contiguous(element->mask)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Non-contiguous mask %#x.", ++ idx, signature_type_name, element->mask); ++ ++ /* Here we'd likely want to validate that the usage mask is a subset of the ++ * signature mask. Unfortunately the D3DBC parser sometimes violates this. ++ * For example I've seen a shader like this: ++ * ps_3_0 ++ * [...] ++ * dcl_texcoord0 v0 ++ * [...] ++ * texld r2.xyzw, v0.xyzw, s1.xyzw ++ * [...] ++ * ++ * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to ++ * compute the signature mask, but the texld instruction apparently uses all ++ * the components. Of course the last two components are ignored, but ++ * formally they seem to be used. So we end up with a signature element with ++ * mask .xy and usage mask .xyzw. ++ * ++ * The correct fix would probably be to make the D3DBC parser aware of which ++ * components are really used for each instruction, but that would take some ++ * time. 
*/ ++ if (element->used_mask & ~0xf) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid usage mask %#x.", ++ idx, signature_type_name, element->used_mask); ++ ++ switch (element->sysval_semantic) ++ { ++ case VKD3D_SHADER_SV_NONE: ++ case VKD3D_SHADER_SV_POSITION: ++ case VKD3D_SHADER_SV_CLIP_DISTANCE: ++ case VKD3D_SHADER_SV_CULL_DISTANCE: ++ case VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX: ++ case VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX: ++ case VKD3D_SHADER_SV_VERTEX_ID: ++ case VKD3D_SHADER_SV_PRIMITIVE_ID: ++ case VKD3D_SHADER_SV_INSTANCE_ID: ++ case VKD3D_SHADER_SV_IS_FRONT_FACE: ++ case VKD3D_SHADER_SV_SAMPLE_INDEX: ++ case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: ++ case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: ++ case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: ++ case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: ++ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: ++ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: ++ case VKD3D_SHADER_SV_TARGET: ++ case VKD3D_SHADER_SV_DEPTH: ++ case VKD3D_SHADER_SV_COVERAGE: ++ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: ++ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: ++ case VKD3D_SHADER_SV_STENCIL_REF: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid system value semantic %#x.", ++ idx, signature_type_name, element->sysval_semantic); ++ break; ++ } ++ ++ if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) ++ { ++ const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic]; ++ ++ if (data->input || data->output || data->patch_constant) ++ { ++ unsigned int mask; ++ ++ switch (signature_type) ++ { ++ case SIGNATURE_TYPE_INPUT: ++ mask = data->input; ++ break; ++ ++ case SIGNATURE_TYPE_OUTPUT: ++ mask = data->output; ++ break; ++ ++ case SIGNATURE_TYPE_PATCH_CONSTANT: ++ mask = data->patch_constant; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ if (!(mask & (1u << 
ctx->program->shader_version.type))) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid system value semantic %#x.", ++ idx, signature_type_name, element->sysval_semantic); ++ } ++ ++ if (data->component_count != 0) ++ { ++ if (element->component_type != data->data_type) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid data type %#x for system value semantic %#x.", ++ idx, signature_type_name, element->component_type, element->sysval_semantic); ++ ++ if (vsir_write_mask_component_count(element->mask) > data->component_count) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid mask %#x for system value semantic %#x.", ++ idx, signature_type_name, element->mask, element->sysval_semantic); ++ } ++ } ++ ++ switch (element->component_type) ++ { ++ case VKD3D_SHADER_COMPONENT_INT: ++ case VKD3D_SHADER_COMPONENT_UINT: ++ integer_type = true; ++ break; ++ ++ case VKD3D_SHADER_COMPONENT_FLOAT: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid component type %#x.", ++ idx, signature_type_name, element->component_type); ++ break; ++ } ++ ++ if (element->min_precision >= VKD3D_SHADER_MINIMUM_PRECISION_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid minimum precision %#x.", ++ idx, signature_type_name, element->min_precision); ++ ++ if (element->interpolation_mode >= VKD3DSIM_COUNT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid interpolation mode %#x.", ++ idx, signature_type_name, element->interpolation_mode); ++ ++ if (integer_type && element->interpolation_mode != VKD3DSIM_NONE ++ && element->interpolation_mode != VKD3DSIM_CONSTANT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ 
"element %u of %s signature: Invalid interpolation mode %#x for integer component type.", ++ idx, signature_type_name, element->interpolation_mode); ++} ++ ++static const unsigned int allowed_signature_phases[] = ++{ ++ [SIGNATURE_TYPE_INPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, ++ [SIGNATURE_TYPE_OUTPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, ++ [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, ++}; ++ ++static void vsir_validate_signature(struct validation_context *ctx, ++ const struct shader_signature *signature, enum vsir_signature_type signature_type) ++{ ++ unsigned int i; ++ ++ if (signature->element_count != 0 && !(allowed_signature_phases[signature_type] ++ & (1u << ctx->program->shader_version.type))) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Unexpected %s signature.", signature_type_names[signature_type]); ++ ++ for (i = 0; i < signature->element_count; ++i) ++ vsir_validate_signature_element(ctx, signature, signature_type, i); ++} ++ +static const char *name_from_cf_type(enum vsir_control_flow_type type) - { - switch (type) - { -- case CF_TYPE_STRUCTURED: ++{ ++ switch (type) ++ { + case VSIR_CF_STRUCTURED: - return "structured"; -- case CF_TYPE_BLOCKS: ++ return "structured"; + case VSIR_CF_BLOCKS: - return "block-based"; - default: - vkd3d_unreachable(); -@@ -6119,437 +6516,510 @@ static const char *name_from_cf_type(enum cf_type type) - } - - static void vsir_validate_cf_type(struct validation_context *ctx, -- const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type) ++ return "block-based"; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void vsir_validate_cf_type(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction, enum vsir_control_flow_type expected_type) - { -- VKD3D_ASSERT(ctx->cf_type != CF_TYPE_UNKNOWN); -- VKD3D_ASSERT(expected_type != CF_TYPE_UNKNOWN); -- if (ctx->cf_type != expected_type) ++{ + if (ctx->program->cf_type != 
expected_type) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", -- instruction->opcode, name_from_cf_type(ctx->cf_type)); ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", + instruction->opcode, name_from_cf_type(ctx->program->cf_type)); - } - --static void vsir_validate_instruction(struct validation_context *ctx) ++} ++ +static void vsir_validator_push_block(struct validation_context *ctx, enum vkd3d_shader_opcode opcode) - { -- const struct vkd3d_shader_version *version = &ctx->program->shader_version; -- const struct vkd3d_shader_instruction *instruction; -- size_t i; -- -- instruction = &ctx->program->instructions.elements[ctx->instruction_idx]; ++{ + if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) + { + ctx->status = VKD3D_ERROR_OUT_OF_MEMORY; @@ -10560,9 +14459,7 @@ index 747238e2fee..6a74e2eb8de 100644 + } + ctx->blocks[ctx->depth++] = opcode; +} - -- for (i = 0; i < instruction->dst_count; ++i) -- vsir_validate_dst_param(ctx, &instruction->dst[i]); ++ +static void vsir_validate_hull_shader_phase(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ @@ -10577,14 +14474,11 @@ index 747238e2fee..6a74e2eb8de 100644 + ctx->phase = instruction->opcode; + ctx->dcl_temps_found = false; +} - -- for (i = 0; i < instruction->src_count; ++i) -- vsir_validate_src_param(ctx, &instruction->src[i]); ++ +static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + size_t i; - -- if (instruction->opcode >= VKD3DSIH_INVALID) ++ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); + vsir_validate_dst_count(ctx, instruction, 0); + @@ -10592,15 +14486,11 @@ index 747238e2fee..6a74e2eb8de 100644 + return; + + if (vsir_register_is_label(&instruction->src[0].reg)) - { -- validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", -- instruction->opcode); -- } ++ { + /* Unconditional branch: parameters are jump label, + * optional merge label, optional continue label. */ + vsir_validate_src_max_count(ctx, instruction, 3); - -- switch (instruction->opcode) ++ + for (i = 0; i < instruction->src_count; ++i) + { + if (!vsir_register_is_label(&instruction->src[i].reg)) @@ -10610,37 +14500,13 @@ index 747238e2fee..6a74e2eb8de 100644 + } + } + else - { -- case VKD3DSIH_HS_DECLS: -- case VKD3DSIH_HS_CONTROL_POINT_PHASE: -- case VKD3DSIH_HS_FORK_PHASE: -- case VKD3DSIH_HS_JOIN_PHASE: -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (version->type != VKD3D_SHADER_TYPE_HULL) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, -- "Phase instruction %#x is only valid in a hull shader.", -- instruction->opcode); -- if (ctx->depth != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -- "Phase instruction %#x must appear to top level.", -- instruction->opcode); -- ctx->phase = instruction->opcode; -- ctx->dcl_temps_found = false; -- return; ++ { + /* Conditional branch: parameters are condition, true + * jump label, false jump label, optional merge label, + * optional continue label. */ + vsir_validate_src_min_count(ctx, instruction, 3); + vsir_validate_src_max_count(ctx, instruction, 5); - -- case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: -- /* Exclude non-finite values. 
*/ -- if (!(instruction->declaration.max_tessellation_factor >= 1.0f -- && instruction->declaration.max_tessellation_factor <= 64.0f)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Max tessellation factor %f is invalid.", -- instruction->declaration.max_tessellation_factor); -- return; ++ + for (i = 1; i < instruction->src_count; ++i) + { + if (!vsir_register_is_label(&instruction->src[i].reg)) @@ -10649,21 +14515,10 @@ index 747238e2fee..6a74e2eb8de 100644 + instruction->src[i].reg.type); + } + } - -- case VKD3DSIH_DCL_INPUT_PRIMITIVE: -- if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED -- || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", -- instruction->declaration.primitive_type.type); -- return; ++ + ctx->inside_block = false; +} - -- case VKD3DSIH_DCL_VERTICES_OUT: -- if (instruction->declaration.count > 1024) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", -- instruction->declaration.count); -- return; ++ +static void vsir_validate_dcl_gs_instances(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ @@ -10671,13 +14526,7 @@ index 747238e2fee..6a74e2eb8de 100644 + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", + instruction->declaration.count); +} - -- case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: -- if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED -- || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", -- instruction->declaration.primitive_type.type); -- return; ++ +static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ @@ -10688,12 +14537,7 @@ index 
747238e2fee..6a74e2eb8de 100644 + "Max tessellation factor %f is invalid.", + instruction->declaration.max_tessellation_factor); +} - -- case VKD3DSIH_DCL_GS_INSTANCES: -- if (!instruction->declaration.count || instruction->declaration.count > 32) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", -- instruction->declaration.count); -- return; ++ +static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ @@ -10702,12 +14546,7 @@ index 747238e2fee..6a74e2eb8de 100644 + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", + instruction->declaration.primitive_type.type); +} - -- case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: -- if (!instruction->declaration.count || instruction->declaration.count > 32) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.", -- instruction->declaration.count); -- return; ++ +static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ @@ -10716,13 +14555,7 @@ index 747238e2fee..6a74e2eb8de 100644 + "Output control point count %u is invalid.", + instruction->declaration.count); +} - -- case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: -- if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID -- || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -- "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); -- return; ++ +static void vsir_validate_dcl_output_topology(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ @@ -10731,13 +14564,7 @@ index 747238e2fee..6a74e2eb8de 100644 + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u 
is invalid.", + instruction->declaration.primitive_type.type); +} - -- case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: -- if (!instruction->declaration.tessellator_output_primitive -- || instruction->declaration.tessellator_output_primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -- "Tessellator output primitive %#x is invalid.", instruction->declaration.tessellator_output_primitive); -- return; ++ +static void vsir_validate_dcl_temps(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ @@ -10750,13 +14577,7 @@ index 747238e2fee..6a74e2eb8de 100644 + instruction->declaration.count, ctx->program->temp_count); + ctx->dcl_temps_found = true; +} - -- case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: -- if (!instruction->declaration.tessellator_partitioning -- || instruction->declaration.tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -- "Tessellator partitioning %#x is invalid.", instruction->declaration.tessellator_partitioning); -- return; ++ +static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ @@ -10765,10 +14586,7 @@ index 747238e2fee..6a74e2eb8de 100644 + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); +} - -- default: -- break; -- } ++ +static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ @@ -10779,13 +14597,7 @@ index 747238e2fee..6a74e2eb8de 100644 + "Tessellator output primitive %#x is invalid.", + instruction->declaration.tessellator_output_primitive); +} - -- /* Only DCL instructions may occur outside hull shader phases. 
*/ -- if (!vsir_instruction_is_dcl(instruction) && version->type == VKD3D_SHADER_TYPE_HULL -- && ctx->phase == VKD3DSIH_INVALID) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, -- "Instruction %#x appear before any phase instruction in a hull shader.", -- instruction->opcode); ++ +static void vsir_validate_dcl_tessellator_partitioning(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ @@ -10796,22 +14608,7 @@ index 747238e2fee..6a74e2eb8de 100644 + "Tessellator partitioning %#x is invalid.", + instruction->declaration.tessellator_partitioning); +} - -- /* We support two different control flow types in shaders: -- * block-based, like DXIL and SPIR-V, and structured, like D3DBC -- * and TPF. The shader is detected as block-based when its first -- * instruction, except for DCL_* and phases, is a LABEL. Currently -- * we mandate that each shader is either purely block-based or -- * purely structured. In principle we could allow structured -- * constructs in a block, provided they are confined in a single -- * block, but need for that hasn't arisen yet, so we don't. 
*/ -- if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) -- { -- if (instruction->opcode == VKD3DSIH_LABEL) -- ctx->cf_type = CF_TYPE_BLOCKS; -- else -- ctx->cf_type = CF_TYPE_STRUCTURED; -- } ++ +static void vsir_validate_dcl_vertices_out(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) +{ @@ -10922,41 +14719,29 @@ index 747238e2fee..6a74e2eb8de 100644 + instruction->src_count); + incoming_count = instruction->src_count / 2; -- if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) +- switch (src->reg.type) + for (i = 0; i < incoming_count; ++i) { -- switch (instruction->opcode) -- { -- case VKD3DSIH_LABEL: -- if (ctx->inside_block) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid LABEL instruction inside a block."); -- ctx->inside_block = true; -- break; +- case VKD3DSPR_SSA: +- if (src->reg.idx[0].offset < ctx->program->ssa_count) +- { +- struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; +- unsigned int i; + unsigned int value_idx = 2 * i; + unsigned int label_idx = 2 * i + 1; -- case VKD3DSIH_RET: -- case VKD3DSIH_BRANCH: -- case VKD3DSIH_SWITCH_MONOLITHIC: -- if (!ctx->inside_block) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -- "Invalid instruction %#x outside any block.", -- instruction->opcode); -- ctx->inside_block = false; -- break; +- for (i = 0; i < VKD3D_VEC4_SIZE; ++i) +- data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, i)); +- } +- break; + if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) + && !register_is_ssa(&instruction->src[value_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid value register for incoming %u of type %#x in PHI instruction, " + "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); -- default: -- if (!ctx->inside_block) -- validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -- "Invalid instruction %#x outside any block.", -- instruction->opcode); -- break; -- } +- default: +- break; + if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid value dimension %#x for incoming %u in PHI instruction, expected scalar.", @@ -10967,287 +14752,154 @@ index 747238e2fee..6a74e2eb8de 100644 + "Invalid label register for case %u of type %#x in PHI instruction, " + "expected LABEL.", i, instruction->src[value_idx].reg.type); } - -- switch (instruction->opcode) -- { -- case VKD3DSIH_DCL_TEMPS: -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->dcl_temps_found) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, "Duplicate DCL_TEMPS instruction."); -- if (instruction->declaration.count > ctx->program->temp_count) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS, -- "Invalid DCL_TEMPS count %u, expected at most %u.", -- instruction->declaration.count, ctx->program->temp_count); -- ctx->dcl_temps_found = true; -- break; ++ + if (instruction->dst_count < 1) + return; - -- case VKD3DSIH_IF: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 1); -- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) -- return; -- ctx->blocks[ctx->depth++] = instruction->opcode; -- break; ++ + if (!register_is_ssa(&instruction->dst[0].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid destination of type %#x in PHI instruction, expected SSA.", + instruction->dst[0].reg.type); - -- case VKD3DSIH_IFC: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, 
instruction, 2); -- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) -- return; -- ctx->blocks[ctx->depth++] = VKD3DSIH_IF; -- break; ++ + if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid destination dimension %#x in PHI instruction, expected scalar.", + instruction->dst[0].reg.dimension); - -- case VKD3DSIH_ELSE: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); -- else -- ctx->blocks[ctx->depth - 1] = instruction->opcode; -- break; ++ + if (instruction->dst[0].modifiers != VKD3DSPDM_NONE) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, + "Invalid modifiers %#x for the destination of a PHI instruction, expected none.", + instruction->dst[0].modifiers); - -- case VKD3DSIH_ENDIF: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDIF instruction doesn't terminate IF/ELSE block."); -- else -- --ctx->depth; -- break; ++ + if (instruction->dst[0].shift != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, + "Invalid shift %#x for the destination of a PHI instruction, expected none.", + instruction->dst[0].shift); -+} + } -- case VKD3DSIH_LOOP: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, 
instruction, version->major <= 3 ? 2 : 0); -- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) -- return; -- ctx->blocks[ctx->depth++] = instruction->opcode; -- break; +-static void vsir_validate_dst_count(struct validation_context *ctx, +- const struct vkd3d_shader_instruction *instruction, unsigned int count) +static void vsir_validate_rep(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ + { +- if (instruction->dst_count != count) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, +- "Invalid destination count %u for an instruction of type %#x, expected %u.", +- instruction->dst_count, instruction->opcode, count); + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + vsir_validator_push_block(ctx, VKD3DSIH_REP); -+} + } -- case VKD3DSIH_ENDLOOP: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDLOOP instruction doesn't terminate LOOP block."); -- else -- --ctx->depth; -- break; +-static void vsir_validate_src_count(struct validation_context *ctx, +- const struct vkd3d_shader_instruction *instruction, unsigned int count) +static void vsir_validate_ret(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ + { +- if (instruction->src_count != count) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, +- "Invalid source count %u for an instruction of type %#x, expected %u.", +- instruction->src_count, instruction->opcode, count); + ctx->inside_block = false; -+} + } -- case VKD3DSIH_REP: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, 
instruction, 1); -- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) -- return; -- ctx->blocks[ctx->depth++] = instruction->opcode; -- break; +-static bool vsir_validate_src_min_count(struct validation_context *ctx, +- const struct vkd3d_shader_instruction *instruction, unsigned int count) +static void vsir_validate_switch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) -+{ + { +- if (instruction->src_count < count) +- { +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, +- "Invalid source count %u for an instruction of type %#x, expected at least %u.", +- instruction->src_count, instruction->opcode, count); +- return false; +- } +- +- return true; + vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED); + vsir_validator_push_block(ctx, VKD3DSIH_SWITCH); -+} + } -- case VKD3DSIH_ENDREP: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDREP instruction doesn't terminate REP block."); -- else -- --ctx->depth; -- break; +-static bool vsir_validate_src_max_count(struct validation_context *ctx, +- const struct vkd3d_shader_instruction *instruction, unsigned int count) +static void vsir_validate_switch_monolithic(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) -+{ + { +- if (instruction->src_count > count) +- { + unsigned int i, case_count; - -- case VKD3DSIH_SWITCH: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 1); -- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) -- return; -- 
ctx->blocks[ctx->depth++] = instruction->opcode; -- break; ++ + vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS); - -- case VKD3DSIH_ENDSWITCH: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDSWITCH instruction doesn't terminate SWITCH block."); -- else -- --ctx->depth; -- break; ++ + /* Parameters are source, default label, merge label and + * then pairs of constant value and case label. */ - -- case VKD3DSIH_RET: -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 0); -- break; ++ + if (!vsir_validate_src_min_count(ctx, instruction, 3)) + return; - -- case VKD3DSIH_LABEL: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); -- vsir_validate_dst_count(ctx, instruction, 0); -- vsir_validate_src_count(ctx, instruction, 1); -- if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid register of type %#x in a LABEL instruction, expected LABEL.", -- instruction->src[0].reg.type); -- break; ++ + if (instruction->src_count % 2 != 1) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, +- "Invalid source count %u for an instruction of type %#x, expected at most %u.", +- instruction->src_count, instruction->opcode, count); +- return false; +- } + "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.", + instruction->src_count); -- case VKD3DSIH_BRANCH: -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); -- vsir_validate_dst_count(ctx, instruction, 0); -- if (!vsir_validate_src_min_count(ctx, instruction, 1)) -- break; -- if 
(vsir_register_is_label(&instruction->src[0].reg)) -- { -- /* Unconditional branch: parameters are jump label, -- * optional merge label, optional continue label. */ -- vsir_validate_src_max_count(ctx, instruction, 3); +- return true; +-} + if (!vsir_register_is_label(&instruction->src[1].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.", + instruction->src[1].reg.type); -- for (i = 0; i < instruction->src_count; ++i) -- { -- if (!vsir_register_is_label(&instruction->src[i].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.", -- instruction->src[i].reg.type); -- } -- } -- else -- { -- /* Conditional branch: parameters are condition, true -- * jump label, false jump label, optional merge label, -- * optional continue label. */ -- vsir_validate_src_min_count(ctx, instruction, 3); -- vsir_validate_src_max_count(ctx, instruction, 5); +-static const char *name_from_cf_type(enum cf_type type) +-{ +- switch (type) + if (!vsir_register_is_label(&instruction->src[2].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.", + instruction->src[2].reg.type); - -- for (i = 1; i < instruction->src_count; ++i) -- { -- if (!vsir_register_is_label(&instruction->src[i].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.", -- instruction->src[i].reg.type); -- } -- } -- break; ++ + case_count = (instruction->src_count - 3) / 2; - -- case VKD3DSIH_SWITCH_MONOLITHIC: -- { -- unsigned int case_count; ++ + for (i = 0; i < case_count; ++i) -+ { + { +- case CF_TYPE_STRUCTURED: +- return "structured"; +- case CF_TYPE_BLOCKS: +- return 
"block-based"; +- default: +- vkd3d_unreachable(); + unsigned int value_idx = 3 + 2 * i; + unsigned int label_idx = 3 + 2 * i + 1; - -- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); -- vsir_validate_dst_count(ctx, instruction, 0); -- /* Parameters are source, default label, merge label and -- * then pairs of constant value and case label. */ -- if (!vsir_validate_src_min_count(ctx, instruction, 3)) -- break; -- if (instruction->src_count % 2 != 1) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, -- "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.", -- instruction->src_count); ++ + if (!register_is_constant(&instruction->src[value_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid value register for case %u of type %#x in monolithic SWITCH instruction, " + "expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); - -- if (!vsir_register_is_label(&instruction->src[1].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.", -- instruction->src[1].reg.type); ++ + if (!vsir_register_is_label(&instruction->src[label_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid label register for case %u of type %#x in monolithic SWITCH instruction, " + "expected LABEL.", i, instruction->src[value_idx].reg.type); -+ } - -- if (!vsir_register_is_label(&instruction->src[2].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.", -- instruction->src[2].reg.type); + } ++ + ctx->inside_block = false; -+} + } -- case_count = (instruction->src_count - 3) / 2; +-static void vsir_validate_cf_type(struct validation_context *ctx, +- const struct vkd3d_shader_instruction *instruction, enum 
cf_type expected_type) +struct vsir_validator_instruction_desc -+{ + { +- VKD3D_ASSERT(ctx->cf_type != CF_TYPE_UNKNOWN); +- VKD3D_ASSERT(expected_type != CF_TYPE_UNKNOWN); +- if (ctx->cf_type != expected_type) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", +- instruction->opcode, name_from_cf_type(ctx->cf_type)); +-} + unsigned int dst_param_count; + unsigned int src_param_count; + void (*validate)(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction); +}; - -- for (i = 0; i < case_count; ++i) -- { -- unsigned int value_idx = 3 + 2 * i; -- unsigned int label_idx = 3 + 2 * i + 1; -- -- if (!register_is_constant(&instruction->src[value_idx].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid value register for case %zu of type %#x in monolithic SWITCH instruction, " -- "expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); -- -- if (!vsir_register_is_label(&instruction->src[label_idx].reg)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid label register for case %zu of type %#x in monolithic SWITCH instruction, " -- "expected LABEL.", i, instruction->src[value_idx].reg.type); -- } -- break; ++ +static const struct vsir_validator_instruction_desc vsir_validator_instructions[] = +{ + [VKD3DSIH_BRANCH] = {0, ~0u, vsir_validate_branch}, @@ -11281,29 +14933,105 @@ index 747238e2fee..6a74e2eb8de 100644 + [VKD3DSIH_SWITCH] = {0, 1, vsir_validate_switch}, + [VKD3DSIH_SWITCH_MONOLITHIC] = {0, ~0u, vsir_validate_switch_monolithic}, +}; -+ -+static void vsir_validate_instruction(struct validation_context *ctx) -+{ -+ const struct vkd3d_shader_version *version = &ctx->program->shader_version; -+ const struct vkd3d_shader_instruction *instruction; -+ size_t i; -+ -+ instruction = &ctx->program->instructions.elements[ctx->instruction_idx]; -+ -+ for (i = 0; i < instruction->dst_count; ++i) -+ 
vsir_validate_dst_param(ctx, &instruction->dst[i]); -+ -+ for (i = 0; i < instruction->src_count; ++i) -+ vsir_validate_src_param(ctx, &instruction->src[i]); -+ -+ if (instruction->opcode >= VKD3DSIH_INVALID) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", -+ instruction->opcode); -+ } -+ + + static void vsir_validate_instruction(struct validation_context *ctx) + { +@@ -6148,136 +7611,40 @@ static void vsir_validate_instruction(struct validation_context *ctx) + instruction->opcode); + } + +- switch (instruction->opcode) + if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID) -+ { + { +- case VKD3DSIH_HS_DECLS: +- case VKD3DSIH_HS_CONTROL_POINT_PHASE: +- case VKD3DSIH_HS_FORK_PHASE: +- case VKD3DSIH_HS_JOIN_PHASE: +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 0); +- if (version->type != VKD3D_SHADER_TYPE_HULL) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, +- "Phase instruction %#x is only valid in a hull shader.", +- instruction->opcode); +- if (ctx->depth != 0) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, +- "Phase instruction %#x must appear to top level.", +- instruction->opcode); +- ctx->phase = instruction->opcode; +- ctx->dcl_temps_found = false; +- return; +- +- case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: +- /* Exclude non-finite values. 
*/ +- if (!(instruction->declaration.max_tessellation_factor >= 1.0f +- && instruction->declaration.max_tessellation_factor <= 64.0f)) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Max tessellation factor %f is invalid.", +- instruction->declaration.max_tessellation_factor); +- return; +- +- case VKD3DSIH_DCL_INPUT_PRIMITIVE: +- if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED +- || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS input primitive %u is invalid.", +- instruction->declaration.primitive_type.type); +- return; +- +- case VKD3DSIH_DCL_VERTICES_OUT: +- if (instruction->declaration.count > 1024) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output vertex count %u is invalid.", +- instruction->declaration.count); +- return; +- +- case VKD3DSIH_DCL_OUTPUT_TOPOLOGY: +- if (instruction->declaration.primitive_type.type == VKD3D_PT_UNDEFINED +- || instruction->declaration.primitive_type.type >= VKD3D_PT_COUNT) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS output primitive %u is invalid.", +- instruction->declaration.primitive_type.type); +- return; +- +- case VKD3DSIH_DCL_GS_INSTANCES: +- if (!instruction->declaration.count || instruction->declaration.count > 32) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_GS, "GS instance count %u is invalid.", +- instruction->declaration.count); +- return; +- +- case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: +- if (!instruction->declaration.count || instruction->declaration.count > 32) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.", +- instruction->declaration.count); +- return; +- +- case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: +- if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID +- || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) 
+- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, +- "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); +- return; +- +- case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: +- if (!instruction->declaration.tessellator_output_primitive +- || instruction->declaration.tessellator_output_primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, +- "Tessellator output primitive %#x is invalid.", instruction->declaration.tessellator_output_primitive); +- return; +- +- case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: +- if (!instruction->declaration.tessellator_partitioning +- || instruction->declaration.tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, +- "Tessellator partitioning %#x is invalid.", instruction->declaration.tessellator_partitioning); +- return; +- +- default: +- break; +- } +- +- /* Only DCL instructions may occur outside hull shader phases. */ +- if (!vsir_instruction_is_dcl(instruction) && version->type == VKD3D_SHADER_TYPE_HULL +- && ctx->phase == VKD3DSIH_INVALID) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, +- "Instruction %#x appear before any phase instruction in a hull shader.", +- instruction->opcode); + switch (instruction->opcode) + { + case VKD3DSIH_NOP: @@ -11312,20 +15040,277 @@ index 747238e2fee..6a74e2eb8de 100644 + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + break; -+ + +- /* We support two different control flow types in shaders: +- * block-based, like DXIL and SPIR-V, and structured, like D3DBC +- * and TPF. The shader is detected as block-based when its first +- * instruction, except for DCL_* and phases, is a LABEL. Currently +- * we mandate that each shader is either purely block-based or +- * purely structured. 
In principle we could allow structured +- * constructs in a block, provided they are confined in a single +- * block, but need for that hasn't arisen yet, so we don't. */ +- if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) +- { +- if (instruction->opcode == VKD3DSIH_LABEL) +- ctx->cf_type = CF_TYPE_BLOCKS; +- else +- ctx->cf_type = CF_TYPE_STRUCTURED; + default: + if (!vsir_instruction_is_dcl(instruction)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "Instruction %#x appear before any phase instruction in a hull shader.", + instruction->opcode); + break; ++ } + } + +- if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) ++ if (ctx->program->cf_type == VSIR_CF_BLOCKS && !ctx->inside_block) + { + switch (instruction->opcode) + { ++ case VKD3DSIH_NOP: + case VKD3DSIH_LABEL: +- if (ctx->inside_block) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid LABEL instruction inside a block."); +- ctx->inside_block = true; +- break; +- +- case VKD3DSIH_RET: +- case VKD3DSIH_BRANCH: +- case VKD3DSIH_SWITCH_MONOLITHIC: +- if (!ctx->inside_block) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, +- "Invalid instruction %#x outside any block.", +- instruction->opcode); +- ctx->inside_block = false; ++ case VKD3DSIH_HS_DECLS: ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ case VKD3DSIH_HS_FORK_PHASE: ++ case VKD3DSIH_HS_JOIN_PHASE: + break; + + default: +- if (!ctx->inside_block) ++ if (!vsir_instruction_is_dcl(instruction)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Invalid instruction %#x outside any block.", + instruction->opcode); +@@ -6285,271 +7652,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) } -+ } + } + +- switch (instruction->opcode) ++ if (instruction->opcode < ARRAY_SIZE(vsir_validator_instructions)) + { +- case VKD3DSIH_DCL_TEMPS: +- vsir_validate_dst_count(ctx, instruction, 0); +- 
vsir_validate_src_count(ctx, instruction, 0); +- if (ctx->dcl_temps_found) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, "Duplicate DCL_TEMPS instruction."); +- if (instruction->declaration.count > ctx->program->temp_count) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS, +- "Invalid DCL_TEMPS count %u, expected at most %u.", +- instruction->declaration.count, ctx->program->temp_count); +- ctx->dcl_temps_found = true; +- break; +- +- case VKD3DSIH_IF: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 1); +- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) +- return; +- ctx->blocks[ctx->depth++] = instruction->opcode; +- break; +- +- case VKD3DSIH_IFC: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 2); +- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) +- return; +- ctx->blocks[ctx->depth++] = VKD3DSIH_IF; +- break; +- +- case VKD3DSIH_ELSE: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 0); +- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); +- else +- ctx->blocks[ctx->depth - 1] = instruction->opcode; +- break; +- +- case VKD3DSIH_ENDIF: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 0); +- if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE)) +- validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDIF instruction doesn't terminate IF/ELSE block."); +- else +- --ctx->depth; +- break; +- +- case VKD3DSIH_LOOP: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 2 : 0); +- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) +- return; +- ctx->blocks[ctx->depth++] = instruction->opcode; +- break; +- +- case VKD3DSIH_ENDLOOP: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 0); +- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDLOOP instruction doesn't terminate LOOP block."); +- else +- --ctx->depth; +- break; +- +- case VKD3DSIH_REP: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 1); +- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) +- return; +- ctx->blocks[ctx->depth++] = instruction->opcode; +- break; +- +- case VKD3DSIH_ENDREP: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 0); +- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDREP instruction doesn't terminate REP block."); +- else +- --ctx->depth; +- break; +- +- case VKD3DSIH_SWITCH: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 1); +- if (!vkd3d_array_reserve((void **)&ctx->blocks, 
&ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) +- return; +- ctx->blocks[ctx->depth++] = instruction->opcode; +- break; +- +- case VKD3DSIH_ENDSWITCH: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED); +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 0); +- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDSWITCH instruction doesn't terminate SWITCH block."); +- else +- --ctx->depth; +- break; +- +- case VKD3DSIH_RET: +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 0); +- break; +- +- case VKD3DSIH_LABEL: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); +- vsir_validate_dst_count(ctx, instruction, 0); +- vsir_validate_src_count(ctx, instruction, 1); +- if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg)) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid register of type %#x in a LABEL instruction, expected LABEL.", +- instruction->src[0].reg.type); +- break; +- +- case VKD3DSIH_BRANCH: +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); +- vsir_validate_dst_count(ctx, instruction, 0); +- if (!vsir_validate_src_min_count(ctx, instruction, 1)) +- break; +- if (vsir_register_is_label(&instruction->src[0].reg)) +- { +- /* Unconditional branch: parameters are jump label, +- * optional merge label, optional continue label. 
*/ +- vsir_validate_src_max_count(ctx, instruction, 3); +- +- for (i = 0; i < instruction->src_count; ++i) +- { +- if (!vsir_register_is_label(&instruction->src[i].reg)) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.", +- instruction->src[i].reg.type); +- } +- } +- else +- { +- /* Conditional branch: parameters are condition, true +- * jump label, false jump label, optional merge label, +- * optional continue label. */ +- vsir_validate_src_min_count(ctx, instruction, 3); +- vsir_validate_src_max_count(ctx, instruction, 5); +- +- for (i = 1; i < instruction->src_count; ++i) +- { +- if (!vsir_register_is_label(&instruction->src[i].reg)) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.", +- instruction->src[i].reg.type); +- } +- } +- break; +- +- case VKD3DSIH_SWITCH_MONOLITHIC: +- { +- unsigned int case_count; +- +- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); +- vsir_validate_dst_count(ctx, instruction, 0); +- /* Parameters are source, default label, merge label and +- * then pairs of constant value and case label. 
*/ +- if (!vsir_validate_src_min_count(ctx, instruction, 3)) +- break; +- if (instruction->src_count % 2 != 1) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, +- "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.", +- instruction->src_count); ++ const struct vsir_validator_instruction_desc *desc; + +- if (!vsir_register_is_label(&instruction->src[1].reg)) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.", +- instruction->src[1].reg.type); +- +- if (!vsir_register_is_label(&instruction->src[2].reg)) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.", +- instruction->src[2].reg.type); +- +- case_count = (instruction->src_count - 3) / 2; +- +- for (i = 0; i < case_count; ++i) +- { +- unsigned int value_idx = 3 + 2 * i; +- unsigned int label_idx = 3 + 2 * i + 1; +- +- if (!register_is_constant(&instruction->src[value_idx].reg)) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid value register for case %zu of type %#x in monolithic SWITCH instruction, " +- "expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); +- +- if (!vsir_register_is_label(&instruction->src[label_idx].reg)) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid label register for case %zu of type %#x in monolithic SWITCH instruction, " +- "expected LABEL.", i, instruction->src[value_idx].reg.type); +- } +- break; +- } ++ desc = &vsir_validator_instructions[instruction->opcode]; - case VKD3DSIH_PHI: -+ if (ctx->program->cf_type == VSIR_CF_BLOCKS && !ctx->inside_block) -+ { -+ switch (instruction->opcode) ++ if (desc->validate) { - unsigned int incoming_count; - @@ -11342,41 +15327,22 @@ index 747238e2fee..6a74e2eb8de 100644 - 
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid destination of type %#x in PHI instruction, expected SSA.", - instruction->dst[0].reg.type); -+ case VKD3DSIH_NOP: -+ case VKD3DSIH_LABEL: -+ case VKD3DSIH_HS_DECLS: -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: -+ case VKD3DSIH_HS_JOIN_PHASE: -+ break; - +- - if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, - "Invalid destination dimension %#x in PHI instruction, expected scalar.", - instruction->dst[0].reg.dimension); -+ default: -+ if (!vsir_instruction_is_dcl(instruction)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Invalid instruction %#x outside any block.", -+ instruction->opcode); -+ break; -+ } -+ } - +- - if (instruction->dst[0].modifiers != VKD3DSPDM_NONE) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, - "Invalid modifiers %#x for the destination of a PHI instruction, expected none.", - instruction->dst[0].modifiers); -+ if (instruction->opcode < ARRAY_SIZE(vsir_validator_instructions)) -+ { -+ const struct vsir_validator_instruction_desc *desc; - +- - if (instruction->dst[0].shift != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, - "Invalid shift %#x for the destination of a PHI instruction, expected none.", - instruction->dst[0].shift); -+ desc = &vsir_validator_instructions[instruction->opcode]; - +- - for (i = 0; i < incoming_count; ++i) - { - unsigned int value_idx = 2 * i; @@ -11399,8 +15365,6 @@ index 747238e2fee..6a74e2eb8de 100644 - "expected LABEL.", i, instruction->src[value_idx].reg.type); - } - break; -+ if (desc->validate) -+ { + if (desc->dst_param_count != ~0u) + vsir_validate_dst_count(ctx, instruction, desc->dst_param_count); + if (desc->src_param_count != ~0u) @@ -11413,17 +15377,80 @@ index 747238e2fee..6a74e2eb8de 100644 } } -@@ -6575,7 +7045,8 @@ enum vkd3d_result 
vsir_program_validate(struct vsir_program *program, uint64_t c +@@ -6563,19 +7679,71 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c + .null_location = {.source_name = source_name}, + .status = VKD3D_OK, + .phase = VKD3DSIH_INVALID, ++ .invalid_instruction_idx = true, + }; + unsigned int i; + + if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) + return VKD3D_OK; + ++ switch (program->shader_version.type) ++ { ++ case VKD3D_SHADER_TYPE_HULL: ++ case VKD3D_SHADER_TYPE_DOMAIN: ++ break; ++ ++ default: ++ if (program->patch_constant_signature.element_count != 0) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Patch constant signature is only valid for hull and domain shaders."); ++ } ++ ++ switch (program->shader_version.type) ++ { ++ case VKD3D_SHADER_TYPE_HULL: ++ case VKD3D_SHADER_TYPE_DOMAIN: ++ case VKD3D_SHADER_TYPE_GEOMETRY: ++ if (program->input_control_point_count == 0) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Invalid zero input control point count."); ++ break; ++ ++ default: ++ if (program->input_control_point_count != 0) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Invalid input control point count %u.", ++ program->input_control_point_count); ++ } ++ ++ switch (program->shader_version.type) ++ { ++ case VKD3D_SHADER_TYPE_HULL: ++ if (program->output_control_point_count == 0) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Invalid zero output control point count."); ++ break; ++ ++ default: ++ if (program->output_control_point_count != 0) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Invalid output control point count %u.", ++ program->output_control_point_count); ++ } ++ ++ vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT); ++ vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); ++ vsir_validate_signature(&ctx, 
&program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); ++ + if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) + goto fail; + if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) goto fail; - for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx) ++ ctx.invalid_instruction_idx = false; ++ + for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count + && ctx.status != VKD3D_ERROR_OUT_OF_MEMORY; ++ctx.instruction_idx) vsir_validate_instruction(&ctx); ctx.invalid_instruction_idx = true; -@@ -6610,74 +7081,75 @@ fail: +@@ -6610,74 +7778,75 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } @@ -11498,17 +15525,15 @@ index 747238e2fee..6a74e2eb8de 100644 + vsir_transform(&ctx, vsir_program_remap_output_signature); if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) - { +- { - if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) - return result; - - if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, - &program->input_signature)) < 0) - return result; -+ vsir_transform(&ctx, vsir_program_flatten_hull_shader_phases); -+ vsir_transform(&ctx, instruction_array_normalise_hull_shader_control_point_io); - } - +- } +- - if ((result = vsir_program_normalise_io_registers(program, message_context)) < 0) - return result; - @@ -11516,9 +15541,11 @@ index 747238e2fee..6a74e2eb8de 100644 - return result; - - remove_dead_code(program); -- ++ vsir_transform(&ctx, vsir_program_flatten_hull_shader_phases); + - if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) - return result; ++ vsir_transform(&ctx, instruction_array_normalise_hull_shader_control_point_io); + vsir_transform(&ctx, vsir_program_normalise_io_registers); + vsir_transform(&ctx, vsir_program_normalise_flat_constants); + vsir_transform(&ctx, vsir_program_remove_dead_code); @@ 
-11534,23 +15561,26 @@ index 747238e2fee..6a74e2eb8de 100644 - return result; + vsir_transform(&ctx, vsir_program_insert_alpha_test); + vsir_transform(&ctx, vsir_program_insert_clip_planes); ++ vsir_transform(&ctx, vsir_program_insert_point_size); ++ vsir_transform(&ctx, vsir_program_insert_point_size_clamp); if (TRACE_ON()) - vkd3d_shader_trace(program); - +- vkd3d_shader_trace(program); +- - if ((result = vsir_program_validate(program, config_flags, - compile_info->source_name, message_context)) < 0) - return result; -- ++ vsir_program_trace(program); + - return result; + return ctx.result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c new file mode 100644 -index 00000000000..6b41363d60e +index 00000000000..5baefbc1f44 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -0,0 +1,319 @@ +@@ -0,0 +1,881 @@ +/* + * Copyright 2024 Feifan He for CodeWeavers + * @@ -11591,6 +15621,9 @@ index 00000000000..6b41363d60e + struct vkd3d_shader_location location; + struct vkd3d_shader_message_context *message_context; + unsigned int indent; ++ const char *prefix; ++ const struct vkd3d_shader_interface_info *interface_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; +}; + +static void VKD3D_PRINTF_FUNC(3, 4) msl_compiler_error(struct msl_generator *gen, @@ -11603,16 +15636,37 @@ index 00000000000..6b41363d60e + va_end(args); +} + ++static const char *msl_get_prefix(enum vkd3d_shader_type type) ++{ ++ switch (type) ++ { ++ case VKD3D_SHADER_TYPE_VERTEX: ++ return "vs"; ++ case VKD3D_SHADER_TYPE_HULL: ++ return "hs"; ++ case VKD3D_SHADER_TYPE_DOMAIN: ++ return "ds"; ++ case VKD3D_SHADER_TYPE_GEOMETRY: ++ return "gs"; ++ case VKD3D_SHADER_TYPE_PIXEL: ++ return "ps"; ++ case VKD3D_SHADER_TYPE_COMPUTE: ++ return "cs"; ++ default: ++ return NULL; ++ } ++} ++ +static void msl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) +{ + vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); +} + 
+static void msl_print_register_datatype(struct vkd3d_string_buffer *buffer, -+ struct msl_generator *gen, const struct vkd3d_shader_register *reg) ++ struct msl_generator *gen, enum vkd3d_data_type data_type) +{ + vkd3d_string_buffer_printf(buffer, "."); -+ switch (reg->data_type) ++ switch (data_type) + { + case VKD3D_DATA_FLOAT: + vkd3d_string_buffer_printf(buffer, "f"); @@ -11625,8 +15679,8 @@ index 00000000000..6b41363d60e + break; + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled register datatype %#x.", reg->data_type); -+ vkd3d_string_buffer_printf(buffer, "", reg->data_type); ++ "Internal compiler error: Unhandled register datatype %#x.", data_type); ++ vkd3d_string_buffer_printf(buffer, "", data_type); + break; + } +} @@ -11638,8 +15692,66 @@ index 00000000000..6b41363d60e + { + case VKD3DSPR_TEMP: + vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); -+ msl_print_register_datatype(buffer, gen, reg); ++ msl_print_register_datatype(buffer, gen, reg->data_type); + break; ++ ++ case VKD3DSPR_INPUT: ++ if (reg->idx_count != 1) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled input register index count %u.", reg->idx_count); ++ vkd3d_string_buffer_printf(buffer, "", reg->type); ++ break; ++ } ++ if (reg->idx[0].rel_addr) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled input register indirect addressing."); ++ vkd3d_string_buffer_printf(buffer, "", reg->type); ++ break; ++ } ++ vkd3d_string_buffer_printf(buffer, "v[%u]", reg->idx[0].offset); ++ msl_print_register_datatype(buffer, gen, reg->data_type); ++ break; ++ ++ case VKD3DSPR_OUTPUT: ++ if (reg->idx_count != 1) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled output register index count %u.", reg->idx_count); ++ vkd3d_string_buffer_printf(buffer, "", reg->type); ++ break; 
++ } ++ if (reg->idx[0].rel_addr) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled output register indirect addressing."); ++ vkd3d_string_buffer_printf(buffer, "", reg->type); ++ break; ++ } ++ vkd3d_string_buffer_printf(buffer, "o[%u]", reg->idx[0].offset); ++ msl_print_register_datatype(buffer, gen, reg->data_type); ++ break; ++ ++ case VKD3DSPR_CONSTBUFFER: ++ if (reg->idx_count != 3) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled constant buffer register index count %u.", reg->idx_count); ++ vkd3d_string_buffer_printf(buffer, "", reg->type); ++ break; ++ } ++ if (reg->idx[0].rel_addr || reg->idx[2].rel_addr) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled constant buffer register indirect addressing."); ++ vkd3d_string_buffer_printf(buffer, "", reg->type); ++ break; ++ } ++ vkd3d_string_buffer_printf(buffer, "descriptors.cb_%u[%u]", reg->idx[0].offset, reg->idx[2].offset); ++ msl_print_register_datatype(buffer, gen, reg->data_type); ++ break; ++ + default: + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled register type %#x.", reg->type); @@ -11797,6 +15909,456 @@ index 00000000000..6b41363d60e + } +} + ++static bool msl_check_shader_visibility(const struct msl_generator *gen, ++ enum vkd3d_shader_visibility visibility) ++{ ++ enum vkd3d_shader_type t = gen->program->shader_version.type; ++ ++ switch (visibility) ++ { ++ case VKD3D_SHADER_VISIBILITY_ALL: ++ return true; ++ case VKD3D_SHADER_VISIBILITY_VERTEX: ++ return t == VKD3D_SHADER_TYPE_VERTEX; ++ case VKD3D_SHADER_VISIBILITY_HULL: ++ return t == VKD3D_SHADER_TYPE_HULL; ++ case VKD3D_SHADER_VISIBILITY_DOMAIN: ++ return t == VKD3D_SHADER_TYPE_DOMAIN; ++ case VKD3D_SHADER_VISIBILITY_GEOMETRY: ++ return t == VKD3D_SHADER_TYPE_GEOMETRY; ++ case VKD3D_SHADER_VISIBILITY_PIXEL: ++ return t == 
VKD3D_SHADER_TYPE_PIXEL; ++ case VKD3D_SHADER_VISIBILITY_COMPUTE: ++ return t == VKD3D_SHADER_TYPE_COMPUTE; ++ default: ++ WARN("Invalid shader visibility %#x.\n", visibility); ++ return false; ++ } ++} ++ ++static bool msl_get_cbv_binding(const struct msl_generator *gen, ++ unsigned int register_space, unsigned int register_idx, unsigned int *binding_idx) ++{ ++ const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; ++ const struct vkd3d_shader_resource_binding *binding; ++ unsigned int i; ++ ++ if (!interface_info) ++ return false; ++ ++ for (i = 0; i < interface_info->binding_count; ++i) ++ { ++ binding = &interface_info->bindings[i]; ++ ++ if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_CBV) ++ continue; ++ if (binding->register_space != register_space) ++ continue; ++ if (binding->register_index != register_idx) ++ continue; ++ if (!msl_check_shader_visibility(gen, binding->shader_visibility)) ++ continue; ++ if (!(binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER)) ++ continue; ++ *binding_idx = i; ++ return true; ++ } ++ ++ return false; ++} ++ ++static void msl_generate_cbv_declaration(struct msl_generator *gen, ++ const struct vkd3d_shader_descriptor_info1 *cbv) ++{ ++ const struct vkd3d_shader_descriptor_binding *binding; ++ struct vkd3d_string_buffer *buffer = gen->buffer; ++ unsigned int binding_idx; ++ size_t size; ++ ++ if (cbv->count != 1) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, ++ "Constant buffer %u has unsupported descriptor array size %u.", cbv->register_id, cbv->count); ++ return; ++ } ++ ++ if (!msl_get_cbv_binding(gen, cbv->register_space, cbv->register_index, &binding_idx)) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, ++ "No descriptor binding specified for constant buffer %u.", cbv->register_id); ++ return; ++ } ++ ++ binding = &gen->interface_info->bindings[binding_idx].binding; ++ ++ if (binding->set != 0) ++ { ++ msl_compiler_error(gen, 
VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, ++ "Unsupported binding set %u specified for constant buffer %u.", binding->set, cbv->register_id); ++ return; ++ } ++ ++ if (binding->count != 1) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, ++ "Unsupported binding count %u specified for constant buffer %u.", binding->count, cbv->register_id); ++ return; ++ } ++ ++ size = align(cbv->buffer_size, VKD3D_VEC4_SIZE * sizeof(uint32_t)); ++ size /= VKD3D_VEC4_SIZE * sizeof(uint32_t); ++ ++ vkd3d_string_buffer_printf(buffer, ++ "constant vkd3d_vec4 (&cb_%u)[%zu] [[id(%u)]];", cbv->register_id, size, binding->binding); ++}; ++ ++static void msl_generate_descriptor_struct_declarations(struct msl_generator *gen) ++{ ++ const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; ++ const struct vkd3d_shader_descriptor_info1 *descriptor; ++ struct vkd3d_string_buffer *buffer = gen->buffer; ++ unsigned int i; ++ ++ if (!info->descriptor_count) ++ return; ++ ++ vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_descriptors\n{\n", gen->prefix); ++ ++ for (i = 0; i < info->descriptor_count; ++i) ++ { ++ descriptor = &info->descriptors[i]; ++ ++ msl_print_indent(buffer, 1); ++ switch (descriptor->type) ++ { ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: ++ msl_generate_cbv_declaration(gen, descriptor); ++ break; ++ ++ default: ++ vkd3d_string_buffer_printf(buffer, "/* */", descriptor->type); ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled descriptor type %#x.", descriptor->type); ++ break; ++ } ++ vkd3d_string_buffer_printf(buffer, "\n"); ++ } ++ ++ vkd3d_string_buffer_printf(buffer, "};\n\n"); ++} ++ ++static void msl_generate_input_struct_declarations(struct msl_generator *gen) ++{ ++ const struct shader_signature *signature = &gen->program->input_signature; ++ enum vkd3d_shader_type type = gen->program->shader_version.type; ++ struct vkd3d_string_buffer *buffer = gen->buffer; ++ const struct 
signature_element *e; ++ unsigned int i; ++ ++ vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_in\n{\n", gen->prefix); ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ e = &signature->elements[i]; ++ ++ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) ++ continue; ++ ++ if (e->sysval_semantic) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); ++ continue; ++ } ++ ++ if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); ++ continue; ++ } ++ ++ if (e->interpolation_mode != VKD3DSIM_NONE) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); ++ continue; ++ } ++ ++ if(e->register_count > 1) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled register count %u.", e->register_count); ++ continue; ++ } ++ ++ msl_print_indent(gen->buffer, 1); ++ ++ switch(e->component_type) ++ { ++ case VKD3D_SHADER_COMPONENT_FLOAT: ++ vkd3d_string_buffer_printf(buffer, "float4 "); ++ break; ++ case VKD3D_SHADER_COMPONENT_INT: ++ vkd3d_string_buffer_printf(buffer, "int4 "); ++ break; ++ case VKD3D_SHADER_COMPONENT_UINT: ++ vkd3d_string_buffer_printf(buffer, "uint4 "); ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, " ", e->component_type); ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled component type %#x.", e->component_type); ++ break; ++ } ++ ++ vkd3d_string_buffer_printf(buffer, "shader_in_%u ", i); ++ ++ switch (type) ++ { ++ case VKD3D_SHADER_TYPE_VERTEX: ++ vkd3d_string_buffer_printf(gen->buffer, "[[attribute(%u)]]", e->target_location); ++ break; ++ case VKD3D_SHADER_TYPE_PIXEL: ++ 
vkd3d_string_buffer_printf(gen->buffer, "[[user(locn%u)]]", e->target_location); ++ break; ++ default: ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled shader type %#x.", type); ++ break; ++ } ++ ++ vkd3d_string_buffer_printf(buffer, ";\n"); ++ } ++ ++ vkd3d_string_buffer_printf(buffer, "};\n\n"); ++} ++ ++static void msl_generate_vertex_output_element_attribute(struct msl_generator *gen, const struct signature_element *e) ++{ ++ switch (e->sysval_semantic) ++ { ++ case VKD3D_SHADER_SV_POSITION: ++ vkd3d_string_buffer_printf(gen->buffer, "[[position]]"); ++ break; ++ case VKD3D_SHADER_SV_NONE: ++ vkd3d_string_buffer_printf(gen->buffer, "[[user(locn%u)]]", e->target_location); ++ break; ++ default: ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled vertex shader system value %#x.", e->sysval_semantic); ++ break; ++ } ++} ++ ++static void msl_generate_pixel_output_element_attribute(struct msl_generator *gen, const struct signature_element *e) ++{ ++ switch (e->sysval_semantic) ++ { ++ case VKD3D_SHADER_SV_TARGET: ++ vkd3d_string_buffer_printf(gen->buffer, "[[color(%u)]]", e->target_location); ++ break; ++ default: ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled pixel shader system value %#x.", e->sysval_semantic); ++ break; ++ } ++} ++ ++static void msl_generate_output_struct_declarations(struct msl_generator *gen) ++{ ++ const struct shader_signature *signature = &gen->program->output_signature; ++ enum vkd3d_shader_type type = gen->program->shader_version.type; ++ struct vkd3d_string_buffer *buffer = gen->buffer; ++ const struct signature_element *e; ++ unsigned int i; ++ ++ vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_out\n{\n", gen->prefix); ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ e = &signature->elements[i]; ++ ++ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) ++ continue; ++ 
++ if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); ++ continue; ++ } ++ ++ if (e->interpolation_mode != VKD3DSIM_NONE) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); ++ continue; ++ } ++ ++ if(e->register_count > 1) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled register count %u.", e->register_count); ++ continue; ++ } ++ ++ msl_print_indent(gen->buffer, 1); ++ ++ switch(e->component_type) ++ { ++ case VKD3D_SHADER_COMPONENT_FLOAT: ++ vkd3d_string_buffer_printf(buffer, "float4 "); ++ break; ++ case VKD3D_SHADER_COMPONENT_INT: ++ vkd3d_string_buffer_printf(buffer, "int4 "); ++ break; ++ case VKD3D_SHADER_COMPONENT_UINT: ++ vkd3d_string_buffer_printf(buffer, "uint4 "); ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, " ", e->component_type); ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled component type %#x.", e->component_type); ++ break; ++ } ++ ++ vkd3d_string_buffer_printf(buffer, "shader_out_%u ", i); ++ ++ switch (type) ++ { ++ case VKD3D_SHADER_TYPE_VERTEX: ++ msl_generate_vertex_output_element_attribute(gen, e); ++ break; ++ case VKD3D_SHADER_TYPE_PIXEL: ++ msl_generate_pixel_output_element_attribute(gen, e); ++ break; ++ default: ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled shader type %#x.", type); ++ break; ++ } ++ ++ vkd3d_string_buffer_printf(buffer, ";\n"); ++ } ++ ++ vkd3d_string_buffer_printf(buffer, "};\n\n"); ++} ++ ++static void msl_generate_entrypoint_prologue(struct msl_generator *gen) ++{ ++ const struct shader_signature *signature = &gen->program->input_signature; ++ struct vkd3d_string_buffer *buffer = 
gen->buffer; ++ const struct signature_element *e; ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ e = &signature->elements[i]; ++ ++ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) ++ continue; ++ ++ vkd3d_string_buffer_printf(buffer, " %s_in[%u]", gen->prefix, e->register_index); ++ if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) ++ { ++ msl_print_register_datatype(buffer, gen, vkd3d_data_type_from_component_type(e->component_type)); ++ msl_print_write_mask(buffer, e->mask); ++ vkd3d_string_buffer_printf(buffer, " = input.shader_in_%u", i); ++ msl_print_write_mask(buffer, e->mask); ++ } ++ else ++ { ++ vkd3d_string_buffer_printf(buffer, " = ", e->sysval_semantic); ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled system value %#x input.", e->sysval_semantic); ++ } ++ vkd3d_string_buffer_printf(buffer, ";\n"); ++ } ++} ++ ++static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) ++{ ++ const struct shader_signature *signature = &gen->program->output_signature; ++ struct vkd3d_string_buffer *buffer = gen->buffer; ++ const struct signature_element *e; ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ e = &signature->elements[i]; ++ ++ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) ++ continue; ++ ++ switch (e->sysval_semantic) ++ { ++ case VKD3D_SHADER_SV_NONE: ++ case VKD3D_SHADER_SV_TARGET: ++ case VKD3D_SHADER_SV_POSITION: ++ vkd3d_string_buffer_printf(buffer, " output.shader_out_%u", i); ++ msl_print_write_mask(buffer, e->mask); ++ vkd3d_string_buffer_printf(buffer, " = %s_out", gen->prefix); ++ msl_print_register_datatype(buffer, gen, vkd3d_data_type_from_component_type(e->component_type)); ++ msl_print_write_mask(buffer, e->mask); ++ break; ++ default: ++ vkd3d_string_buffer_printf(buffer, " ", e->sysval_semantic); ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: 
Unhandled system value %#x input.", e->sysval_semantic); ++ } ++ vkd3d_string_buffer_printf(buffer, ";\n"); ++ } ++} ++ ++static void msl_generate_entrypoint(struct msl_generator *gen) ++{ ++ enum vkd3d_shader_type type = gen->program->shader_version.type; ++ ++ switch (type) ++ { ++ case VKD3D_SHADER_TYPE_VERTEX: ++ vkd3d_string_buffer_printf(gen->buffer, "vertex "); ++ break; ++ case VKD3D_SHADER_TYPE_PIXEL: ++ vkd3d_string_buffer_printf(gen->buffer, "fragment "); ++ break; ++ default: ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled shader type %#x.", type); ++ return; ++ } ++ ++ vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_out shader_entry(\n", gen->prefix); ++ ++ if (gen->descriptor_info->descriptor_count) ++ { ++ msl_print_indent(gen->buffer, 2); ++ /* TODO: Configurable argument buffer binding location. */ ++ vkd3d_string_buffer_printf(gen->buffer, ++ "constant vkd3d_%s_descriptors& descriptors [[buffer(0)]],\n", gen->prefix); ++ } ++ ++ msl_print_indent(gen->buffer, 2); ++ vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_in input [[stage_in]])\n{\n", gen->prefix); ++ ++ /* TODO: declare #maximum_register + 1 */ ++ vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_in[%u];\n", gen->prefix, 32); ++ vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32); ++ vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix); ++ ++ msl_generate_entrypoint_prologue(gen); ++ ++ vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix); ++ if (gen->descriptor_info->descriptor_count) ++ vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); ++ vkd3d_string_buffer_printf(gen->buffer, ");\n"); ++ ++ msl_generate_entrypoint_epilogue(gen); ++ ++ vkd3d_string_buffer_printf(gen->buffer, " return output;\n}\n"); ++} ++ +static void msl_generator_generate(struct msl_generator *gen) +{ + const struct 
vkd3d_shader_instruction_array *instructions = &gen->program->instructions; @@ -11811,7 +16373,17 @@ index 00000000000..6b41363d60e + vkd3d_string_buffer_printf(gen->buffer, " int4 i;\n"); + vkd3d_string_buffer_printf(gen->buffer, " float4 f;\n};\n\n"); + -+ vkd3d_string_buffer_printf(gen->buffer, "void shader_main()\n{\n"); ++ msl_generate_descriptor_struct_declarations(gen); ++ msl_generate_input_struct_declarations(gen); ++ msl_generate_output_struct_declarations(gen); ++ ++ vkd3d_string_buffer_printf(gen->buffer, ++ "void %s_main(thread vkd3d_vec4 *v, " ++ "thread vkd3d_vec4 *o", ++ gen->prefix); ++ if (gen->descriptor_info->descriptor_count) ++ vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); ++ vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); + + ++gen->indent; + @@ -11826,7 +16398,11 @@ index 00000000000..6b41363d60e + msl_handle_instruction(gen, &instructions->elements[i]); + } + -+ vkd3d_string_buffer_printf(gen->buffer, "}\n"); ++ --gen->indent; ++ ++ vkd3d_string_buffer_printf(gen->buffer, "}\n\n"); ++ ++ msl_generate_entrypoint(gen); + + if (TRACE_ON()) + vkd3d_string_buffer_trace(gen->buffer); @@ -11839,8 +16415,12 @@ index 00000000000..6b41363d60e +} + +static int msl_generator_init(struct msl_generator *gen, struct vsir_program *program, ++ const struct vkd3d_shader_compile_info *compile_info, ++ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + struct vkd3d_shader_message_context *message_context) +{ ++ enum vkd3d_shader_type type = program->shader_version.type; ++ + memset(gen, 0, sizeof(*gen)); + gen->program = program; + vkd3d_string_buffer_cache_init(&gen->string_buffers); @@ -11850,11 +16430,20 @@ index 00000000000..6b41363d60e + return VKD3D_ERROR_OUT_OF_MEMORY; + } + gen->message_context = message_context; ++ if (!(gen->prefix = msl_get_prefix(type))) ++ { ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled shader type 
%#x.", type); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); ++ gen->descriptor_info = descriptor_info; + + return VKD3D_OK; +} + +int msl_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct msl_generator generator; @@ -11863,15 +16452,46 @@ index 00000000000..6b41363d60e + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) + return ret; + -+ if ((ret = msl_generator_init(&generator, program, message_context)) < 0) ++ VKD3D_ASSERT(program->normalised_io); ++ VKD3D_ASSERT(program->normalised_hull_cp_io); ++ ++ if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) + return ret; + msl_generator_generate(&generator); + msl_generator_cleanup(&generator); + + return VKD3D_ERROR_INVALID_SHADER; +} +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h +index 9806614a35b..a98c8ae3df5 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.h ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h +@@ -60,6 +60,7 @@ struct preproc_expansion + { + struct preproc_buffer buffer; + const struct preproc_text *text; ++ struct preproc_text *arg_values; + /* Back-pointer to the macro, if this expansion a macro body. This is + * necessary so that argument tokens can be correctly replaced. 
*/ + struct preproc_macro *macro; +@@ -72,7 +73,6 @@ struct preproc_macro + + char **arg_names; + size_t arg_count; +- struct preproc_text *arg_values; + + struct preproc_text body; + }; +@@ -117,6 +117,7 @@ struct preproc_ctx + STATE_ARGS, + } state; + unsigned int paren_depth; ++ struct preproc_text *arg_values; + } text_func, directive_func; + + int current_directive; diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index 2b7455a5c30..41c21cca1f5 100644 +index 2b7455a5c30..d167415c356 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -20,6 +20,7 @@ @@ -11882,6 +16502,20 @@ index 2b7455a5c30..41c21cca1f5 100644 #include "preproc.tab.h" #undef ERROR /* defined in wingdi.h */ +@@ -29,11 +30,11 @@ + + #define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner) + +-static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) ++static struct preproc_expansion *preproc_get_top_expansion(struct preproc_ctx *ctx) + { + if (!ctx->expansion_count) + return NULL; +- return ctx->expansion_stack[ctx->expansion_count - 1].macro; ++ return &ctx->expansion_stack[ctx->expansion_count - 1]; + } + + static void update_location(struct preproc_ctx *ctx); @@ -66,7 +67,7 @@ static void update_location(struct preproc_ctx *ctx); NEWLINE \r?\n @@ -11891,8 +16525,188 @@ index 2b7455a5c30..41c21cca1f5 100644 INT_SUFFIX [uUlL]{0,2} %% +@@ -132,14 +133,14 @@ INT_SUFFIX [uUlL]{0,2} + + if (!ctx->last_was_newline) + { +- struct preproc_macro *macro; ++ struct preproc_expansion *exp; + + /* Stringification is only done for function-like macro bodies. + * Anywhere else, we need to parse it as two separate tokens. + * We could use a state for this, but yyless() is easier and cheap. 
+ */ + +- if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count) ++ if ((exp = preproc_get_top_expansion(ctx)) && exp->macro && exp->macro->arg_count) + return T_HASHSTRING; + + yyless(1); +@@ -258,6 +259,12 @@ static void preproc_pop_buffer(struct preproc_ctx *ctx) + + yy_delete_buffer(exp->buffer.lexer_buffer, ctx->scanner); + ++ if (exp->macro) ++ { ++ for (unsigned int i = 0; i < exp->macro->arg_count; ++i) ++ vkd3d_string_buffer_cleanup(&exp->arg_values[i].text); ++ free(exp->arg_values); ++ } + --ctx->expansion_count; + TRACE("Expansion stack size is now %zu.\n", ctx->expansion_count); + } +@@ -310,15 +317,15 @@ static int return_token(int token, YYSTYPE *lval, const char *text) + + static const struct preproc_text *find_arg_expansion(struct preproc_ctx *ctx, const char *s) + { +- struct preproc_macro *macro; ++ struct preproc_expansion *exp; + unsigned int i; + +- if ((macro = preproc_get_top_macro(ctx))) ++ if ((exp = preproc_get_top_expansion(ctx)) && exp->macro) + { +- for (i = 0; i < macro->arg_count; ++i) ++ for (i = 0; i < exp->macro->arg_count; ++i) + { +- if (!strcmp(s, macro->arg_names[i])) +- return ¯o->arg_values[i]; ++ if (!strcmp(s, exp->macro->arg_names[i])) ++ return &exp->arg_values[i]; + } + } + return NULL; +@@ -330,7 +337,7 @@ static void preproc_text_add(struct preproc_text *text, const char *string) + } + + static bool preproc_push_expansion(struct preproc_ctx *ctx, +- const struct preproc_text *text, struct preproc_macro *macro) ++ const struct preproc_text *text, struct preproc_macro *macro, struct preproc_text *arg_values) + { + struct preproc_expansion *exp; + +@@ -342,6 +349,7 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx, + exp->buffer.lexer_buffer = yy_scan_bytes(text->text.buffer, text->text.content_size, ctx->scanner); + exp->buffer.location = text->location; + exp->macro = macro; ++ exp->arg_values = arg_values; + TRACE("Expansion stack size is now %zu.\n", ctx->expansion_count); + return true; + } +@@ 
-542,7 +550,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + + if ((expansion = find_arg_expansion(ctx, text))) + { +- preproc_push_expansion(ctx, expansion, NULL); ++ preproc_push_expansion(ctx, expansion, NULL, NULL); + continue; + } + +@@ -550,7 +558,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + { + if (!macro->arg_count) + { +- preproc_push_expansion(ctx, ¯o->body, macro); ++ preproc_push_expansion(ctx, ¯o->body, macro, NULL); + } + else + { +@@ -616,16 +624,19 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + case STATE_IDENTIFIER: + if (token == '(') + { +- struct preproc_text *first_arg = &func_state->macro->arg_values[0]; +- unsigned int i; ++ struct preproc_text *arg_values; ++ ++ if (!(arg_values = calloc(func_state->macro->arg_count, sizeof(*arg_values)))) ++ return 0; ++ ++ for (unsigned int i = 0; i < func_state->macro->arg_count; ++i) ++ vkd3d_string_buffer_init(&arg_values[i].text); ++ arg_values[0].location = *lloc; + + func_state->arg_count = 0; + func_state->paren_depth = 1; + func_state->state = STATE_ARGS; +- for (i = 0; i < func_state->macro->arg_count; ++i) +- func_state->macro->arg_values[i].text.content_size = 0; +- +- first_arg->location = *lloc; ++ func_state->arg_values = arg_values; + } + else + { +@@ -649,7 +660,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + VKD3D_ASSERT(func_state->macro->arg_count); + + if (func_state->arg_count < func_state->macro->arg_count) +- current_arg = &func_state->macro->arg_values[func_state->arg_count]; ++ current_arg = &func_state->arg_values[func_state->arg_count]; + + switch (token) + { +@@ -664,7 +675,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + + if ((expansion = find_arg_expansion(ctx, text))) + { +- preproc_push_expansion(ctx, expansion, NULL); ++ preproc_push_expansion(ctx, expansion, NULL, NULL); + continue; + } + +@@ -700,7 +711,8 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + { + if 
(++func_state->arg_count == func_state->macro->arg_count) + { +- preproc_push_expansion(ctx, &func_state->macro->body, func_state->macro); ++ preproc_push_expansion(ctx, &func_state->macro->body, ++ func_state->macro, func_state->arg_values); + } + else + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y +index 366e351e3b5..c6be17bd230 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.y ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y +@@ -91,7 +91,6 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati + size_t arg_count, const struct vkd3d_shader_location *body_loc, struct vkd3d_string_buffer *body) + { + struct preproc_macro *macro; +- unsigned int i; + int ret; + + if ((macro = preproc_find_macro(ctx, name))) +@@ -108,14 +107,6 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati + macro->name = name; + macro->arg_names = arg_names; + macro->arg_count = arg_count; +- macro->arg_values = NULL; +- if (arg_count && !(macro->arg_values = vkd3d_calloc(arg_count, sizeof(*macro->arg_values)))) +- { +- vkd3d_free(macro); +- return false; +- } +- for (i = 0; i < arg_count; ++i) +- vkd3d_string_buffer_init(¯o->arg_values[i].text); + macro->body.text = *body; + macro->body.location = *body_loc; + ret = rb_put(&ctx->macros, name, ¯o->entry); +@@ -129,12 +120,8 @@ void preproc_free_macro(struct preproc_macro *macro) + + vkd3d_free(macro->name); + for (i = 0; i < macro->arg_count; ++i) +- { +- vkd3d_string_buffer_cleanup(¯o->arg_values[i].text); + vkd3d_free(macro->arg_names[i]); +- } + vkd3d_free(macro->arg_names); +- vkd3d_free(macro->arg_values); + vkd3d_string_buffer_cleanup(¯o->body.text); + vkd3d_free(macro); + } diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 49979ab2491..11c054a28f5 100644 +index 49979ab2491..6a28e2cd68e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ 
-97,15 +97,37 @@ static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_co @@ -12094,12 +16908,12 @@ index 49979ab2491..11c054a28f5 100644 vkd3d_spirv_build_op_execution_mode(&builder->execution_mode_stream, builder->main_function_id, SpvExecutionModeInvocations, &builder->invocation_count, 1); - vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream); -- + - vkd3d_spirv_stream_append(&stream, &builder->debug_stream); - vkd3d_spirv_stream_append(&stream, &builder->annotation_stream); - vkd3d_spirv_stream_append(&stream, &builder->global_stream); - vkd3d_spirv_stream_append(&stream, &builder->function_stream); - +- - if (!(code = vkd3d_calloc(stream.word_count, sizeof(*code)))) + if (!vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream) + || !vkd3d_spirv_stream_append(&stream, &builder->debug_stream) @@ -12122,7 +16936,73 @@ index 49979ab2491..11c054a28f5 100644 return true; } -@@ -3316,8 +3331,10 @@ static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_ +@@ -2647,8 +2662,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p + if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) + { + compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); +- compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count +- && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY; + + compiler->shader_interface = *shader_interface; + if (shader_interface->push_constant_buffer_count) +@@ -2675,6 +2688,11 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p + } + } + ++ if (compiler->shader_type == VKD3D_SHADER_TYPE_VERTEX) ++ compiler->emit_point_size = true; ++ else if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) ++ compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count; ++ + compiler->scan_descriptor_info = scan_descriptor_info; + + 
compiler->phase = VKD3DSIH_INVALID; +@@ -3252,18 +3270,6 @@ static void spirv_compiler_emit_register_debug_name(struct vkd3d_spirv_builder * + vkd3d_spirv_build_op_name(builder, id, "%s", debug_name); + } + +-static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, +- struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, +- enum vkd3d_shader_component_type component_type, unsigned int component_count) +-{ +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- uint32_t type_id, ptr_type_id; +- +- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); +- ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); +- return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); +-} +- + static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, + struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, + enum vkd3d_shader_component_type component_type, unsigned int component_count, +@@ -3273,10 +3279,6 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil + uint32_t type_id, length_id, ptr_type_id; + unsigned int i; + +- if (!length_count) +- return spirv_compiler_emit_variable(compiler, +- stream, storage_class, component_type, component_count); +- + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + for (i = 0; i < length_count; ++i) + { +@@ -3290,6 +3292,14 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil + return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); + } + ++static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, ++ struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, ++ enum vkd3d_shader_component_type component_type, unsigned int component_count) ++{ ++ return spirv_compiler_emit_array_variable(compiler, stream, storage_class, ++ component_type, 
component_count, NULL, 0); ++} ++ + static const struct vkd3d_spec_constant_info + { + enum vkd3d_shader_parameter_name name; +@@ -3316,8 +3326,10 @@ static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_ return NULL; } @@ -12134,7 +17014,7 @@ index 49979ab2491..11c054a28f5 100644 if (!compiler->current_spec_constant_id) { unsigned int i, id = 0; -@@ -3327,28 +3344,52 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com +@@ -3327,28 +3339,52 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com const struct vkd3d_shader_parameter1 *current = &compiler->program->parameters[i]; if (current->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) @@ -12194,7 +17074,7 @@ index 49979ab2491..11c054a28f5 100644 if (info) vkd3d_spirv_build_op_name(builder, id, "%s", info->debug_name); -@@ -3365,7 +3406,8 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile +@@ -3365,7 +3401,8 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile } static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler, @@ -12204,7 +17084,7 @@ index 49979ab2491..11c054a28f5 100644 { unsigned int i; -@@ -3375,17 +3417,17 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler +@@ -3375,17 +3412,17 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler return compiler->spec_constants[i].id; } @@ -12225,7 +17105,7 @@ index 49979ab2491..11c054a28f5 100644 ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, compiler->spirv_parameter_info[index].buffer_id, -@@ -3393,48 +3435,49 @@ static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compi +@@ -3393,48 +3430,49 @@ static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compi return 
vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); } @@ -12297,7 +17177,7 @@ index 49979ab2491..11c054a28f5 100644 } static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *compiler, -@@ -4210,7 +4253,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, +@@ -4210,7 +4248,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, else if (reg->type == VKD3DSPR_UNDEF) return spirv_compiler_emit_load_undef(compiler, reg, write_mask); else if (reg->type == VKD3DSPR_PARAMETER) @@ -12307,7 +17187,7 @@ index 49979ab2491..11c054a28f5 100644 component_count = vsir_write_mask_component_count(write_mask); component_type = vkd3d_component_type_from_data_type(reg->data_type); -@@ -4500,9 +4544,24 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, +@@ -4500,9 +4539,24 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, static void spirv_compiler_emit_store_dst(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst, uint32_t val_id) { @@ -12334,7 +17214,66 @@ index 49979ab2491..11c054a28f5 100644 spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, val_id); } -@@ -6120,12 +6179,12 @@ static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler, +@@ -4809,6 +4863,10 @@ static const struct vkd3d_spirv_builtin vkd3d_pixel_shader_position_builtin = + { + VKD3D_SHADER_COMPONENT_FLOAT, 4, SpvBuiltInFragCoord, frag_coord_fixup, + }; ++static const struct vkd3d_spirv_builtin vkd3d_output_point_size_builtin = ++{ ++ VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize, ++}; + static const struct + { + enum vkd3d_shader_register_type reg_type; +@@ -5398,7 +5456,11 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, + VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); + VKD3D_ASSERT(reg->idx_count < 2); + +- if (!(builtin = 
get_spirv_builtin_for_register(reg->type))) ++ if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) ++ { ++ builtin = &vkd3d_output_point_size_builtin; ++ } ++ else if (!(builtin = get_spirv_builtin_for_register(reg->type))) + { + FIXME("Unhandled register %#x.\n", reg->type); + return; +@@ -5451,7 +5513,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + const struct shader_signature *shader_signature; + const struct vkd3d_spirv_builtin *builtin; + enum vkd3d_shader_sysval_semantic sysval; +- uint32_t write_mask, reg_write_mask; ++ uint32_t write_mask; + bool use_private_variable = false; + struct vkd3d_symbol reg_symbol; + SpvStorageClass storage_class; +@@ -5502,7 +5564,6 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + use_private_variable = true; + } + +- reg_write_mask = write_mask >> component_idx; + vkd3d_symbol_make_io(&reg_symbol, reg_type, element_idx); + + if (rb_get(&compiler->symbol_table, &reg_symbol)) +@@ -5580,7 +5641,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + + vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, + use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, +- use_private_variable ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); ++ use_private_variable ?
VKD3DSP_WRITEMASK_ALL : write_mask); + reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; + VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); + +@@ -5591,7 +5652,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + if (use_private_variable) + { + compiler->private_output_variable[element_idx] = var_id; +- compiler->private_output_variable_write_mask[element_idx] |= reg_write_mask; ++ compiler->private_output_variable_write_mask[element_idx] |= write_mask >> component_idx; + if (!compiler->epilogue_function_id) + compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder); + } +@@ -6120,12 +6181,12 @@ static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler, static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *compiler, SpvStorageClass storage_class, uint32_t type_id, const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, @@ -12349,7 +17288,7 @@ index 49979ab2491..11c054a28f5 100644 uint32_t array_type_id, ptr_type_id, var_id; bool write_only = false, coherent = false; struct vkd3d_symbol symbol; -@@ -6135,12 +6194,11 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * +@@ -6135,12 +6196,11 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * resource_type, is_uav_counter, &binding_address); var_info->binding_base_idx = binding_address.binding_base_idx; @@ -12365,7 +17304,7 @@ index 49979ab2491..11c054a28f5 100644 } if (binding.count == 1 && range->first == binding_address.binding_base_idx && range->last != ~0u -@@ -6194,11 +6252,12 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * +@@ -6194,11 +6254,12 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * } static void 
spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, @@ -12379,7 +17318,7 @@ index 49979ab2491..11c054a28f5 100644 struct vkd3d_push_constant_buffer_binding *push_cb; struct vkd3d_descriptor_variable_info var_info; struct vkd3d_shader_register reg; -@@ -6206,7 +6265,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, +@@ -6206,7 +6267,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, unsigned int size; vsir_register_init(&reg, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 3); @@ -12388,7 +17327,7 @@ index 49979ab2491..11c054a28f5 100644 reg.idx[1].offset = range->first; reg.idx[2].offset = range->last; -@@ -6239,7 +6298,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, +@@ -6239,7 +6300,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, @@ -12397,7 +17336,7 @@ index 49979ab2491..11c054a28f5 100644 vkd3d_symbol_make_register(&reg_symbol, &reg); vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, -@@ -6275,7 +6334,7 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi +@@ -6275,7 +6336,7 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi } static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, @@ -12406,7 +17345,7 @@ index 49979ab2491..11c054a28f5 100644 { const SpvStorageClass storage_class = SpvStorageClassUniformConstant; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -@@ -6285,7 +6344,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi +@@ -6285,7 +6346,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi uint32_t type_id, var_id; vsir_register_init(&reg, VKD3DSPR_SAMPLER,
VKD3D_DATA_FLOAT, 1); @@ -12415,7 +17354,7 @@ index 49979ab2491..11c054a28f5 100644 vkd3d_symbol_make_sampler(&reg_symbol, &reg); reg_symbol.info.sampler.range = *range; -@@ -6295,8 +6354,8 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi +@@ -6295,8 +6356,8 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi return; type_id = vkd3d_spirv_get_op_type_sampler(builder); @@ -12426,7 +17365,7 @@ index 49979ab2491..11c054a28f5 100644 vkd3d_symbol_make_register(&reg_symbol, &reg); vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, -@@ -6346,7 +6405,7 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty +@@ -6346,7 +6407,7 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, const struct vkd3d_spirv_resource_type *resource_type_info, enum vkd3d_shader_component_type data_type, @@ -12435,7 +17374,7 @@ index 49979ab2491..11c054a28f5 100644 { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_descriptor_info1 *d; -@@ -6369,7 +6428,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler +@@ -6369,7 +6430,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler sampled_type_id = vkd3d_spirv_get_type_id(builder, data_type, 1); return vkd3d_spirv_get_op_type_image(builder, sampled_type_id, resource_type_info->dim, @@ -12444,7 +17383,7 @@ index 49979ab2491..11c054a28f5 100644 reg->type == VKD3DSPR_UAV ?
2 : 1, format); } -@@ -6384,18 +6443,14 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi +@@ -6384,18 +6445,14 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi const struct vkd3d_shader_combined_resource_sampler *current; uint32_t image_type_id, type_id, ptr_type_id, var_id; enum vkd3d_shader_binding_flag resource_type_flag; @@ -12463,7 +17402,7 @@ index 49979ab2491..11c054a28f5 100644 current = &shader_interface->combined_samplers[i]; if (current->resource_space != resource_range->space || current->resource_index != resource_range->first) -@@ -6417,16 +6472,8 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi +@@ -6417,16 +6474,8 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi current->sampler_space, current->binding.count); } @@ -12481,7 +17420,7 @@ index 49979ab2491..11c054a28f5 100644 type_id = vkd3d_spirv_get_op_type_sampled_image(builder, image_type_id); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); -@@ -6461,21 +6508,24 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi +@@ -6461,21 +6510,24 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi } static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, @@ -12510,7 +17449,7 @@ index 49979ab2491..11c054a28f5 100644 if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -@@ -6489,7 +6539,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp +@@ -6489,7 +6541,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp return; } @@ -12519,7 +17458,7 @@ index 49979ab2491..11c054a28f5 100644 if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) { -@@ -6517,19 +6567,15 @@ static void 
spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp +@@ -6517,19 +6569,15 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp else { type_id = spirv_compiler_get_image_type_id(compiler, &reg, range, @@ -12543,7 +17482,7 @@ index 49979ab2491..11c054a28f5 100644 { if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, -@@ -6543,7 +6589,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp +@@ -6543,7 +6591,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp compiler->use_invocation_interlock = true; } @@ -12552,7 +17491,7 @@ index 49979ab2491..11c054a28f5 100644 { VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */ -@@ -6571,7 +6617,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp +@@ -6571,7 +6619,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp } counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, @@ -12561,7 +17500,49 @@ index 49979ab2491..11c054a28f5 100644 } } -@@ -8433,11 +8479,10 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, +@@ -6709,7 +6757,8 @@ static void spirv_compiler_emit_dcl_input_primitive(struct spirv_compiler *compi + + static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) + { +- static const struct vkd3d_spirv_builtin point_size = {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize}; ++ if (compiler->program->has_point_size) ++ return; + + /* Set the point size. Point sprites are not supported in d3d10+, but + * point primitives can still be used with e.g. stream output. Vulkan
Vulkan +@@ -6723,7 +6772,8 @@ static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) + || compiler->write_tess_geom_point_size) + { + vkd3d_spirv_build_op_store(&compiler->spirv_builder, +- spirv_compiler_emit_builtin_variable(compiler, &point_size, SpvStorageClassOutput, 0), ++ spirv_compiler_emit_builtin_variable(compiler, ++ &vkd3d_output_point_size_builtin, SpvStorageClassOutput, 0), + spirv_compiler_get_constant_float(compiler, 1.0f), SpvMemoryAccessMaskNone); + } + } +@@ -6845,10 +6895,9 @@ static void spirv_compiler_emit_tessellator_partitioning(struct spirv_compiler * + spirv_compiler_emit_execution_mode(compiler, mode, NULL, 0); + } + +-static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static void spirv_compiler_emit_thread_group_size(struct spirv_compiler *compiler, ++ const struct vsir_thread_group_size *group_size) + { +- const struct vkd3d_shader_thread_group_size *group_size = &instruction->declaration.thread_group_size; + const uint32_t local_size[] = {group_size->x, group_size->y, group_size->z}; + + spirv_compiler_emit_execution_mode(compiler, +@@ -7391,7 +7440,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, + uint32_t components[VKD3D_VEC4_SIZE]; + + if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA +- || dst->modifiers || src->modifiers) ++ || src->reg.type == VKD3DSPR_PARAMETER || dst->modifiers || src->modifiers) + goto general_implementation; + + spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); +@@ -8433,11 +8482,10 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t sampler_var_id, sampler_id, sampled_image_type_id; const struct vkd3d_symbol *symbol = NULL; @@ -12574,7 +17555,7 @@ index 49979ab2491..11c054a28f5 100644 if 
(resource_reg->type == VKD3DSPR_RESOURCE) symbol = spirv_compiler_find_combined_sampler(compiler, resource_reg, sampler_reg); -@@ -8491,7 +8536,7 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, +@@ -8491,7 +8539,7 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, image->image_type_id = spirv_compiler_get_image_type_id(compiler, resource_reg, &symbol->info.resource.range, image->resource_type_info, @@ -12583,7 +17564,7 @@ index 49979ab2491..11c054a28f5 100644 if (sampled) { -@@ -9569,7 +9614,7 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co +@@ -9569,7 +9617,7 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co if (src->reg.type == VKD3DSPR_RASTERIZER) { val_id = spirv_compiler_emit_shader_parameter(compiler, @@ -12592,7 +17573,41 @@ index 49979ab2491..11c054a28f5 100644 } else { -@@ -10564,23 +10609,16 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c +@@ -10183,9 +10231,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + spirv_compiler_emit_tessellator_partitioning(compiler, + instruction->declaration.tessellator_partitioning); + break; +- case VKD3DSIH_DCL_THREAD_GROUP: +- spirv_compiler_emit_dcl_thread_group(compiler, instruction); +- break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +@@ -10506,7 +10551,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: + case VKD3DSIH_DCL_UAV_RAW: + case VKD3DSIH_DCL_UAV_STRUCTURED: +- case VKD3DSIH_DCL_UAV_TYPED: + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_NOP: + /* nothing to do */ +@@ -10543,6 +10587,15 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) + else + spirv_compiler_emit_input(compiler, VKD3DSPR_PATCHCONST, i); + } ++ ++ if (compiler->program->has_point_size) ++ { ++ 
struct vkd3d_shader_dst_param dst; ++ ++ vsir_dst_param_init(&dst, VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); ++ dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; ++ spirv_compiler_emit_output_register(compiler, &dst); ++ } + } + + static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) +@@ -10564,23 +10617,16 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c switch (descriptor->type) { case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: @@ -12619,7 +17634,7 @@ index 49979ab2491..11c054a28f5 100644 break; default: -@@ -10600,7 +10638,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct +@@ -10600,10 +10646,13 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct enum vkd3d_result result = VKD3D_OK; unsigned int i, max_element_count; @@ -12628,7 +17643,22 @@ index 49979ab2491..11c054a28f5 100644 compile_info, compiler->message_context)) < 0) return result; -@@ -10624,7 +10662,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct ++ VKD3D_ASSERT(program->normalised_io); ++ VKD3D_ASSERT(program->normalised_hull_cp_io); ++ + max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); + if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) + return VKD3D_ERROR_OUT_OF_MEMORY; +@@ -10612,6 +10661,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + spirv_compiler_emit_temps(compiler, program->temp_count); + if (program->ssa_count) + spirv_compiler_allocate_ssa_register_ids(compiler, program->ssa_count); ++ if (compiler->shader_type == VKD3D_SHADER_TYPE_COMPUTE) ++ spirv_compiler_emit_thread_group_size(compiler, &program->thread_group_size); + + spirv_compiler_emit_descriptor_declarations(compiler); + +@@ -10624,7 +10675,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler 
*compiler, struct { uint32_t type_id, struct_id, ptr_type_id, var_id; @@ -12640,7 +17670,7 @@ index 49979ab2491..11c054a28f5 100644 struct_id = vkd3d_spirv_build_op_type_struct(builder, &type_id, 1); vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 84f641cc316..00a525c9ac3 100644 +index 84f641cc316..848e78a34d3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -23,6 +23,7 @@ @@ -12774,7 +17804,23 @@ index 84f641cc316..00a525c9ac3 100644 } ins->declaration.register_semantic.sysval_semantic = *tokens; } -@@ -1237,7 +1302,7 @@ static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, ui +@@ -1224,11 +1289,14 @@ static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instructio + } + + static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) + { ++ struct vsir_program *program = sm4->p.program; ++ + ins->declaration.thread_group_size.x = *tokens++; + ins->declaration.thread_group_size.y = *tokens++; + ins->declaration.thread_group_size.z = *tokens++; ++ program->thread_group_size = ins->declaration.thread_group_size; + } + + static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, +@@ -1237,7 +1305,7 @@ static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, ui struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; const uint32_t *end = &tokens[token_count]; @@ -12783,7 +17829,7 @@ index 84f641cc316..00a525c9ac3 100644 shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, 
&resource->resource.range); ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -@@ -1249,7 +1314,7 @@ static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction * +@@ -1249,7 +1317,7 @@ static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction * struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; const uint32_t *end = &tokens[token_count]; @@ -12792,7 +17838,7 @@ index 84f641cc316..00a525c9ac3 100644 shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; resource->byte_stride = *tokens++; -@@ -1286,7 +1351,7 @@ static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruct +@@ -1286,7 +1354,7 @@ static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruct struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; const uint32_t *end = &tokens[token_count]; @@ -12801,7 +17847,7 @@ index 84f641cc316..00a525c9ac3 100644 shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); resource->byte_stride = *tokens++; if (resource->byte_stride % 4) -@@ -1300,7 +1365,7 @@ static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *in +@@ -1300,7 +1368,7 @@ static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *in struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; const uint32_t *end = &tokens[token_count]; @@ -12810,25 +17856,31 @@ index 84f641cc316..00a525c9ac3 100644 shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); } -@@ -1330,11 
+1395,17 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = +@@ -1330,11 +1398,21 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, }; +-struct tpf_writer +struct sm4_stat -+{ + { + uint32_t fields[VKD3D_STAT_COUNT]; +}; + - struct tpf_writer - { ++struct tpf_compiler ++{ ++ /* OBJECTIVE: We want to get rid of this HLSL IR specific field. */ struct hlsl_ctx *ctx; - struct vkd3d_bytecode_buffer *buffer; +- struct vkd3d_bytecode_buffer *buffer; ++ struct vsir_program *program; struct vkd3d_sm4_lookup_tables lookup; + struct sm4_stat *stat; ++ ++ struct vkd3d_bytecode_buffer *buffer; ++ struct dxbc_writer dxbc; }; static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) -@@ -1400,8 +1471,8 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1400,8 +1478,8 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, @@ -12839,7 +17891,7 @@ index 84f641cc316..00a525c9ac3 100644 {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, {VKD3D_SM4_OP_LT, VKD3DSIH_LTO, "u", "ff"}, -@@ -1417,7 +1488,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1417,7 +1495,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, @@ -12848,7 +17900,7 @@ index 84f641cc316..00a525c9ac3 100644 {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", shader_sm4_read_conditional_op}, -@@ -1426,12 +1497,12 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1426,12 
+1504,12 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, @@ -12867,7 +17919,7 @@ index 84f641cc316..00a525c9ac3 100644 {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, -@@ -1480,10 +1551,10 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1480,10 +1558,10 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) shader_sm4_read_dcl_indexable_temp}, {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", shader_sm4_read_dcl_global_flags}, @@ -12882,7 +17934,7 @@ index 84f641cc316..00a525c9ac3 100644 {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, -@@ -1492,14 +1563,14 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1492,14 +1570,14 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", shader_sm5_read_fcall}, @@ -12901,7 +17953,7 @@ index 84f641cc316..00a525c9ac3 100644 {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, -@@ -1551,33 +1622,33 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1551,33 +1629,33 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) shader_sm5_read_dcl_resource_raw}, {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", shader_sm5_read_dcl_resource_structured}, @@ -12962,7 +18014,7 @@ index 84f641cc316..00a525c9ac3 
100644 {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", shader_sm5_read_sync}, {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, -@@ -1604,21 +1675,21 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1604,21 +1682,21 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, @@ -12997,7 +18049,7 @@ index 84f641cc316..00a525c9ac3 100644 {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, }; -@@ -1662,6 +1733,161 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1662,6 +1740,161 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF, VKD3D_SM4_SWIZZLE_VEC4}, }; @@ -13159,11 +18211,17 @@ index 84f641cc316..00a525c9ac3 100644 memset(lookup, 0, sizeof(*lookup)); for (i = 0; i < ARRAY_SIZE(opcode_table); ++i) -@@ -1678,12 +1904,21 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1678,13 +1911,13 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) lookup->register_type_info_from_sm4[info->sm4_type] = info; lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; } -+ +-} + +-static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +-{ +- tpf->ctx = ctx; +- tpf->buffer = buffer; +- init_sm4_lookup_tables(&tpf->lookup); + for (i = 0; i < ARRAY_SIZE(stat_field_table); ++i) + { + const struct vkd3d_sm4_stat_field_info *info = &stat_field_table[i]; @@ -13172,17 +18230,8 @@ index 84f641cc316..00a525c9ac3 100644 + } } --static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct sm4_stat *stat, -+ 
struct vkd3d_bytecode_buffer *buffer) - { - tpf->ctx = ctx; - tpf->buffer = buffer; -+ tpf->stat = stat; - init_sm4_lookup_tables(&tpf->lookup); - } - -@@ -1721,6 +1956,16 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( + static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode( +@@ -1721,6 +1954,16 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( return register_type_info->default_src_swizzle_type; } @@ -13199,7 +18248,7 @@ index 84f641cc316..00a525c9ac3 100644 static enum vkd3d_data_type map_data_type(char t) { switch (t) -@@ -1735,12 +1980,8 @@ static enum vkd3d_data_type map_data_type(char t) +@@ -1735,12 +1978,8 @@ static enum vkd3d_data_type map_data_type(char t) return VKD3D_DATA_UINT; case 'O': return VKD3D_DATA_OPAQUE; @@ -13214,16 +18263,39 @@ index 84f641cc316..00a525c9ac3 100644 default: ERR("Invalid data type '%c'.\n", t); return VKD3D_DATA_FLOAT; -@@ -2553,7 +2794,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro +@@ -2553,7 +2792,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro version.minor = VKD3D_SM4_VERSION_MINOR(version_token); /* Estimate instruction count to avoid reallocation in most shaders. 
*/ - if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20)) -+ if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20, VSIR_CF_STRUCTURED)) ++ if (!vsir_program_init(program, compile_info, ++ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, false)) return false; vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); sm4->ptr = sm4->start; -@@ -2706,9 +2947,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con +@@ -2670,6 +2910,21 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) + uninvert_used_masks(&program->patch_constant_signature); + ++ switch (program->shader_version.type) ++ { ++ case VKD3D_SHADER_TYPE_HULL: ++ case VKD3D_SHADER_TYPE_DOMAIN: ++ break; ++ ++ default: ++ if (program->patch_constant_signature.element_count != 0) ++ { ++ WARN("The patch constant signature only makes sense for Hull and Domain Shaders, ignoring it.\n"); ++ shader_signature_cleanup(&program->patch_constant_signature); ++ } ++ break; ++ } ++ + if (!shader_sm4_parser_validate_signature(&sm4, &program->input_signature, + sm4.input_register_masks, "Input") + || !shader_sm4_parser_validate_signature(&sm4, &program->output_signature, +@@ -2706,9 +2961,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con && !sm4.has_control_point_phase && !sm4.p.failed) shader_sm4_validate_default_phase_index_ranges(&sm4); @@ -13233,7 +18305,7 @@ index 84f641cc316..00a525c9ac3 100644 if (sm4.p.failed) { WARN("Failed to parse shader.\n"); -@@ -2716,6 +2954,17 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con +@@ -2716,10 +2968,21 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con return VKD3D_ERROR_INVALID_SHADER; } @@ -13242,7 +18314,7 @@ index 84f641cc316..00a525c9ac3 100644 + WARN("Failed to 
validate shader after parsing, ret %d.\n", ret); + + if (TRACE_ON()) -+ vkd3d_shader_trace(program); ++ vsir_program_trace(program); + + vsir_program_cleanup(program); + return ret; @@ -13251,18 +18323,105 @@ index 84f641cc316..00a525c9ac3 100644 return VKD3D_OK; } -@@ -2782,8 +3031,8 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem +-static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); ++static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block); + + static bool type_is_integer(const struct hlsl_type *type) + { +@@ -2735,8 +2998,8 @@ static bool type_is_integer(const struct hlsl_type *type) + } + } + +-bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, enum vkd3d_shader_register_type *type, bool *has_idx) ++bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, ++ const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) + { + unsigned int i; + +@@ -2756,6 +3019,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID, false}, + ++ {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_OUTPOINTID, false}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_PRIMID, false}, ++ + /* Put sv_target in this table, instead of letting it fall through to + * default varying allocation, so that the register index matches the + * usage index. 
*/ +@@ -2768,9 +3034,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { +- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) ++ if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) + && output == register_table[i].output +- && ctx->profile->type == register_table[i].shader_type) ++ && version->type == register_table[i].shader_type) + { + if (type) + *type = register_table[i].type; +@@ -2782,8 +3048,57 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem return false; } -bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3D_NAME *usage) -+bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, -+ struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output) ++static bool get_tessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, ++ enum vkd3d_tessellator_domain domain, uint32_t index) ++{ ++ switch (domain) ++ { ++ case VKD3D_TESSELLATOR_DOMAIN_LINE: ++ if (index == 0) ++ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; ++ else if (index == 1) ++ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; ++ else ++ return false; ++ return true; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: ++ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; ++ return index < 3; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_QUAD: ++ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE; ++ return index < 4; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static bool get_insidetessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, ++ enum vkd3d_tessellator_domain domain, uint32_t index) ++{ ++ switch (domain) ++ { ++ case VKD3D_TESSELLATOR_DOMAIN_LINE: ++ return false; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: ++ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIINT; ++ return index == 0; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_QUAD: ++ 
*semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADINT; ++ return index < 2; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, ++ const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, ++ const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func) { unsigned int i; -@@ -2792,7 +3041,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant +@@ -2792,7 +3107,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant const char *name; bool output; enum vkd3d_shader_type shader_type; @@ -13271,7 +18430,7 @@ index 84f641cc316..00a525c9ac3 100644 } semantics[] = { -@@ -2800,46 +3049,47 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant +@@ -2800,46 +3115,79 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, @@ -13312,6 +18471,12 @@ index 84f641cc316..00a525c9ac3 100644 + {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, + ++ {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_HULL, ~0u}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, ++ ++ {"sv_position", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_POSITION}, ++ + {"position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_PRIMITIVE_ID}, @@ -13336,127 +18501,191 @@ index 84f641cc316..00a525c9ac3 100644 + {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, 
VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, }; - bool needs_compat_mapping = ascii_strncasecmp(semantic->name, "sv_", 3); -+ bool needs_compat_mapping = ascii_strncasecmp(hlsl_semantic->name, "sv_", 3); ++ bool needs_compat_mapping = ascii_strncasecmp(semantic_name, "sv_", 3); ++ ++ if (is_patch_constant_func) ++ { ++ if (output) ++ { ++ if (!ascii_strcasecmp(semantic_name, "sv_tessfactor")) ++ return get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); ++ if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor")) ++ return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); ++ if (!ascii_strcasecmp(semantic_name, "sv_position")) ++ { ++ *sysval_semantic = VKD3D_SHADER_SV_NONE; ++ return true; ++ } ++ } ++ else ++ { ++ if (!ascii_strcasecmp(semantic_name, "sv_primitiveid") ++ || !ascii_strcasecmp(semantic_name, "sv_position")) ++ { ++ *sysval_semantic = ~0u; ++ return true; ++ } ++ return false; ++ } ++ } for (i = 0; i < ARRAY_SIZE(semantics); ++i) { - if (!ascii_strcasecmp(semantic->name, semantics[i].name) -+ if (!ascii_strcasecmp(hlsl_semantic->name, semantics[i].name) ++ if (!ascii_strcasecmp(semantic_name, semantics[i].name) && output == semantics[i].output - && (ctx->semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) - && ctx->profile->type == semantics[i].shader_type) +- && (ctx->semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) +- && ctx->profile->type == semantics[i].shader_type) ++ && (semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) ++ && version->type == semantics[i].shader_type) { - *usage = semantics[i].usage; -+ *semantic = semantics[i].semantic; ++ *sysval_semantic = semantics[i].semantic; return true; } } -@@ -2847,7 +3097,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant +@@ -2847,7 +3195,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant if (!needs_compat_mapping) 
return false; - *usage = D3D_NAME_UNDEFINED; -+ *semantic = VKD3D_SHADER_SV_NONE; ++ *sysval_semantic = VKD3D_SHADER_SV_NONE; return true; } -@@ -2880,16 +3130,16 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +@@ -2865,110 +3213,46 @@ static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + ctx->result = buffer->status; + } + +-static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) ++static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag) + { ++ bool output = tag == TAG_OSGN || tag == TAG_PCSG; + struct vkd3d_bytecode_buffer buffer = {0}; +- struct vkd3d_string_buffer *string; +- const struct hlsl_ir_var *var; +- size_t count_position; + unsigned int i; +- bool ret; + +- count_position = put_u32(&buffer, 0); ++ put_u32(&buffer, signature->element_count); + put_u32(&buffer, 8); /* unknown */ + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ for (i = 0; i < signature->element_count; ++i) { - unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; -+ enum vkd3d_shader_sysval_semantic semantic; - uint32_t usage_idx, reg_idx; +- unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; +- uint32_t usage_idx, reg_idx; - D3D_NAME usage; - bool has_idx; - - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; +- bool has_idx; ++ const struct signature_element *element = &signature->elements[i]; ++ enum vkd3d_shader_sysval_semantic sysval; ++ uint32_t used_mask = element->used_mask; +- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) +- continue; +- - ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -+ ret = sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); - VKD3D_ASSERT(ret); +- 
VKD3D_ASSERT(ret); - if (usage == ~0u) -+ if (semantic == ~0u) - continue; - usage_idx = var->semantic.index; +- continue; +- usage_idx = var->semantic.index; +- +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, &has_idx)) +- { +- reg_idx = has_idx ? var->semantic.index : ~0u; +- } +- else +- { +- VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); +- reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; +- } +- +- use_mask = width; /* FIXME: accurately report use mask */ + if (output) +- use_mask = 0xf ^ use_mask; ++ used_mask = 0xf ^ used_mask; -@@ -2908,26 +3158,26 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - use_mask = 0xf ^ use_mask; - - /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */ +- /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */ - if (usage >= 64) - usage = 0; -+ if (semantic >= VKD3D_SHADER_SV_TARGET) -+ semantic = VKD3D_SHADER_SV_NONE; ++ sysval = element->sysval_semantic; ++ if (sysval >= VKD3D_SHADER_SV_TARGET) ++ sysval = VKD3D_SHADER_SV_NONE; put_u32(&buffer, 0); /* name */ - put_u32(&buffer, usage_idx); +- put_u32(&buffer, usage_idx); - put_u32(&buffer, usage); -+ put_u32(&buffer, semantic); - switch (var->data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: +- switch (var->data_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: - put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32); -+ put_u32(&buffer, VKD3D_SHADER_COMPONENT_FLOAT); - break; - - case HLSL_TYPE_INT: +- break; +- +- case HLSL_TYPE_INT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32); -+ put_u32(&buffer, VKD3D_SHADER_COMPONENT_INT); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_UINT: - put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32); -+ put_u32(&buffer, VKD3D_SHADER_COMPONENT_UINT); - break; - - default: -@@ -2935,7 +3185,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, 
struct dxbc_writer *dxbc, - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid data type %s for semantic variable %s.", string->buffer, var->name); - hlsl_release_string_buffer(ctx, string); +- break; +- +- default: +- if ((string = hlsl_type_to_string(ctx, var->data_type))) +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Invalid data type %s for semantic variable %s.", string->buffer, var->name); +- hlsl_release_string_buffer(ctx, string); - put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN); -+ put_u32(&buffer, VKD3D_SHADER_COMPONENT_VOID); - } - put_u32(&buffer, reg_idx); - put_u32(&buffer, vkd3d_make_u16(width, use_mask)); -@@ -2944,25 +3194,25 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - i = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -- const char *semantic = var->semantic.name; -+ enum vkd3d_shader_sysval_semantic semantic; -+ const char *name = var->semantic.name; - size_t string_offset; -- D3D_NAME usage; - - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - -- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -- if (usage == ~0u) -+ sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); -+ if (semantic == ~0u) - continue; - -- if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) -+ if (semantic == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) - string_offset = put_string(&buffer, "SV_Target"); -- else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) -+ else if (semantic == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) - string_offset = put_string(&buffer, "SV_Depth"); -- else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) -+ else if (semantic == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) - string_offset = put_string(&buffer, "SV_Position"); - 
else -- string_offset = put_string(&buffer, semantic); -+ string_offset = put_string(&buffer, name); - set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); +- } +- put_u32(&buffer, reg_idx); +- put_u32(&buffer, vkd3d_make_u16(width, use_mask)); ++ put_u32(&buffer, element->semantic_index); ++ put_u32(&buffer, sysval); ++ put_u32(&buffer, element->component_type); ++ put_u32(&buffer, element->register_index); ++ put_u32(&buffer, vkd3d_make_u16(element->mask, used_mask)); } -@@ -2990,6 +3240,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +- i = 0; +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ for (i = 0; i < signature->element_count; ++i) + { +- const char *semantic = var->semantic.name; ++ const struct signature_element *element = &signature->elements[i]; + size_t string_offset; +- D3D_NAME usage; +- +- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) +- continue; +- +- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); +- if (usage == ~0u) +- continue; + +- if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) +- string_offset = put_string(&buffer, "SV_Target"); +- else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) +- string_offset = put_string(&buffer, "SV_Depth"); +- else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) +- string_offset = put_string(&buffer, "SV_Position"); +- else +- string_offset = put_string(&buffer, semantic); +- set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); ++ string_offset = put_string(&buffer, element->semantic_name); ++ set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); + } + +- set_u32(&buffer, count_position, i); +- +- add_section(ctx, dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); ++ add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); + } + + static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +@@ -2990,6 +3274,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -13464,7 +18693,7 @@ index 84f641cc316..00a525c9ac3 100644 case HLSL_CLASS_STRUCT: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: -@@ -3123,24 +3374,24 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) +@@ -3123,24 +3408,24 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) vkd3d_unreachable(); } @@ -13494,7 +18723,7 @@ index 84f641cc316..00a525c9ac3 100644 default: vkd3d_unreachable(); -@@ -3398,6 +3649,48 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un +@@ -3398,6 +3683,48 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un return extern_resources; } @@ -13543,7 +18772,7 @@ index 84f641cc316..00a525c9ac3 100644 static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t); -@@ -3471,7 +3764,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3471,7 +3798,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { unsigned int dimx = resource->component_type->e.resource.format->dimx; @@ -13552,7 +18781,16 @@ index 84f641cc316..00a525c9ac3 100644 put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; -@@ -3598,7 +3891,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3552,7 +3879,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + { + uint32_t flags = 0; + +- if (var->last_read) ++ if (var->is_read) + flags |= D3D_SVF_USED; + + put_u32(&buffer, 0); /* name */ +@@ -3598,7 +3925,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) for (k = 0; k < comp_count; ++k) { struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); @@ -13561,7 +18799,7 @@ index 84f641cc316..00a525c9ac3 100644 enum hlsl_regset regset; if (comp_type->class == HLSL_CLASS_STRING) -@@ -3608,7 +3901,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3608,7 +3935,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) continue; } @@ -13571,13 +18809,166 @@ index 84f641cc316..00a525c9ac3 100644 if (regset == HLSL_REGSET_NUMERIC) { if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) -@@ -4182,10 +4476,55 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk +@@ -3779,11 +4107,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s + *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); + } + +-static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, 
++static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg, + uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) + { +- const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; ++ const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref); + const struct hlsl_ir_var *var = deref->var; ++ struct hlsl_ctx *ctx = tpf->ctx; + + if (var->is_uniform) + { +@@ -3793,7 +4123,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + reg->type = VKD3DSPR_RESOURCE; + reg->dimension = VSIR_DIMENSION_VEC4; +- if (hlsl_version_ge(ctx, 5, 1)) ++ if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ +@@ -3812,7 +4142,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + reg->type = VKD3DSPR_UAV; + reg->dimension = VSIR_DIMENSION_VEC4; +- if (hlsl_version_ge(ctx, 5, 1)) ++ if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ +@@ -3831,7 +4161,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + reg->type = VKD3DSPR_SAMPLER; + reg->dimension = VSIR_DIMENSION_NONE; +- if (hlsl_version_ge(ctx, 5, 1)) ++ if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ +@@ -3853,7 +4183,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3DSPR_CONSTBUFFER; + reg->dimension = VSIR_DIMENSION_VEC4; +- if 
(hlsl_version_ge(ctx, 5, 1)) ++ if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->buffer->reg.id; + reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ +@@ -3873,7 +4203,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + bool has_idx; + +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, ®->type, &has_idx)) ++ if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + +@@ -3883,7 +4213,10 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + reg->idx_count = 1; + } + +- reg->dimension = VSIR_DIMENSION_VEC4; ++ if (shader_sm4_is_scalar_register(reg)) ++ reg->dimension = VSIR_DIMENSION_SCALAR; ++ else ++ reg->dimension = VSIR_DIMENSION_VEC4; + *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); + } + else +@@ -3902,7 +4235,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + bool has_idx; + +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, ®->type, &has_idx)) ++ if (sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + +@@ -3912,7 +4245,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + reg->idx_count = 1; + } + +- if (reg->type == VKD3DSPR_DEPTHOUT) ++ if (shader_sm4_is_scalar_register(reg)) + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; +@@ -3938,13 +4271,13 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re } } -+static void sm4_update_stat_counters(const struct tpf_writer *tpf, const struct sm4_instruction *instr) +-static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, ++static void sm4_src_from_deref(const struct 
tpf_compiler *tpf, struct vkd3d_shader_src_param *src, + const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) + { + unsigned int hlsl_swizzle; + uint32_t writemask; + +- sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref, sm4_instr); ++ sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr); + if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) + { + hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +@@ -3982,7 +4315,7 @@ static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, + } + } + +-static void sm4_src_from_node(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, ++static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, + const struct hlsl_ir_node *instr, uint32_t map_writemask) + { + unsigned int hlsl_swizzle; +@@ -4018,7 +4351,7 @@ static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_ + return 0; + } + +-static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, ++static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, + enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) + { + const struct vkd3d_sm4_register_type_info *register_type_info; +@@ -4078,7 +4411,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v + return token; + } + +-static void sm4_write_register_index(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, ++static void sm4_write_register_index(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, + unsigned int j) + { + unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); +@@ -4108,7 +4441,7 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct + } + } + +-static 
void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) ++static void sm4_write_dst_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_dst_param *dst) + { + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t token = 0; +@@ -4121,7 +4454,7 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk + sm4_write_register_index(tpf, &dst->reg, j); + } + +-static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vkd3d_shader_src_param *src) ++static void sm4_write_src_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_src_param *src) + { + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t token = 0, mod_token = 0; +@@ -4182,10 +4515,55 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk + } + } + +-static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) ++static void sm4_update_stat_counters(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) +{ -+ enum vkd3d_shader_type shader_type = tpf->ctx->profile->type; ++ enum vkd3d_shader_type shader_type = tpf->program->shader_version.type; + enum vkd3d_sm4_stat_field stat_field; + uint32_t opcode; + @@ -13620,7 +19011,7 @@ index 84f641cc316..00a525c9ac3 100644 + } +} + - static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) ++static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) { - struct vkd3d_bytecode_buffer *buffer = tpf->buffer; uint32_t token = instr->opcode | instr->extra_bits; @@ -13628,7 +19019,7 @@ index 84f641cc316..00a525c9ac3 100644 unsigned int size, i, j; size_t token_position; -@@ -4218,6 +4557,8 @@ static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4 +@@ -4218,6 +4596,8 @@ static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4 
size = (bytecode_get_size(buffer) - token_position) / sizeof(uint32_t); token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); set_u32(buffer, token_position, token); @@ -13637,7 +19028,37 @@ index 84f641cc316..00a525c9ac3 100644 } static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, -@@ -4348,7 +4689,7 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex +@@ -4247,7 +4627,7 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, + return true; + } + +-static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) ++static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) + { + size_t size = (cbuffer->used_size + 3) / 4; + +@@ -4282,7 +4662,7 @@ static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const st + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) ++static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct extern_resource *resource) + { + unsigned int i; + struct sm4_instruction instr = +@@ -4323,9 +4703,10 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex + } + } + +-static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, ++static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource, + bool uav) + { ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + enum hlsl_regset regset = uav ? 
HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + struct hlsl_type *component_type; + struct sm4_instruction instr; +@@ -4348,21 +4729,21 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex .dsts[0].reg.idx_count = 1, .dst_count = 1, @@ -13646,23 +19067,62 @@ index 84f641cc316..00a525c9ac3 100644 .idx_count = 1, }; -@@ -4412,7 +4753,7 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl + multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + +- if (hlsl_version_lt(tpf->ctx, 4, 1) && multisampled && !component_type->sample_count) ++ if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count) + { + hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Multisampled texture object declaration needs sample count for profile %s.", +- tpf->ctx->profile->name); ++ "Multisampled texture object declaration needs sample count for profile %u.%u.", ++ version->major, version->minor); + } + +- if (hlsl_version_ge(tpf->ctx, 5, 1)) ++ if (vkd3d_shader_ver_ge(version, 5, 1)) + { + VKD3D_ASSERT(!i); + instr.dsts[0].reg.idx[0].offset = resource->id; +@@ -4408,11 +4789,12 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + } + } + +-static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) ++static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, ++ const struct hlsl_ir_var *var, bool is_patch_constant_func) { - const struct hlsl_profile_info *profile = tpf->ctx->profile; +- const struct hlsl_profile_info *profile = tpf->ctx->profile; ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; const bool output = var->is_output_semantic; - D3D_NAME usage; + enum vkd3d_shader_sysval_semantic semantic; bool has_idx; struct sm4_instruction instr = -@@ -4445,22 +4786,23 @@ static void 
write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl - if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) +@@ -4421,7 +4803,7 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl + .dst_count = 1, + }; + +- if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, &has_idx)) ++ if (sm4_register_from_semantic_name(version, var->semantic.name, output, &instr.dsts[0].reg.type, &has_idx)) + { + if (has_idx) + { +@@ -4442,36 +4824,39 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl + instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; + } + +- if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) ++ if (shader_sm4_is_scalar_register(&instr.dsts[0].reg)) instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; - hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); - if (usage == ~0u) - usage = D3D_NAME_UNDEFINED; -+ sysval_semantic_from_hlsl(&semantic, tpf->ctx, &var->semantic, output); ++ sm4_sysval_semantic_from_semantic_name(&semantic, version, tpf->ctx->semantic_compat_mapping, ++ tpf->ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); + if (semantic == ~0u) + semantic = VKD3D_SHADER_SV_NONE; @@ -13672,34 +19132,50 @@ index 84f641cc316..00a525c9ac3 100644 + switch (semantic) { - case D3D_NAME_UNDEFINED: +- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + case VKD3D_SHADER_SV_NONE: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ? 
VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; break; - case D3D_NAME_INSTANCE_ID: - case D3D_NAME_PRIMITIVE_ID: - case D3D_NAME_VERTEX_ID: +- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + case VKD3D_SHADER_SV_INSTANCE_ID: ++ case VKD3D_SHADER_SV_IS_FRONT_FACE: + case VKD3D_SHADER_SV_PRIMITIVE_ID: -+ case VKD3D_SHADER_SV_VERTEX_ID: + case VKD3D_SHADER_SV_SAMPLE_INDEX: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ case VKD3D_SHADER_SV_VERTEX_ID: ++ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; break; -@@ -4510,25 +4852,25 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl + + default: +- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; + break; + } + +- if (profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL) + { + enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; + +@@ -4510,32 +4895,32 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl } else { - if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ if (semantic == VKD3D_SHADER_SV_NONE || profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; else instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; } - switch (usage) -+ switch (semantic) ++ if (instr.opcode == VKD3D_SM4_OP_DCL_OUTPUT) { - case D3D_NAME_COVERAGE: - case D3D_NAME_DEPTH: @@ -13707,26 +19183,58 @@ index 84f641cc316..00a525c9ac3 100644 - case D3D_NAME_DEPTH_LESS_EQUAL: - case D3D_NAME_TARGET: - case D3D_NAME_UNDEFINED: -+ case VKD3D_SHADER_SV_COVERAGE: -+ case VKD3D_SHADER_SV_DEPTH: -+ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: -+ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: -+ 
case VKD3D_SHADER_SV_TARGET: -+ case VKD3D_SHADER_SV_NONE: - break; - - default: - instr.idx_count = 1; +- break; +- +- default: +- instr.idx_count = 1; - instr.idx[0] = usage; -+ instr.idx[0] = semantic; - break; +- break; ++ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET ++ || instr.dsts[0].reg.type != VKD3DSPR_OUTPUT); ++ } ++ else if (instr.opcode == VKD3D_SM4_OP_DCL_INPUT || instr.opcode == VKD3D_SM4_OP_DCL_INPUT_PS) ++ { ++ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); ++ } ++ else ++ { ++ VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); ++ instr.idx_count = 1; ++ instr.idx[0] = vkd3d_siv_from_sysval_indexed(semantic, var->semantic.index); } -@@ -4577,6 +4919,17 @@ static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint3 write_sm4_instruction(tpf, &instr); } -+static void write_sm4_dcl_global_flags(const struct tpf_writer *tpf, uint32_t flags) +-static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) ++static void write_sm4_dcl_temps(const struct tpf_compiler *tpf, uint32_t temp_count) + { + struct sm4_instruction instr = + { +@@ -4548,7 +4933,7 @@ static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_coun + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t idx, ++static void write_sm4_dcl_indexable_temp(const struct tpf_compiler *tpf, uint32_t idx, + uint32_t size, uint32_t comp_count) + { + struct sm4_instruction instr = +@@ -4562,7 +4947,7 @@ static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) ++static void write_sm4_dcl_thread_group(const struct tpf_compiler *tpf, const uint32_t thread_count[3]) + { + struct sm4_instruction instr = + { +@@ -4577,7 +4962,105 @@ static void 
write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint3 + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ret(const struct tpf_writer *tpf) ++static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) +{ + struct sm4_instruction instr = + { @@ -13737,10 +19245,279 @@ index 84f641cc316..00a525c9ac3 100644 + write_sm4_instruction(tpf, &instr); +} + - static void write_sm4_ret(const struct tpf_writer *tpf) ++static void tpf_write_hs_decls(const struct tpf_compiler *tpf) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_HS_DECLS, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, ++ .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_output_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, ++ .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_tessellator_domain(const struct tpf_compiler *tpf, enum vkd3d_tessellator_domain domain) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, ++ .extra_bits = domain << 
VKD3D_SM5_TESSELLATOR_SHIFT, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_tessellator_partitioning(const struct tpf_compiler *tpf, ++ enum vkd3d_shader_tessellator_partitioning partitioning) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, ++ .extra_bits = partitioning << VKD3D_SM5_TESSELLATOR_SHIFT, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler *tpf, ++ enum vkd3d_shader_tessellator_output_primitive output_primitive) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, ++ .extra_bits = output_primitive << VKD3D_SM5_TESSELLATOR_SHIFT, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void write_sm4_ret(const struct tpf_compiler *tpf) { struct sm4_instruction instr = -@@ -5578,6 +5931,23 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + { +@@ -4587,7 +5070,7 @@ static void write_sm4_ret(const struct tpf_writer *tpf) + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_unary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod) + { + struct sm4_instruction instr; +@@ -4605,7 +5088,7 @@ static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opco + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_unary_op_with_two_destinations(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src) + { + struct sm4_instruction instr; +@@ -4626,7 
+5109,7 @@ static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tp + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_binary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { + struct sm4_instruction instr; +@@ -4645,7 +5128,7 @@ static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opc + } + + /* dp# instructions don't map the swizzle. */ +-static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_binary_op_dot(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { + struct sm4_instruction instr; +@@ -4663,7 +5146,7 @@ static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4 + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, ++static void write_sm4_binary_op_with_two_destinations(const struct tpf_compiler *tpf, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, + const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { +@@ -4686,7 +5169,7 @@ static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *t + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_ternary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, + const struct hlsl_ir_node *src3) + { +@@ -4706,7 +5189,7 @@ static void write_sm4_ternary_op(const struct tpf_writer *tpf, 
enum vkd3d_sm4_op + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, ++static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, + const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, + enum hlsl_sampler_dim dim) +@@ -4715,6 +5198,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node + bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; + struct sm4_instruction instr; + +@@ -4769,7 +5253,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node + reg->dimension = VSIR_DIMENSION_SCALAR; + reg->u.immconst_u32[0] = index->value.u[0].u; + } +- else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) ++ else if (version->major == 4 && version->minor == 0) + { + hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + } +@@ -4784,7 +5268,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; +@@ -4864,7 +5348,7 @@ static void write_sm4_sample(const struct tpf_writer 
*tpf, const struct hlsl_ir_ + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *dst = &load->node; +@@ -4886,7 +5370,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *dst = &load->node; +@@ -4921,7 +5405,7 @@ static bool type_is_float(const struct hlsl_type *type) + return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; + } + +-static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, ++static void write_sm4_cast_from_bool(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr, + const struct hlsl_ir_node *arg, uint32_t mask) + { + struct sm4_instruction instr; +@@ -4941,7 +5425,7 @@ static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) ++static void write_sm4_cast(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) + { + static const union + { +@@ -5050,7 +5534,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex + } + } + +-static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, ++static void write_sm4_store_uav_typed(const struct tpf_compiler *tpf, const struct hlsl_deref 
*dst, + const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) + { + struct sm4_instruction instr; +@@ -5058,7 +5542,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; + +- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); ++ sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); + instr.dst_count = 1; + + sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +@@ -5068,7 +5552,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) ++static void write_sm4_rasterizer_sample_count(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst) + { + struct sm4_instruction instr; + +@@ -5087,8 +5571,9 @@ static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, cons + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) ++static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) + { ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_ir_node *arg2 = expr->operands[1].node; + const struct hlsl_ir_node *arg3 = expr->operands[2].node; +@@ -5103,7 +5588,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + switch (expr->op) + { + case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: +- if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL && vkd3d_shader_ver_ge(version, 4, 1)) + write_sm4_rasterizer_sample_count(tpf, 
&expr->node); + else + hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, +@@ -5224,7 +5709,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + { + case HLSL_TYPE_FLOAT: + /* SM5 comes with a RCP opcode */ +- if (tpf->ctx->profile->major_version >= 5) ++ if (vkd3d_shader_ver_ge(version, 5, 0)) + { + write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); + } +@@ -5578,6 +6063,23 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); break; @@ -13764,50 +19541,346 @@ index 84f641cc316..00a525c9ac3 100644 default: hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); } -@@ -5998,8 +6368,8 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc +@@ -5585,7 +6087,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + hlsl_release_string_buffer(tpf->ctx, dst_type_string); + } + +-static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) ++static void write_sm4_if(const struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) + { + struct sm4_instruction instr = + { +@@ -5614,7 +6116,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if * + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) ++static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) + { + struct sm4_instruction instr = {0}; + +@@ -5653,16 +6155,17 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju + /* Does this variable's data come directly from the API user, rather than being + * temporary or from a previous shader stage? + * I.e. is it a uniform or VS input? 
*/ +-static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) ++static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) + { + if (var->is_uniform) + return true; + +- return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; ++ return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; + } + +-static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) ++static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load) + { ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + const struct hlsl_type *type = load->node.data_type; + struct sm4_instruction instr; + +@@ -5672,7 +6175,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo + instr.dst_count = 1; + + VKD3D_ASSERT(hlsl_is_numeric_type(type)); +- if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) ++ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) + { + struct hlsl_constant_value value; + +@@ -5700,7 +6203,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) ++static void write_sm4_loop(const struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) + { + struct sm4_instruction instr = + { +@@ -5715,10 +6218,11 @@ static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_lo + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, ++static void write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_deref *sampler, + const struct hlsl_ir_node *coords, uint32_t 
swizzle, const struct hlsl_ir_node *texel_offset) + { ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + struct vkd3d_shader_src_param *src; + struct sm4_instruction instr; + +@@ -5735,7 +6239,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { +- if (tpf->ctx->profile->major_version < 5) ++ if (!vkd3d_shader_ver_ge(version, 5, 0)) + { + hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); +@@ -5756,7 +6260,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *sample_index = load->sample_index.node; +@@ -5825,7 +6329,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h + } + } + +-static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) ++static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) + { + struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); + +@@ -5844,7 +6348,7 @@ static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct + write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); + } + +-static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) ++static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) + { + const 
struct hlsl_ir_node *rhs = store->rhs.node; + struct sm4_instruction instr; +@@ -5853,7 +6357,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + +- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); ++ sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); + instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); + instr.dst_count = 1; + +@@ -5863,7 +6367,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_switch *s) ++static void write_sm4_switch(const struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) + { + const struct hlsl_ir_node *selector = s->selector.node; + struct hlsl_ir_switch_case *c; +@@ -5903,7 +6407,7 @@ static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_ + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) ++static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle) + { + unsigned int hlsl_swizzle; + struct sm4_instruction instr; +@@ -5924,7 +6428,7 @@ static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) ++static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block) + { + const struct hlsl_ir_node *instr; + +@@ -5998,18 +6502,65 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc } } -static void write_sm4_shdr(struct hlsl_ctx *ctx, - const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) 
-+static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func, -+ struct sm4_stat *stat, struct dxbc_writer *dxbc) ++static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) { - const struct hlsl_profile_info *profile = ctx->profile; +- const struct hlsl_profile_info *profile = ctx->profile; ++ struct hlsl_ctx *ctx = tpf->ctx; ++ const struct hlsl_scope *scope; ++ const struct hlsl_ir_var *var; ++ uint32_t temp_count; ++ ++ compute_liveness(ctx, func); ++ mark_indexable_vars(ctx, func); ++ temp_count = allocate_temp_registers(ctx, func); ++ if (ctx->result) ++ return; ++ ++ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if ((var->is_input_semantic && var->last_read) ++ || (var->is_output_semantic && var->first_write)) ++ tpf_write_dcl_semantic(tpf, var, func == ctx->patch_constant_func); ++ } ++ ++ if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) ++ write_sm4_dcl_thread_group(tpf, ctx->thread_count); ++ ++ if (temp_count) ++ write_sm4_dcl_temps(tpf, temp_count); ++ ++ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) ++ continue; ++ if (!var->regs[HLSL_REGSET_NUMERIC].allocated) ++ continue; ++ ++ if (var->indexable) ++ { ++ unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; ++ unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; ++ ++ write_sm4_dcl_indexable_temp(tpf, id, size, 4); ++ } ++ } ++ } ++ ++ write_sm4_block(tpf, &func->body); ++ ++ write_sm4_ret(tpf); ++} ++ ++static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func) ++{ ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; struct vkd3d_bytecode_buffer buffer = {0}; -@@ -6024,7 +6394,7 @@ static void 
write_sm4_shdr(struct hlsl_ctx *ctx, + struct extern_resource *extern_resources; + unsigned int extern_resources_count, i; + const struct hlsl_buffer *cbuffer; +- const struct hlsl_scope *scope; +- const struct hlsl_ir_var *var; ++ struct hlsl_ctx *ctx = tpf->ctx; + size_t token_count_position; +- struct tpf_writer tpf; + + static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = + { +@@ -6024,17 +6575,28 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, VKD3D_SM4_LIB, }; - tpf_writer_init(&tpf, ctx, &buffer); -+ tpf_writer_init(&tpf, ctx, stat, &buffer); ++ tpf->buffer = &buffer; extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -@@ -6049,6 +6419,9 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - write_sm4_dcl_textures(&tpf, resource, true); +- put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); ++ put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); + token_count_position = put_u32(&buffer, 0); + ++ if (version->type == VKD3D_SHADER_TYPE_HULL) ++ { ++ tpf_write_hs_decls(tpf); ++ ++ tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */ ++ tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count); ++ tpf_write_dcl_tessellator_domain(tpf, ctx->domain); ++ tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); ++ tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); ++ } ++ + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) +- write_sm4_dcl_constant_buffer(&tpf, cbuffer); ++ write_sm4_dcl_constant_buffer(tpf, cbuffer); } -+ if (entry_func->early_depth_test && profile->major_version >= 5) -+ write_sm4_dcl_global_flags(&tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); -+ - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + for (i = 0; i < extern_resources_count; 
++i) +@@ -6042,59 +6604,40 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + const struct extern_resource *resource = &extern_resources[i]; + + if (resource->regset == HLSL_REGSET_SAMPLERS) +- write_sm4_dcl_samplers(&tpf, resource); ++ write_sm4_dcl_samplers(tpf, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) +- write_sm4_dcl_textures(&tpf, resource, false); ++ write_sm4_dcl_textures(tpf, resource, false); + else if (resource->regset == HLSL_REGSET_UAVS) +- write_sm4_dcl_textures(&tpf, resource, true); ++ write_sm4_dcl_textures(tpf, resource, true); + } + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) +- write_sm4_dcl_semantic(&tpf, var); +- } ++ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) ++ write_sm4_dcl_global_flags(tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); + +- if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) +- write_sm4_dcl_thread_group(&tpf, ctx->thread_count); ++ if (version->type == VKD3D_SHADER_TYPE_HULL) ++ tpf_write_hs_control_point_phase(tpf); + +- if (ctx->temp_count) +- write_sm4_dcl_temps(&tpf, ctx->temp_count); ++ tpf_write_shader_function(tpf, entry_func); + +- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ if (version->type == VKD3D_SHADER_TYPE_HULL) { - if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) -@@ -6110,14 +6483,64 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) +- { +- if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) +- continue; +- if (!var->regs[HLSL_REGSET_NUMERIC].allocated) +- continue; +- +- if (var->indexable) +- { +- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; +- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 
4; +- +- write_sm4_dcl_indexable_temp(&tpf, id, size, 4); +- } +- } ++ tpf_write_hs_fork_phase(tpf); ++ tpf_write_shader_function(tpf, ctx->patch_constant_func); + } + +- write_sm4_block(&tpf, &entry_func->body); +- +- write_sm4_ret(&tpf); +- + set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); + +- add_section(ctx, dxbc, TAG_SHDR, &buffer); ++ add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer); ++ tpf->buffer = NULL; + + sm4_free_extern_resources(extern_resources, extern_resources_count); + } + +-static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) ++static void tpf_write_sfi0(struct tpf_compiler *tpf) + { + struct extern_resource *extern_resources; + unsigned int extern_resources_count; ++ struct hlsl_ctx *ctx = tpf->ctx; + uint64_t *flags; + + flags = vkd3d_calloc(1, sizeof(*flags)); +@@ -6110,29 +6653,94 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. 
*/ - if (flags) +- dxbc_writer_add_section(dxbc, TAG_SFI0, flags, sizeof(*flags)); + if (*flags) - dxbc_writer_add_section(dxbc, TAG_SFI0, flags, sizeof(*flags)); ++ dxbc_writer_add_section(&tpf->dxbc, TAG_SFI0, flags, sizeof(*flags)); else vkd3d_free(flags); } -+static void write_sm4_stat(struct hlsl_ctx *ctx, const struct sm4_stat *stat, struct dxbc_writer *dxbc) -+{ +-int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) ++static void tpf_write_stat(struct tpf_compiler *tpf) + { +- struct dxbc_writer dxbc; + struct vkd3d_bytecode_buffer buffer = {0}; ++ const struct sm4_stat *stat = tpf->stat; ++ struct hlsl_ctx *ctx = tpf->ctx; + + put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); @@ -13851,28 +19924,55 @@ index 84f641cc316..00a525c9ac3 100644 + put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); + } + -+ add_section(ctx, dxbc, TAG_STAT, &buffer); ++ add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer); +} + - int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) - { ++/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving ++ * data from the other parameters instead, so they can be removed from the ++ * arguments and this function can be independent of HLSL structs. 
*/ ++int tpf_compile(struct vsir_program *program, uint64_t config_flags, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, ++ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) ++{ ++ struct tpf_compiler tpf = {0}; + struct sm4_stat stat = {0}; - struct dxbc_writer dxbc; size_t i; int ret; -@@ -6127,8 +6550,9 @@ int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun - write_sm4_signature(ctx, &dxbc, false); - write_sm4_signature(ctx, &dxbc, true); - write_sm4_rdef(ctx, &dxbc); + +- dxbc_writer_init(&dxbc); +- +- write_sm4_signature(ctx, &dxbc, false); +- write_sm4_signature(ctx, &dxbc, true); +- write_sm4_rdef(ctx, &dxbc); - write_sm4_shdr(ctx, entry_func, &dxbc); -+ write_sm4_shdr(ctx, entry_func, &stat, &dxbc); - write_sm4_sfi0(ctx, &dxbc); -+ write_sm4_stat(ctx, &stat, &dxbc); +- write_sm4_sfi0(ctx, &dxbc); ++ tpf.ctx = ctx; ++ tpf.program = program; ++ tpf.buffer = NULL; ++ tpf.stat = &stat; ++ init_sm4_lookup_tables(&tpf.lookup); ++ dxbc_writer_init(&tpf.dxbc); ++ ++ tpf_write_signature(&tpf, &program->input_signature, TAG_ISGN); ++ tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL) ++ tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); ++ write_sm4_rdef(ctx, &tpf.dxbc); ++ tpf_write_shdr(&tpf, entry_func); ++ tpf_write_sfi0(&tpf); ++ tpf_write_stat(&tpf); if (!(ret = ctx->result)) - ret = dxbc_writer_write(&dxbc, out); +- ret = dxbc_writer_write(&dxbc, out); +- for (i = 0; i < dxbc.section_count; ++i) +- vkd3d_shader_free_shader_code(&dxbc.sections[i].data); ++ ret = dxbc_writer_write(&tpf.dxbc, out); ++ for (i = 0; i < tpf.dxbc.section_count; ++i) ++ vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data); + return ret; + } diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 306c1ca0dd8..ee8a633431a 100644 +index 
306c1ca0dd8..ca012d4948a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ @@ -14017,6 +20117,15 @@ index 306c1ca0dd8..ee8a633431a 100644 } static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) +@@ -1436,7 +1489,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + descriptor_info1, combined_sampler_info, message_context); + + if (TRACE_ON()) +- vkd3d_shader_trace(program); ++ vsir_program_trace(program); + + for (i = 0; i < program->instructions.count; ++i) + { @@ -1497,6 +1550,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) { @@ -14059,18 +20168,21 @@ index 306c1ca0dd8..ee8a633431a 100644 vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; -@@ -1593,6 +1654,10 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, +@@ -1593,6 +1654,13 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; + case VKD3D_SHADER_TARGET_MSL: -+ ret = msl_compile(program, config_flags, compile_info, message_context); ++ if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) ++ return ret; ++ ret = msl_compile(program, config_flags, &scan_descriptor_info, compile_info, message_context); ++ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + break; + default: /* Validation should prevent us from reaching this. 
*/ vkd3d_unreachable(); -@@ -1620,6 +1685,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, +@@ -1620,6 +1688,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, char **messages) { struct vkd3d_shader_message_context message_context; @@ -14078,7 +20190,7 @@ index 306c1ca0dd8..ee8a633431a 100644 int ret; TRACE("compile_info %p, out %p, messages %p.\n", compile_info, out, messages); -@@ -1634,7 +1700,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, +@@ -1634,12 +1703,17 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, vkd3d_shader_message_context_init(&message_context, compile_info->log_level); @@ -14088,7 +20200,16 @@ index 306c1ca0dd8..ee8a633431a 100644 if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { -@@ -1676,6 +1743,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + ret = compile_hlsl(compile_info, out, &message_context); + } ++ else if (compile_info->source_type == VKD3D_SHADER_SOURCE_FX) ++ { ++ ret = fx_parse(compile_info, out, &message_context); ++ } + else + { + uint64_t config_flags = vkd3d_shader_init_config_flags(); +@@ -1676,6 +1750,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, } } @@ -14097,7 +20218,24 @@ index 306c1ca0dd8..ee8a633431a 100644 vkd3d_shader_message_context_trace_messages(&message_context); if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) ret = VKD3D_ERROR_OUT_OF_MEMORY; -@@ -1888,6 +1957,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1777,6 +1853,8 @@ void shader_signature_cleanup(struct shader_signature *signature) + } + vkd3d_free(signature->elements); + signature->elements = NULL; ++ signature->elements_capacity = 0; ++ signature->element_count = 0; + } + + int vkd3d_shader_parse_input_signature(const struct 
vkd3d_shader_code *dxbc, +@@ -1868,6 +1946,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns + #ifdef VKD3D_SHADER_UNSUPPORTED_DXIL + VKD3D_SHADER_SOURCE_DXBC_DXIL, + #endif ++ VKD3D_SHADER_SOURCE_FX, + }; + + TRACE("count %p.\n", count); +@@ -1888,6 +1967,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_D3D_ASM, #ifdef VKD3D_SHADER_UNSUPPORTED_GLSL VKD3D_SHADER_TARGET_GLSL, @@ -14107,20 +20245,44 @@ index 306c1ca0dd8..ee8a633431a 100644 #endif }; +@@ -1923,6 +2005,11 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + }; + #endif + ++ static const enum vkd3d_shader_target_type fx_types[] = ++ { ++ VKD3D_SHADER_TARGET_D3D_ASM, ++ }; ++ + TRACE("source_type %#x, count %p.\n", source_type, count); + + switch (source_type) +@@ -1945,6 +2032,10 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + return dxbc_dxil_types; + #endif + ++ case VKD3D_SHADER_SOURCE_FX: ++ *count = ARRAY_SIZE(fx_types); ++ return fx_types; ++ + default: + *count = 0; + return NULL; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index ef66a8ca07a..eab164cc848 100644 +index ef66a8ca07a..9df538a0da0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -59,6 +59,8 @@ +@@ -59,6 +59,9 @@ #define VKD3D_VEC4_SIZE 4 #define VKD3D_DVEC2_SIZE 2 +#define VKD3D_SHADER_COMPONENT_TYPE_COUNT (VKD3D_SHADER_COMPONENT_UINT64 + 1) ++#define VKD3D_SHADER_MINIMUM_PRECISION_COUNT (VKD3D_SHADER_MINIMUM_PRECISION_UINT_16 + 1) + enum vkd3d_shader_error { VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE = 1, -@@ -80,6 +82,7 @@ enum vkd3d_shader_error +@@ -80,6 +83,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_TPF_INVALID_CASE_VALUE = 1007, VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_DIMENSION = 1008, 
VKD3D_SHADER_ERROR_TPF_INVALID_REGISTER_SWIZZLE = 1009, @@ -14128,7 +20290,7 @@ index ef66a8ca07a..eab164cc848 100644 VKD3D_SHADER_WARNING_TPF_MASK_NOT_CONTIGUOUS = 1300, VKD3D_SHADER_WARNING_TPF_UNHANDLED_INDEX_RANGE_MASK = 1301, -@@ -152,6 +155,13 @@ enum vkd3d_shader_error +@@ -152,6 +156,13 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, @@ -14142,7 +20304,7 @@ index ef66a8ca07a..eab164cc848 100644 VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, -@@ -159,8 +169,11 @@ enum vkd3d_shader_error +@@ -159,8 +170,11 @@ enum vkd3d_shader_error VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, @@ -14154,7 +20316,7 @@ index ef66a8ca07a..eab164cc848 100644 VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF = 7000, VKD3D_SHADER_ERROR_D3DBC_INVALID_VERSION_TOKEN = 7001, -@@ -169,6 +182,11 @@ enum vkd3d_shader_error +@@ -169,6 +183,11 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, @@ -14166,20 +20328,40 @@ index ef66a8ca07a..eab164cc848 100644 VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, -@@ -225,8 +243,12 @@ enum vkd3d_shader_error +@@ -225,8 +244,18 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION = 9018, VKD3D_SHADER_ERROR_VSIR_INVALID_GS = 9019, + VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER = 9020, + VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC = 9021, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9022, VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, + + VKD3D_SHADER_ERROR_MSL_INTERNAL = 10000, ++ VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND = 10001, ++ ++ 
VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED = 11000, ++ VKD3D_SHADER_ERROR_FX_INVALID_VERSION = 11001, ++ VKD3D_SHADER_ERROR_FX_INVALID_DATA = 11002, }; enum vkd3d_shader_opcode -@@ -642,9 +664,6 @@ enum vkd3d_data_type +@@ -625,6 +654,13 @@ enum vkd3d_shader_register_type + VKD3DSPR_INVALID = ~0u, + }; + ++enum vsir_rastout_register ++{ ++ VSIR_RASTOUT_POSITION = 0x0, ++ VSIR_RASTOUT_FOG = 0x1, ++ VSIR_RASTOUT_POINT_SIZE = 0x2, ++}; ++ + enum vkd3d_shader_register_precision + { + VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, +@@ -642,9 +678,6 @@ enum vkd3d_data_type { VKD3D_DATA_FLOAT, VKD3D_DATA_INT, @@ -14189,7 +20371,35 @@ index ef66a8ca07a..eab164cc848 100644 VKD3D_DATA_UINT, VKD3D_DATA_UNORM, VKD3D_DATA_SNORM, -@@ -1344,8 +1363,6 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins +@@ -1042,6 +1075,9 @@ enum vkd3d_shader_input_sysval_semantic + + struct signature_element + { ++ /* sort_index is not a property of the signature element, it is just a ++ * convenience field used to retain the original order in a signature and ++ * recover it after having permuted the signature itself. 
*/ + unsigned int sort_index; + const char *semantic_name; + unsigned int semantic_index; +@@ -1145,7 +1181,7 @@ struct vkd3d_shader_tgsm_structured + bool zero_init; + }; + +-struct vkd3d_shader_thread_group_size ++struct vsir_thread_group_size + { + unsigned int x, y, z; + }; +@@ -1224,7 +1260,7 @@ struct vkd3d_shader_instruction + struct vkd3d_shader_structured_resource structured_resource; + struct vkd3d_shader_tgsm_raw tgsm_raw; + struct vkd3d_shader_tgsm_structured tgsm_structured; +- struct vkd3d_shader_thread_group_size thread_group_size; ++ struct vsir_thread_group_size thread_group_size; + enum vkd3d_tessellator_domain tessellator_domain; + enum vkd3d_shader_tessellator_output_primitive tessellator_output_primitive; + enum vkd3d_shader_tessellator_partitioning tessellator_partitioning; +@@ -1344,8 +1380,6 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins struct vkd3d_shader_immediate_constant_buffer *icb); bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, unsigned int dst, unsigned int src); @@ -14198,7 +20408,7 @@ index ef66a8ca07a..eab164cc848 100644 void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); enum vkd3d_shader_config_flags -@@ -1353,6 +1370,12 @@ enum vkd3d_shader_config_flags +@@ -1353,6 +1387,12 @@ enum vkd3d_shader_config_flags VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION = 0x00000001, }; @@ -14211,21 +20421,31 @@ index ef66a8ca07a..eab164cc848 100644 struct vsir_program { struct vkd3d_shader_version shader_version; -@@ -1372,6 +1395,7 @@ struct vsir_program +@@ -1367,11 +1407,16 @@ struct vsir_program + bool free_parameters; + + unsigned int input_control_point_count, output_control_point_count; ++ struct vsir_thread_group_size thread_group_size; + unsigned int flat_constant_count[3]; + unsigned int block_count; unsigned int temp_count; unsigned int ssa_count; bool use_vocp; ++ bool has_point_size; + enum 
vsir_control_flow_type cf_type; ++ bool normalised_io; ++ bool normalised_hull_cp_io; const char **block_names; size_t block_name_count; -@@ -1384,11 +1408,16 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, +@@ -1384,11 +1429,17 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( const struct vsir_program *program, enum vkd3d_shader_parameter_name name); bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_version *version, unsigned int reserve); -enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type); ++ const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, ++ bool normalised_io); +enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, @@ -14238,7 +20458,16 @@ index ef66a8ca07a..eab164cc848 100644 static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( struct vsir_program *program, unsigned int count) -@@ -1465,6 +1494,7 @@ enum vsir_asm_flags +@@ -1445,7 +1496,7 @@ struct vkd3d_shader_scan_descriptor_info1 + unsigned int descriptor_count; + }; + +-void vkd3d_shader_trace(const struct vsir_program *program); ++void vsir_program_trace(const struct vsir_program *program); + + const char *shader_get_type_prefix(enum vkd3d_shader_type type); + +@@ -1465,6 +1516,7 @@ enum vsir_asm_flags { VSIR_ASM_FLAG_NONE = 0, VSIR_ASM_FLAG_DUMP_TYPES = 0x1, @@ -14246,7 +20475,7 @@ index ef66a8ca07a..eab164cc848 100644 }; enum 
vkd3d_result d3d_asm_compile(const struct vsir_program *program, -@@ -1549,7 +1579,6 @@ void vkd3d_shader_warning(struct vkd3d_shader_message_context *context, const st +@@ -1549,18 +1601,29 @@ void vkd3d_shader_warning(struct vkd3d_shader_message_context *context, const st void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, enum vkd3d_shader_error error, const char *format, va_list args); @@ -14254,7 +20483,30 @@ index ef66a8ca07a..eab164cc848 100644 uint64_t vkd3d_shader_init_config_flags(void); void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function); #define vkd3d_shader_trace_text(text, size) \ -@@ -1570,8 +1599,10 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, + vkd3d_shader_trace_text_(text, size, __FUNCTION__) + ++bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, ++ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); ++bool sm1_usage_from_semantic_name(const char *semantic_name, ++ uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); ++bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, ++ const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx); ++bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, ++ const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, ++ const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func); ++ + int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, + struct vkd3d_shader_message_context *message_context, struct vsir_program *program); + int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, + struct 
vkd3d_shader_message_context *message_context, struct vsir_program *program); + int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, + struct vkd3d_shader_message_context *message_context, struct vsir_program *program); ++int fx_parse(const struct vkd3d_shader_compile_info *compile_info, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + + void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); + +@@ -1570,8 +1633,10 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); int glsl_compile(struct vsir_program *program, uint64_t config_flags, @@ -14267,12 +20519,13 @@ index ef66a8ca07a..eab164cc848 100644 #define SPIRV_MAX_SRC_COUNT 6 -@@ -1580,7 +1611,16 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags, +@@ -1580,7 +1645,17 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]); +int msl_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); + +enum vkd3d_md5_variant @@ -14285,17 +20538,17 @@ index ef66a8ca07a..eab164cc848 100644 int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -@@ -1853,7 +1893,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, +@@ -1853,7 +1928,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) 
#define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) -#define DXBC_MAX_SECTION_COUNT 5 -+#define DXBC_MAX_SECTION_COUNT 6 ++#define DXBC_MAX_SECTION_COUNT 7 struct dxbc_writer { diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index dcc7690876f..eab0436bebd 100644 +index dcc7690876f..5495809fcb9 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -19,6 +19,7 @@ @@ -14323,7 +20576,63 @@ index dcc7690876f..eab0436bebd 100644 list->is_predicated = false; list->current_framebuffer = VK_NULL_HANDLE; -@@ -3078,7 +3080,7 @@ done: +@@ -2793,39 +2795,30 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des + /* We use separate bindings for buffer and texture SRVs/UAVs. + * See d3d12_root_signature_init(). For unbounded ranges the + * descriptors exist in two consecutive sets, otherwise they occur +- * in pairs in one set. */ +- if (range->descriptor_count == UINT_MAX) +- { +- if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER +- && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) +- { +- vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; +- vk_descriptor_write->dstBinding = 0; +- } +- } +- else +- { +- if (!use_array) +- vk_descriptor_write->dstBinding = vk_binding + 2 * index; +- if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER +- && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) +- ++vk_descriptor_write->dstBinding; +- } +- ++ * as consecutive ranges within a set. 
*/ + if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER + || vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) + { + vk_descriptor_write->pTexelBufferView = &u.view->v.u.vk_buffer_view; ++ break; ++ } ++ ++ if (range->descriptor_count == UINT_MAX) ++ { ++ vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; ++ vk_descriptor_write->dstBinding = 0; + } + else + { +- vk_image_info->sampler = VK_NULL_HANDLE; +- vk_image_info->imageView = u.view->v.u.vk_image_view; +- vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV +- ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; +- +- vk_descriptor_write->pImageInfo = vk_image_info; ++ vk_descriptor_write->dstBinding += use_array ? 1 : range->descriptor_count; + } ++ ++ vk_image_info->sampler = VK_NULL_HANDLE; ++ vk_image_info->imageView = u.view->v.u.vk_image_view; ++ vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV ++ ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; ++ ++ vk_descriptor_write->pImageInfo = vk_image_info; + break; + + case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: +@@ -3078,7 +3071,7 @@ done: vkd3d_free(vk_descriptor_writes); } @@ -14332,7 +20641,7 @@ index dcc7690876f..eab0436bebd 100644 enum vkd3d_pipeline_bind_point bind_point) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; -@@ -3210,6 +3212,9 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) +@@ -3210,6 +3203,9 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) { @@ -14342,7 +20651,7 @@ index dcc7690876f..eab0436bebd 100644 if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) { if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) -@@ -3296,6 +3301,15 @@ static void 
d3d12_command_list_update_heap_descriptors(struct d3d12_command_list +@@ -3296,6 +3292,15 @@ static void d3d12_command_list_update_heap_descriptors(struct d3d12_command_list d3d12_command_list_bind_descriptor_heap(list, bind_point, sampler_heap); } @@ -14358,7 +20667,7 @@ index dcc7690876f..eab0436bebd 100644 static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list) { d3d12_command_list_end_current_render_pass(list); -@@ -3303,7 +3317,7 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l +@@ -3303,7 +3308,7 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l if (!d3d12_command_list_update_compute_pipeline(list)) return false; @@ -14367,7 +20676,7 @@ index dcc7690876f..eab0436bebd 100644 return true; } -@@ -3320,7 +3334,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list +@@ -3320,7 +3325,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list if (!d3d12_command_list_update_current_framebuffer(list)) return false; @@ -14376,7 +20685,7 @@ index dcc7690876f..eab0436bebd 100644 if (list->current_render_pass != VK_NULL_HANDLE) return true; -@@ -3351,6 +3365,12 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list +@@ -3351,6 +3356,12 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list list->xfb_enabled = true; } @@ -14389,7 +20698,7 @@ index dcc7690876f..eab0436bebd 100644 return true; } -@@ -5939,7 +5959,25 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr +@@ -5939,7 +5950,25 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList6 *iface, FLOAT min, FLOAT max) { @@ -14416,7 +20725,7 @@ index dcc7690876f..eab0436bebd 100644 } static void STDMETHODCALLTYPE 
d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList6 *iface, -@@ -6189,8 +6227,6 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d +@@ -6189,8 +6218,6 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d list->allocator = allocator; @@ -14438,10 +20747,156 @@ index 01841c89692..65339c7ba5d 100644 VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 682d488faa8..fb377177403 100644 +index 682d488faa8..8e5ec70a577 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -738,7 +738,7 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns +@@ -219,6 +219,30 @@ static VkShaderStageFlags stage_flags_from_visibility(D3D12_SHADER_VISIBILITY vi + } + } + ++static VkShaderStageFlags stage_flags_from_vkd3d_shader_visibility(enum vkd3d_shader_visibility visibility) ++{ ++ switch (visibility) ++ { ++ case VKD3D_SHADER_VISIBILITY_ALL: ++ return VK_SHADER_STAGE_ALL; ++ case VKD3D_SHADER_VISIBILITY_VERTEX: ++ return VK_SHADER_STAGE_VERTEX_BIT; ++ case VKD3D_SHADER_VISIBILITY_HULL: ++ return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; ++ case VKD3D_SHADER_VISIBILITY_DOMAIN: ++ return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; ++ case VKD3D_SHADER_VISIBILITY_GEOMETRY: ++ return VK_SHADER_STAGE_GEOMETRY_BIT; ++ case VKD3D_SHADER_VISIBILITY_PIXEL: ++ return VK_SHADER_STAGE_FRAGMENT_BIT; ++ case VKD3D_SHADER_VISIBILITY_COMPUTE: ++ return VK_SHADER_STAGE_COMPUTE_BIT; ++ default: ++ FIXME("Unhandled visibility %#x.\n", visibility); ++ return VKD3D_SHADER_VISIBILITY_ALL; ++ } ++} ++ + static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHADER_VISIBILITY visibility) + { + switch (visibility) +@@ -260,23 +284,6 @@ static VkDescriptorType 
vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d + } + } + +-static VkDescriptorType vk_descriptor_type_from_d3d12_root_parameter(D3D12_ROOT_PARAMETER_TYPE type) +-{ +- switch (type) +- { +- /* SRV and UAV root parameters are buffer views. */ +- case D3D12_ROOT_PARAMETER_TYPE_SRV: +- return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; +- case D3D12_ROOT_PARAMETER_TYPE_UAV: +- return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; +- case D3D12_ROOT_PARAMETER_TYPE_CBV: +- return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; +- default: +- FIXME("Unhandled descriptor root parameter type %#x.\n", type); +- return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; +- } +-} +- + static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( + D3D12_DESCRIPTOR_RANGE_TYPE type) + { +@@ -313,20 +320,6 @@ static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_root_p + } + } + +-static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutBinding *binding_desc, +- enum vkd3d_shader_descriptor_type descriptor_type, D3D12_SHADER_VISIBILITY shader_visibility, +- bool is_buffer, uint32_t vk_binding, unsigned int descriptor_count) +-{ +- binding_desc->binding = vk_binding; +- binding_desc->descriptorType +- = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, is_buffer); +- binding_desc->descriptorCount = descriptor_count; +- binding_desc->stageFlags = stage_flags_from_visibility(shader_visibility); +- binding_desc->pImmutableSamplers = NULL; +- +- return true; +-} +- + struct d3d12_root_signature_info + { + size_t binding_count; +@@ -719,18 +712,66 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat + return S_OK; + } + ++struct vk_binding_array ++{ ++ VkDescriptorSetLayoutBinding *bindings; ++ size_t capacity, count; ++ ++ unsigned int table_index; ++ unsigned int unbounded_offset; ++ VkDescriptorSetLayoutCreateFlags flags; ++}; ++ ++static void vk_binding_array_cleanup(struct vk_binding_array *array) ++{ 
++ vkd3d_free(array->bindings); ++ array->bindings = NULL; ++} ++ ++static bool vk_binding_array_add_binding(struct vk_binding_array *array, ++ VkDescriptorType descriptor_type, unsigned int descriptor_count, ++ VkShaderStageFlags stage_flags, const VkSampler *immutable_sampler, unsigned int *binding_idx) ++{ ++ unsigned int binding_count = array->count; ++ VkDescriptorSetLayoutBinding *binding; ++ ++ if (!vkd3d_array_reserve((void **)&array->bindings, &array->capacity, ++ array->count + 1, sizeof(*array->bindings))) ++ { ++ ERR("Failed to reallocate the Vulkan binding array.\n"); ++ return false; ++ } ++ ++ *binding_idx = binding_count; ++ binding = &array->bindings[binding_count]; ++ binding->binding = binding_count; ++ binding->descriptorType = descriptor_type; ++ binding->descriptorCount = descriptor_count; ++ binding->stageFlags = stage_flags; ++ binding->pImmutableSamplers = immutable_sampler; ++ ++array->count; ++ ++ return true; ++} ++ + struct vkd3d_descriptor_set_context + { +- VkDescriptorSetLayoutBinding *current_binding; +- VkDescriptorSetLayoutBinding *first_binding; ++ struct vk_binding_array vk_bindings[VKD3D_MAX_DESCRIPTOR_SETS]; + unsigned int table_index; + unsigned int unbounded_offset; + unsigned int descriptor_index; + unsigned int uav_counter_index; + unsigned int push_constant_index; +- uint32_t descriptor_binding; + }; + ++static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context) ++{ ++ size_t i; ++ ++ for (i = 0; i < ARRAY_SIZE(context->vk_bindings); ++i) ++ vk_binding_array_cleanup(&context->vk_bindings[i]); ++} ++ + static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, unsigned int set_count) + { + uint32_t max_count = min(VKD3D_MAX_DESCRIPTOR_SETS, device->vk_info.device_limits.maxBoundDescriptorSets); +@@ -738,63 +779,63 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns if (set_count > max_count) { /* NOTE: If maxBoundDescriptorSets is < 9, try 
VKD3D_CONFIG=virtual_heaps */ @@ -14450,7 +20905,194 @@ index 682d488faa8..fb377177403 100644 return false; } -@@ -1107,7 +1107,9 @@ static int compare_descriptor_range(const void *a, const void *b) + return true; + } + +-static HRESULT vkd3d_create_descriptor_set_layout(struct d3d12_device *device, +- VkDescriptorSetLayoutCreateFlags flags, unsigned int binding_count, bool unbounded, +- const VkDescriptorSetLayoutBinding *bindings, VkDescriptorSetLayout *set_layout); +- +-static HRESULT d3d12_root_signature_append_descriptor_set_layout(struct d3d12_root_signature *root_signature, +- struct vkd3d_descriptor_set_context *context, VkDescriptorSetLayoutCreateFlags flags) ++static struct vk_binding_array *d3d12_root_signature_current_vk_binding_array( ++ struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context) + { +- struct d3d12_descriptor_set_layout *layout; +- unsigned int index; +- HRESULT hr; +- +- if (!context->descriptor_binding) +- return S_OK; ++ if (root_signature->vk_set_count >= ARRAY_SIZE(context->vk_bindings)) ++ return NULL; + +- index = root_signature->vk_set_count; +- layout = &root_signature->descriptor_set_layouts[index]; ++ return &context->vk_bindings[root_signature->vk_set_count]; ++} + +- if (!vkd3d_validate_descriptor_set_count(root_signature->device, index + 1)) +- return E_INVALIDARG; ++static void d3d12_root_signature_append_vk_binding_array(struct d3d12_root_signature *root_signature, ++ VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) ++{ ++ struct vk_binding_array *array; + +- if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, flags, context->descriptor_binding, +- context->unbounded_offset != UINT_MAX, context->first_binding, &layout->vk_layout))) +- return hr; +- layout->table_index = context->table_index; +- layout->unbounded_offset = context->unbounded_offset; +- ++root_signature->vk_set_count; ++ if (!(array = 
d3d12_root_signature_current_vk_binding_array(root_signature, context)) || !array->count) ++ return; + +- context->current_binding = context->first_binding; +- context->descriptor_binding = 0; ++ array->table_index = context->table_index; ++ array->unbounded_offset = context->unbounded_offset; ++ array->flags = flags; + +- return S_OK; ++ ++root_signature->vk_set_count; + } + + static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, +- enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int register_idx, +- bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, +- unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context) ++ enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, ++ unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, ++ unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context, ++ const VkSampler *immutable_sampler, unsigned int *binding_idx) + { + struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets + ? 
&root_signature->descriptor_offsets[context->descriptor_index] : NULL; +- struct vkd3d_shader_resource_binding *mapping +- = &root_signature->descriptor_mapping[context->descriptor_index++]; ++ struct vkd3d_shader_resource_binding *mapping; ++ struct vk_binding_array *array; ++ unsigned int idx; ++ ++ if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) ++ || !(vk_binding_array_add_binding(&context->vk_bindings[root_signature->vk_set_count], ++ vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor), descriptor_count, ++ stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx))) ++ return E_OUTOFMEMORY; + ++ mapping = &root_signature->descriptor_mapping[context->descriptor_index++]; + mapping->type = descriptor_type; + mapping->register_space = register_space; + mapping->register_index = register_idx; + mapping->shader_visibility = shader_visibility; + mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + mapping->binding.set = root_signature->vk_set_count; +- mapping->binding.binding = context->descriptor_binding++; ++ mapping->binding.binding = idx; + mapping->binding.count = descriptor_count; + if (offset) + { +@@ -803,37 +844,11 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur + } + + if (context->unbounded_offset != UINT_MAX) +- return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); ++ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); + +- return S_OK; +-} ++ if (binding_idx) ++ *binding_idx = idx; + +-static HRESULT d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, +- enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx, +- unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors, +- enum vkd3d_shader_visibility 
shader_visibility, struct vkd3d_descriptor_set_context *context, +- uint32_t *first_binding) +-{ +- unsigned int i; +- HRESULT hr; +- +- is_buffer_descriptor |= descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; +- duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV +- || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) +- && duplicate_descriptors; +- +- *first_binding = context->descriptor_binding; +- for (i = 0; i < binding_count; ++i) +- { +- if (duplicate_descriptors +- && FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, +- register_space, base_register_idx + i, true, shader_visibility, 1, context))) +- return hr; +- +- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, +- base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context))) +- return hr; +- } + return S_OK; + } + +@@ -895,38 +910,41 @@ static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_ro + return min(count, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); + } + +-static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_root_signature *root_signature, ++static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, ++ unsigned int vk_binding_array_count, unsigned int bindings_per_range, + struct vkd3d_descriptor_set_context *context) + { + enum vkd3d_shader_visibility shader_visibility = vkd3d_shader_visibility_from_d3d12(visibility); +- bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; ++ bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; + enum vkd3d_shader_descriptor_type descriptor_type = range->type; ++ unsigned int i, register_space = range->register_space; + HRESULT hr; + + if (range->descriptor_count == UINT_MAX) + context->unbounded_offset = 
range->offset; + +- if (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) ++ for (i = 0; i < bindings_per_range; ++i) + { +- if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, +- descriptor_type, visibility, true, context->descriptor_binding, range->vk_binding_count)) +- return E_NOTIMPL; +- ++context->current_binding; +- +- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, +- range->base_register_idx, true, shader_visibility, range->vk_binding_count, context))) ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, ++ register_space, range->base_register_idx + i, is_buffer, shader_visibility, ++ vk_binding_array_count, context, NULL, NULL))) + return hr; + } + +- if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, +- descriptor_type, visibility, is_buffer, context->descriptor_binding, range->vk_binding_count)) +- return E_NOTIMPL; +- ++context->current_binding; ++ if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) ++ { ++ context->unbounded_offset = UINT_MAX; ++ return S_OK; ++ } + +- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, +- range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context))) +- return hr; ++ for (i = 0; i < bindings_per_range; ++i) ++ { ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, ++ register_space, range->base_register_idx + i, false, shader_visibility, ++ vk_binding_array_count, context, NULL, NULL))) ++ return hr; ++ } + + context->unbounded_offset = UINT_MAX; + +@@ -1107,18 +1125,19 @@ static int compare_descriptor_range(const void *a, const void *b) if ((ret = vkd3d_u32_compare(range_a->offset, range_b->offset))) return ret; @@ -14461,7 +21103,261 @@ 
index 682d488faa8..fb377177403 100644 } static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature, -@@ -3867,6 +3869,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta + const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info, + struct vkd3d_descriptor_set_context *context) + { ++ unsigned int i, j, range_count, bindings_per_range, vk_binding_array_count; + const struct d3d12_device *device = root_signature->device; + bool use_vk_heaps = root_signature->device->use_vk_heaps; + struct d3d12_root_descriptor_table *table; +- unsigned int i, j, k, range_count; +- uint32_t vk_binding; + HRESULT hr; + + root_signature->descriptor_table_mask = 0; +@@ -1175,7 +1194,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + for (j = 0; j < range_count; ++j) + { + struct d3d12_root_descriptor_table_range *range; +- VkDescriptorSetLayoutBinding *cur_binding; + + range = &table->ranges[j]; + +@@ -1221,53 +1239,23 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + base_range = range; + } + +- range->binding = context->descriptor_binding; + range->vk_binding_count = vk_binding_count_from_descriptor_range(range, + info, &device->vk_info.descriptor_limits); +- +- if (FAILED(hr = d3d12_root_signature_init_descriptor_array_binding(root_signature, +- range, p->ShaderVisibility, context))) +- return hr; +- +- continue; ++ vk_binding_array_count = range->vk_binding_count; ++ bindings_per_range = 1; + } +- +- cur_binding = context->current_binding; +- +- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, +- range->type, range->register_space, range->base_register_idx, range->descriptor_count, false, true, +- shader_visibility, context, &vk_binding))) +- return hr; +- +- /* Unroll descriptor range. 
*/ +- for (k = 0; k < range->descriptor_count; ++k) ++ else + { +- uint32_t vk_current_binding = vk_binding + k; +- +- if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV +- || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) +- { +- vk_current_binding = vk_binding + 2 * k; +- +- /* Assign binding for image view. */ +- if (!vk_binding_from_d3d12_descriptor_range(cur_binding, +- range->type, p->ShaderVisibility, false, vk_current_binding + 1, 1)) +- return E_NOTIMPL; +- +- ++cur_binding; +- } +- +- if (!vk_binding_from_d3d12_descriptor_range(cur_binding, +- range->type, p->ShaderVisibility, true, vk_current_binding, 1)) +- return E_NOTIMPL; +- +- ++cur_binding; ++ range->vk_binding_count = range->descriptor_count; ++ vk_binding_array_count = 1; ++ bindings_per_range = range->descriptor_count; + } + +- table->ranges[j].vk_binding_count = table->ranges[j].descriptor_count; +- table->ranges[j].binding = vk_binding; ++ range->binding = context->vk_bindings[root_signature->vk_set_count].count; + +- context->current_binding = cur_binding; ++ if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range, ++ p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context))) ++ return hr; + } + ++context->push_constant_index; + } +@@ -1278,8 +1266,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, + const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) + { +- VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; +- unsigned int i; ++ unsigned int binding, i; + HRESULT hr; + + root_signature->push_descriptor_mask = 0; +@@ -1294,23 +1281,16 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign + + root_signature->push_descriptor_mask |= 1u << i; + +- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, ++ if 
(FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, + vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), +- p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1, true, false, +- vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context, &cur_binding->binding))) ++ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, true, ++ vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL, &binding))) + return hr; +- cur_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(p->ParameterType); +- cur_binding->descriptorCount = 1; +- cur_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility); +- cur_binding->pImmutableSamplers = NULL; + + root_signature->parameters[i].parameter_type = p->ParameterType; +- root_signature->parameters[i].u.descriptor.binding = cur_binding->binding; +- +- ++cur_binding; ++ root_signature->parameters[i].u.descriptor.binding = binding; + } + +- context->current_binding = cur_binding; + return S_OK; + } + +@@ -1318,7 +1298,6 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa + struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc, + struct vkd3d_descriptor_set_context *context) + { +- VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; + unsigned int i; + HRESULT hr; + +@@ -1330,21 +1309,15 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa + if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) + return hr; + +- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, +- VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, 1, false, false, +- vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context, &cur_binding->binding))) ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, 
s->ShaderRegister, false, ++ vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), 1, context, ++ &root_signature->static_samplers[i], NULL))) + return hr; +- cur_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; +- cur_binding->descriptorCount = 1; +- cur_binding->stageFlags = stage_flags_from_visibility(s->ShaderVisibility); +- cur_binding->pImmutableSamplers = &root_signature->static_samplers[i]; +- +- ++cur_binding; + } + +- context->current_binding = cur_binding; + if (device->use_vk_heaps) +- return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); ++ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); + + return S_OK; + } +@@ -1477,6 +1450,34 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, + return S_OK; + } + ++static HRESULT d3d12_root_signature_create_descriptor_set_layouts(struct d3d12_root_signature *root_signature, ++ struct vkd3d_descriptor_set_context *context) ++{ ++ unsigned int i; ++ HRESULT hr; ++ ++ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); ++ ++ if (!vkd3d_validate_descriptor_set_count(root_signature->device, root_signature->vk_set_count)) ++ return E_INVALIDARG; ++ ++ for (i = 0; i < root_signature->vk_set_count; ++i) ++ { ++ struct d3d12_descriptor_set_layout *layout = &root_signature->descriptor_set_layouts[i]; ++ struct vk_binding_array *array = &context->vk_bindings[i]; ++ ++ VKD3D_ASSERT(array->count); ++ ++ if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, array->flags, array->count, ++ array->unbounded_offset != UINT_MAX, array->bindings, &layout->vk_layout))) ++ return hr; ++ layout->unbounded_offset = array->unbounded_offset; ++ layout->table_index = array->table_index; ++ } ++ ++ return S_OK; ++} ++ + static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature, + VkDescriptorSetLayout *vk_set_layouts) + { +@@ -1508,7 +1509,6 @@ static 
HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + VkDescriptorSetLayout vk_layouts[VKD3D_MAX_DESCRIPTOR_SETS]; + const struct vkd3d_vulkan_info *vk_info = &device->vk_info; + struct vkd3d_descriptor_set_context context; +- VkDescriptorSetLayoutBinding *binding_desc; + struct d3d12_root_signature_info info; + bool use_vk_heaps; + unsigned int i; +@@ -1516,7 +1516,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + + memset(&context, 0, sizeof(context)); + context.unbounded_offset = UINT_MAX; +- binding_desc = NULL; + + root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl; + root_signature->refcount = 1; +@@ -1578,20 +1577,14 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + sizeof(*root_signature->static_samplers)))) + goto fail; + +- if (!(binding_desc = vkd3d_calloc(info.binding_count, sizeof(*binding_desc)))) +- goto fail; +- context.first_binding = binding_desc; +- context.current_binding = binding_desc; +- + if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context))) + goto fail; + + /* We use KHR_push_descriptor for root descriptor parameters. 
*/ + if (vk_info->KHR_push_descriptor) + { +- if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, +- &context, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR))) +- goto fail; ++ d3d12_root_signature_append_vk_binding_array(root_signature, ++ VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, &context); + } + + root_signature->main_set = root_signature->vk_set_count; +@@ -1607,11 +1600,10 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + if (use_vk_heaps) + d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context); + +- if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0))) ++ if (FAILED(hr = d3d12_root_signature_create_descriptor_set_layouts(root_signature, &context))) + goto fail; + +- vkd3d_free(binding_desc); +- binding_desc = NULL; ++ descriptor_set_context_cleanup(&context); + + i = d3d12_root_signature_copy_descriptor_set_layouts(root_signature, vk_layouts); + if (FAILED(hr = vkd3d_create_pipeline_layout(device, i, +@@ -1627,7 +1619,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + return S_OK; + + fail: +- vkd3d_free(binding_desc); ++ descriptor_set_context_cleanup(&context); + d3d12_root_signature_cleanup(root_signature, device); + return hr; + } +@@ -3867,6 +3859,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_STENCIL_REFERENCE, diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch new file mode 100644 index 00000000..efed2b41 --- /dev/null +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch @@ -0,0 +1,2560 @@ +From e0e7620dc6f16abd9530e54a7ee32756873098ff Mon Sep 17 00:00:00 2001 +From: 
Alistair Leslie-Hughes +Date: Wed, 23 Oct 2024 13:50:07 +1100 +Subject: [PATCH] Updated vkd3d to 4889c71857ce2152a9c9e014b9f5831f96dc349b. + +--- + libs/vkd3d/include/vkd3d_shader.h | 33 ++ + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 9 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 2 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 7 +- + libs/vkd3d/libs/vkd3d-shader/fx.c | 103 ++++- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 155 ++++++- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 1 + + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 11 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 2 + + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 48 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 400 ++++++++++++++---- + libs/vkd3d/libs/vkd3d-shader/ir.c | 233 +++++++++- + libs/vkd3d/libs/vkd3d-shader/msl.c | 7 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 25 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 119 ++++-- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 19 +- + libs/vkd3d/libs/vkd3d/command.c | 2 + + libs/vkd3d/libs/vkd3d/device.c | 135 ++++++ + 18 files changed, 1121 insertions(+), 190 deletions(-) + +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index 5c0d13ea9e2..1476387c6bd 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -700,6 +700,39 @@ enum vkd3d_shader_parameter_name + * \since 1.14 + */ + VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, ++ /** ++ * Whether texture coordinate inputs should take their values from the ++ * point coordinate. ++ * ++ * When this parameter is provided to a pixel shader, and the value is ++ * nonzero, any fragment shader input with the semantic name "TEXCOORD" ++ * takes its value from the point coordinates instead of from the previous ++ * shader. The point coordinates here are defined as a four-component vector ++ * whose X and Y components are the X and Y coordinates of the fragment ++ * within a point being rasterized, and whose Z and W components are zero. 
++ * ++ * In GLSL, the X and Y components are drawn from gl_PointCoord; in SPIR-V, ++ * they are drawn from a variable with the BuiltinPointCoord decoration. ++ * ++ * This includes t# fragment shader inputs in shader model 2 shaders, ++ * as well as texture sampling in shader model 1 shaders. ++ * ++ * This parameter can be used to implement fixed function point sprite, as ++ * present in Direct3D versions 8 and 9, if the target environment does not ++ * support point sprite as part of its own fixed-function API (as Vulkan and ++ * core OpenGL). ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. ++ * ++ * The default value is zero, i.e. use the original varyings. ++ * ++ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this ++ * version of vkd3d-shader. ++ * ++ * \since 1.14 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), + }; +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index 38d566d9fe0..bc28aebed4d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -393,14 +393,13 @@ static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type regi + } + } + +-static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, +- enum vkd3d_shader_global_flags global_flags) ++static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, enum vsir_global_flags global_flags) + { + unsigned int i; + + static const struct + { +- enum vkd3d_shader_global_flags flag; ++ enum vsir_global_flags flag; + const char *name; + } + global_flag_info[] = +@@ -1190,6 +1189,10 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const + vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); + break; + ++ case VKD3DSPR_POINT_COORD: ++ vkd3d_string_buffer_printf(buffer, "vPointCoord"); ++ break; ++ + 
default: + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->type, compiler->colours.reset); +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index ae8e864c179..bbebf86e6d5 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1320,7 +1320,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st + + /* Estimate instruction count to avoid reallocation in most shaders. */ + if (!vsir_program_init(program, compile_info, &version, +- code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, false)) ++ code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index f9f44f34bcf..570af5eca5a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -9564,7 +9564,7 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons + + static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm6_metadata_value *m) + { +- enum vkd3d_shader_global_flags global_flags, mask, rotated_flags; ++ enum vsir_global_flags global_flags, mask, rotated_flags; + struct vkd3d_shader_instruction *ins; + + if (!sm6_metadata_get_uint64_value(sm6, m, (uint64_t*)&global_flags)) +@@ -9574,7 +9574,7 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm + "Global flags metadata value is not an integer."); + return; + } +- /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vkd3d_shader_global_flags. */ ++ /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vsir_global_flags. 
*/ + mask = (VKD3DSGF_SKIP_OPTIMIZATION << 1) - 1; + rotated_flags = global_flags & mask; + rotated_flags = (rotated_flags >> 1) | ((rotated_flags & 1) << 4); +@@ -9582,6 +9582,7 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_GLOBAL_FLAGS); + ins->declaration.global_flags = global_flags; ++ sm6->p.program->global_flags = global_flags; + } + + static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, const struct sm6_metadata_value *m) +@@ -10312,7 +10313,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro + /* Estimate instruction count to avoid reallocation in most shaders. */ + count = max(token_count, 400) - 400; + if (!vsir_program_init(program, compile_info, &version, +- (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, true)) ++ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_FULLY_NORMALISED_IO)) + return VKD3D_ERROR_OUT_OF_MEMORY; + vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); + sm6->ptr = &sm6->start[1]; +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index f3f7a2c765c..d901f08d50d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -2854,6 +2854,7 @@ struct fx_parser + struct vkd3d_shader_message_context *message_context; + struct vkd3d_string_buffer buffer; + unsigned int indent; ++ unsigned int version; + struct + { + const uint8_t *ptr; +@@ -2862,6 +2863,7 @@ struct fx_parser + } unstructured; + uint32_t buffer_count; + uint32_t object_count; ++ uint32_t group_count; + bool failed; + }; + +@@ -3085,7 +3087,6 @@ static void fx_parse_fx_4_annotations(struct fx_parser *parser) + vkd3d_string_buffer_printf(&parser->buffer, ">"); + } + +- + static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t count) + { + struct fx_4_numeric_variable +@@ -3212,6 +3213,97 @@ 
static void fx_4_parse_objects(struct fx_parser *parser) + } + } + ++static void fx_parse_fx_4_technique(struct fx_parser *parser) ++{ ++ struct fx_technique ++ { ++ uint32_t name; ++ uint32_t count; ++ } technique; ++ struct fx_pass ++ { ++ uint32_t name; ++ uint32_t count; ++ } pass; ++ const char *name; ++ uint32_t i; ++ ++ if (parser->failed) ++ return; ++ ++ fx_parser_read_u32s(parser, &technique, sizeof(technique)); ++ ++ name = fx_4_get_string(parser, technique.name); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "technique%u %s", parser->version, name); ++ fx_parse_fx_4_annotations(parser); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n"); ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "{\n"); ++ ++ parse_fx_start_indent(parser); ++ for (i = 0; i < technique.count; ++i) ++ { ++ fx_parser_read_u32s(parser, &pass, sizeof(pass)); ++ name = fx_4_get_string(parser, pass.name); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "pass %s", name); ++ fx_parse_fx_4_annotations(parser); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n"); ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "{\n"); ++ ++ if (pass.count) ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, ++ "Parsing pass states is not implemented.\n"); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); ++ } ++ ++ parse_fx_end_indent(parser); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); ++} ++ ++static void fx_parse_groups(struct fx_parser *parser) ++{ ++ struct fx_group ++ { ++ uint32_t name; ++ uint32_t count; ++ } group; ++ const char *name; ++ uint32_t i, j; ++ ++ if (parser->failed) ++ return; ++ ++ for (i = 0; i < parser->group_count; ++i) ++ { ++ fx_parser_read_u32s(parser, &group, sizeof(group)); ++ ++ name = fx_4_get_string(parser, group.name); ++ 
++ vkd3d_string_buffer_printf(&parser->buffer, "fxgroup %s", name); ++ fx_parse_fx_4_annotations(parser); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n{\n"); ++ parse_fx_start_indent(parser); ++ ++ for (j = 0; j < group.count; ++j) ++ fx_parse_fx_4_technique(parser); ++ ++ parse_fx_end_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); ++ } ++} ++ + static int fx_4_parse(struct fx_parser *parser) + { + struct fx_4_header +@@ -3236,7 +3328,9 @@ static int fx_4_parse(struct fx_parser *parser) + uint32_t shader_count; + uint32_t inline_shader_count; + } header; ++ uint32_t i; + ++ parser->version = 10; + fx_parser_read_u32s(parser, &header, sizeof(header)); + parser->buffer_count = header.buffer_count; + parser->object_count = header.object_count; +@@ -3255,6 +3349,9 @@ static int fx_4_parse(struct fx_parser *parser) + fx_parse_buffers(parser); + fx_4_parse_objects(parser); + ++ for (i = 0; i < header.technique_count; ++i) ++ fx_parse_fx_4_technique(parser); ++ + return parser->failed ? - 1 : 0; + } + +@@ -3288,9 +3385,11 @@ static int fx_5_parse(struct fx_parser *parser) + uint32_t class_instance_element_count; + } header; + ++ parser->version = 11; + fx_parser_read_u32s(parser, &header, sizeof(header)); + parser->buffer_count = header.buffer_count; + parser->object_count = header.object_count; ++ parser->group_count = header.group_count; + + if (parser->end - parser->ptr < header.unstructured_size) + { +@@ -3306,6 +3405,8 @@ static int fx_5_parse(struct fx_parser *parser) + fx_parse_buffers(parser); + fx_4_parse_objects(parser); + ++ fx_parse_groups(parser); ++ + return parser->failed ? 
- 1 : 0; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index 46515818d07..a2a090e1c21 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -274,6 +274,10 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, + gen->prefix, reg->idx[0].offset, reg->idx[2].offset); + break; + ++ case VKD3DSPR_THREADID: ++ vkd3d_string_buffer_printf(buffer, "gl_GlobalInvocationID"); ++ break; ++ + case VKD3DSPR_IDXTEMP: + vkd3d_string_buffer_printf(buffer, "x%u", reg->idx[0].offset); + shader_glsl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); +@@ -385,7 +389,7 @@ static void shader_glsl_print_src(struct vkd3d_string_buffer *buffer, struct vkd + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled 'non-uniform' modifier."); + +- if (reg->type == VKD3DSPR_IMMCONST) ++ if (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_THREADID) + src_data_type = VKD3D_DATA_UINT; + else + src_data_type = VKD3D_DATA_FLOAT; +@@ -825,6 +829,37 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ + glsl_dst_cleanup(&dst, &gen->string_buffers); + } + ++static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_src_param *coord, const struct vkd3d_shader_src_param *ref, unsigned int coord_size) ++{ ++ uint32_t coord_mask = vkd3d_write_mask_from_component_count(coord_size); ++ ++ switch (coord_size) ++ { ++ case 1: ++ vkd3d_string_buffer_printf(buffer, "vec3("); ++ shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type); ++ vkd3d_string_buffer_printf(buffer, ", 0.0, "); ++ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); ++ vkd3d_string_buffer_printf(buffer, ")"); ++ break; ++ ++ case 4: ++ shader_glsl_print_src(buffer, gen, coord, coord_mask, 
coord->reg.data_type); ++ vkd3d_string_buffer_printf(buffer, ", "); ++ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); ++ break; ++ ++ default: ++ vkd3d_string_buffer_printf(buffer, "vec%u(", coord_size + 1); ++ shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type); ++ vkd3d_string_buffer_printf(buffer, ", "); ++ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); ++ vkd3d_string_buffer_printf(buffer, ")"); ++ break; ++ } ++} ++ + static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) + { + const struct glsl_resource_type_info *resource_type_info; +@@ -835,9 +870,9 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk + enum vkd3d_shader_resource_type resource_type; + struct vkd3d_string_buffer *sample; + enum vkd3d_data_type data_type; +- struct glsl_src coord; ++ unsigned int coord_size; + struct glsl_dst dst; +- uint32_t coord_mask; ++ bool shadow; + + if (vkd3d_shader_instruction_has_texel_offset(ins)) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, +@@ -868,13 +903,13 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk + + if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) + { +- coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); ++ coord_size = resource_type_info->coord_size; + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled resource type %#x.", resource_type); +- coord_mask = vkd3d_write_mask_from_component_count(2); ++ coord_size = 2; + } + + sampler_id = ins->src[2].reg.idx[0].offset; +@@ -882,6 +917,20 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk + if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) + { + sampler_space = 
d->register_space; ++ shadow = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; ++ ++ if (ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ) ++ { ++ if (!shadow) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Sampler %u is not a comparison sampler.", sampler_id); ++ } ++ else ++ { ++ if (shadow) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Sampler %u is a comparison sampler.", sampler_id); ++ } + } + else + { +@@ -891,17 +940,94 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk + } + + glsl_dst_init(&dst, gen, ins, &ins->dst[0]); +- glsl_src_init(&coord, gen, &ins->src[0], coord_mask); + sample = vkd3d_string_buffer_get(&gen->string_buffers); + +- vkd3d_string_buffer_printf(sample, "texture("); ++ if (ins->opcode == VKD3DSIH_SAMPLE_C_LZ) ++ vkd3d_string_buffer_printf(sample, "textureLod("); ++ else ++ vkd3d_string_buffer_printf(sample, "texture("); + shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space); +- vkd3d_string_buffer_printf(sample, ", %s)", coord.str->buffer); ++ vkd3d_string_buffer_printf(sample, ", "); ++ if (ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ) ++ shader_glsl_print_shadow_coord(sample, gen, &ins->src[0], &ins->src[3], coord_size); ++ else ++ shader_glsl_print_src(sample, gen, &ins->src[0], ++ vkd3d_write_mask_from_component_count(coord_size), ins->src[0].reg.data_type); ++ if (ins->opcode == VKD3DSIH_SAMPLE_B) ++ { ++ vkd3d_string_buffer_printf(sample, ", "); ++ shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); ++ } ++ else if (ins->opcode == VKD3DSIH_SAMPLE_C_LZ) ++ { ++ vkd3d_string_buffer_printf(sample, ", 0.0"); ++ } ++ vkd3d_string_buffer_printf(sample, ")"); + shader_glsl_print_swizzle(sample, ins->src[1].swizzle, 
ins->dst[0].write_mask); + + shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", sample->buffer); + + vkd3d_string_buffer_release(&gen->string_buffers, sample); ++ glsl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ ++static void shader_glsl_load_uav_typed(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ const struct glsl_resource_type_info *resource_type_info; ++ enum vkd3d_shader_component_type component_type; ++ const struct vkd3d_shader_descriptor_info1 *d; ++ enum vkd3d_shader_resource_type resource_type; ++ unsigned int uav_id, uav_idx, uav_space; ++ struct vkd3d_string_buffer *load; ++ struct glsl_src coord; ++ struct glsl_dst dst; ++ uint32_t coord_mask; ++ ++ if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, ++ "Descriptor indexing is not supported."); ++ ++ uav_id = ins->src[1].reg.idx[0].offset; ++ uav_idx = ins->src[1].reg.idx[1].offset; ++ if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_id))) ++ { ++ resource_type = d->resource_type; ++ uav_space = d->register_space; ++ component_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); ++ } ++ else ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Undeclared UAV descriptor %u.", uav_id); ++ uav_space = 0; ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; ++ component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ } ++ ++ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) ++ { ++ coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); ++ } ++ else ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled UAV type %#x.", resource_type); ++ coord_mask = vkd3d_write_mask_from_component_count(2); ++ } ++ ++ glsl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ 
glsl_src_init(&coord, gen, &ins->src[0], coord_mask); ++ load = vkd3d_string_buffer_get(&gen->string_buffers); ++ ++ vkd3d_string_buffer_printf(load, "imageLoad("); ++ shader_glsl_print_image_name(load, gen, uav_idx, uav_space); ++ vkd3d_string_buffer_printf(load, ", %s)", coord.str->buffer); ++ shader_glsl_print_swizzle(load, ins->src[1].swizzle, ins->dst[0].write_mask); ++ ++ shader_glsl_print_assignment_ext(gen, &dst, ++ vkd3d_data_type_from_component_type(component_type), "%s", load->buffer); ++ ++ vkd3d_string_buffer_release(&gen->string_buffers, load); + glsl_src_cleanup(&coord, &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); + } +@@ -1385,6 +1511,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, + case VKD3DSIH_LD: + shader_glsl_ld(gen, ins); + break; ++ case VKD3DSIH_LD_UAV_TYPED: ++ shader_glsl_load_uav_typed(gen, ins); ++ break; + case VKD3DSIH_LOG: + shader_glsl_intrinsic(gen, ins, "log2"); + break; +@@ -1425,6 +1554,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, + shader_glsl_intrinsic(gen, ins, "inversesqrt"); + break; + case VKD3DSIH_SAMPLE: ++ case VKD3DSIH_SAMPLE_B: ++ case VKD3DSIH_SAMPLE_C: ++ case VKD3DSIH_SAMPLE_C_LZ: + shader_glsl_sample(gen, ins); + break; + case VKD3DSIH_SQRT: +@@ -2078,6 +2210,10 @@ static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) + group_size->x, group_size->y, group_size->z); + } + ++ if (program->global_flags) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)program->global_flags); ++ + shader_glsl_generate_descriptor_declarations(gen); + shader_glsl_generate_input_declarations(gen); + shader_glsl_generate_output_declarations(gen); +@@ -2213,8 +2349,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) 
< 0) + return ret; + +- VKD3D_ASSERT(program->normalised_io); +- VKD3D_ASSERT(program->normalised_hull_cp_io); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); + + vkd3d_glsl_generator_init(&generator, program, compile_info, + descriptor_info, combined_sampler_info, message_context); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 6ad0117fd5c..c7aa148ea11 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -3164,6 +3164,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP1_DSY_FINE] = "dsy_fine", + [HLSL_OP1_EXP2] = "exp2", + [HLSL_OP1_F16TOF32] = "f16tof32", ++ [HLSL_OP1_F32TOF16] = "f32tof16", + [HLSL_OP1_FLOOR] = "floor", + [HLSL_OP1_FRACT] = "fract", + [HLSL_OP1_LOG2] = "log2", +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index efe3aec024b..e234cd0ba40 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -410,10 +410,12 @@ struct hlsl_attribute + #define HLSL_MODIFIER_SINGLE 0x00020000 + #define HLSL_MODIFIER_EXPORT 0x00040000 + #define HLSL_STORAGE_ANNOTATION 0x00080000 ++#define HLSL_MODIFIER_UNORM 0x00100000 ++#define HLSL_MODIFIER_SNORM 0x00200000 + + #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ + HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ +- HLSL_MODIFIER_COLUMN_MAJOR) ++ HLSL_MODIFIER_COLUMN_MAJOR | HLSL_MODIFIER_UNORM | HLSL_MODIFIER_SNORM) + + #define HLSL_INTERPOLATION_MODIFIERS_MASK (HLSL_STORAGE_NOINTERPOLATION | HLSL_STORAGE_CENTROID | \ + HLSL_STORAGE_NOPERSPECTIVE | HLSL_STORAGE_LINEAR) +@@ -514,6 +516,9 @@ struct hlsl_ir_var + + /* Whether the shader performs dereferences with non-constant offsets in the variable. 
*/ + bool indexable; ++ /* Whether this is a semantic variable that was split from an array, or is the first ++ * element of a struct, and thus needs to be aligned when packed in the signature. */ ++ bool force_align; + + uint32_t is_input_semantic : 1; + uint32_t is_output_semantic : 1; +@@ -688,6 +693,7 @@ enum hlsl_ir_expr_op + HLSL_OP1_DSY_FINE, + HLSL_OP1_EXP2, + HLSL_OP1_F16TOF32, ++ HLSL_OP1_F32TOF16, + HLSL_OP1_FLOOR, + HLSL_OP1_FRACT, + HLSL_OP1_LOG2, +@@ -1634,6 +1640,9 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + ++enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, ++ unsigned int storage_modifiers); ++ + struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); + + int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +index 97d8b13772b..ca983fc5ffd 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +@@ -142,6 +142,7 @@ samplerCUBE {return KW_SAMPLERCUBE; } + SamplerState {return KW_SAMPLER; } + sampler_state {return KW_SAMPLER_STATE; } + shared {return KW_SHARED; } ++snorm {return KW_SNORM; } + stateblock {return KW_STATEBLOCK; } + stateblock_state {return KW_STATEBLOCK_STATE; } + static {return KW_STATIC; } +@@ -171,6 +172,7 @@ true {return KW_TRUE; } + typedef {return KW_TYPEDEF; } + unsigned {return KW_UNSIGNED; } + uniform {return KW_UNIFORM; } ++unorm {return KW_UNORM; } + vector {return KW_VECTOR; } + VertexShader {return KW_VERTEXSHADER; } + vertexshader {return KW_VERTEXSHADER; } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 213cec79c3d..49cff4c81b8 100644 +--- 
a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -4024,6 +4024,21 @@ static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, + return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, operands, type, loc); + } + ++static bool intrinsic_f32tof16(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_type *type; ++ ++ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) ++ return false; ++ ++ type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); ++ ++ operands[0] = params->args[0]; ++ return add_expr(ctx, params->instrs, HLSL_OP1_F32TOF16, operands, type, loc); ++} ++ + static bool intrinsic_floor(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -5199,6 +5214,7 @@ intrinsic_functions[] = + {"exp", 1, true, intrinsic_exp}, + {"exp2", 1, true, intrinsic_exp2}, + {"f16tof32", 1, true, intrinsic_f16tof32}, ++ {"f32tof16", 1, true, intrinsic_f32tof16}, + {"faceforward", 3, true, intrinsic_faceforward}, + {"floor", 1, true, intrinsic_floor}, + {"fmod", 2, true, intrinsic_fmod}, +@@ -6479,6 +6495,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_SAMPLER_STATE + %token KW_SAMPLERCOMPARISONSTATE + %token KW_SHARED ++%token KW_SNORM + %token KW_STATEBLOCK + %token KW_STATEBLOCK_STATE + %token KW_STATIC +@@ -6503,6 +6520,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_TYPEDEF + %token KW_UNSIGNED + %token KW_UNIFORM ++%token KW_UNORM + %token KW_VECTOR + %token KW_VERTEXSHADER + %token KW_VOID +@@ -6642,6 +6660,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %type type + %type type_no_void + %type typedef_type ++%type resource_format + + %type state_block_list + %type type_spec +@@ -7637,6 
+7656,15 @@ rov_type: + $$ = HLSL_SAMPLER_DIM_3D; + } + ++resource_format: ++ var_modifiers type ++ { ++ uint32_t modifiers = $1; ++ ++ if (!($$ = apply_type_modifiers(ctx, $2, &modifiers, false, &@1))) ++ YYABORT; ++ } ++ + type_no_void: + KW_VECTOR '<' type ',' C_INTEGER '>' + { +@@ -7730,18 +7758,18 @@ type_no_void: + { + $$ = hlsl_new_texture_type(ctx, $1, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); + } +- | texture_type '<' type '>' ++ | texture_type '<' resource_format '>' + { + validate_texture_format_type(ctx, $3, &@3); + $$ = hlsl_new_texture_type(ctx, $1, $3, 0); + } +- | texture_ms_type '<' type '>' ++ | texture_ms_type '<' resource_format '>' + { + validate_texture_format_type(ctx, $3, &@3); + + $$ = hlsl_new_texture_type(ctx, $1, $3, 0); + } +- | texture_ms_type '<' type ',' shift_expr '>' ++ | texture_ms_type '<' resource_format ',' shift_expr '>' + { + unsigned int sample_count; + struct hlsl_block block; +@@ -7757,14 +7785,14 @@ type_no_void: + + $$ = hlsl_new_texture_type(ctx, $1, $3, sample_count); + } +- | uav_type '<' type '>' ++ | uav_type '<' resource_format '>' + { + validate_uav_type(ctx, $1, $3, &@3); + $$ = hlsl_new_uav_type(ctx, $1, $3, false); + } +- | rov_type '<' type '>' ++ | rov_type '<' resource_format '>' + { +- validate_uav_type(ctx, $1, $3, &@3); ++ validate_uav_type(ctx, $1, $3, &@4); + $$ = hlsl_new_uav_type(ctx, $1, $3, true); + } + | KW_STRING +@@ -8314,6 +8342,14 @@ var_modifiers: + { + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_EXPORT, &@1); + } ++ | KW_UNORM var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_UNORM, &@1); ++ } ++ | KW_SNORM var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SNORM, &@1); ++ } + | var_identifier var_modifiers + { + $$ = $2; +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 88bec8610cb..6e1b2b437b0 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ 
b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -278,7 +278,7 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls + + static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, +- uint32_t index, bool output, const struct vkd3d_shader_location *loc) ++ uint32_t index, bool output, bool force_align, const struct vkd3d_shader_location *loc) + { + struct hlsl_semantic new_semantic; + struct hlsl_ir_var *ext_var; +@@ -338,14 +338,32 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + else + ext_var->is_input_semantic = 1; + ext_var->is_param = var->is_param; ++ ext_var->force_align = force_align; + list_add_before(&var->scope_entry, &ext_var->scope_entry); + list_add_tail(&func->extern_vars, &ext_var->extern_entry); + + return ext_var; + } + ++static uint32_t combine_field_storage_modifiers(uint32_t modifiers, uint32_t field_modifiers) ++{ ++ field_modifiers |= modifiers; ++ ++ /* TODO: 'sample' modifier is not supported yet. */ ++ ++ /* 'nointerpolation' always takes precedence, next the same is done for ++ * 'sample', remaining modifiers are combined. 
*/ ++ if (field_modifiers & HLSL_STORAGE_NOINTERPOLATION) ++ { ++ field_modifiers &= ~HLSL_INTERPOLATION_MODIFIERS_MASK; ++ field_modifiers |= HLSL_STORAGE_NOINTERPOLATION; ++ } ++ ++ return field_modifiers; ++} ++ + static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, +- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++ uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) + { + struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; + struct vkd3d_shader_location *loc = &lhs->node.loc; +@@ -369,14 +387,17 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec + if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); + ++ if (hlsl_type_major_size(type) > 1) ++ force_align = true; ++ + for (i = 0; i < hlsl_type_major_size(type); ++i) + { + struct hlsl_ir_node *store, *cast; + struct hlsl_ir_var *input; + struct hlsl_ir_load *load; + +- if (!(input = add_semantic_var(ctx, func, var, vector_type_src, modifiers, semantic, +- semantic_index + i, false, loc))) ++ if (!(input = add_semantic_var(ctx, func, var, vector_type_src, ++ modifiers, semantic, semantic_index + i, false, force_align, loc))) + return; + + if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) +@@ -408,8 +429,9 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec + } + } + +-static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, +- struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, ++ struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, uint32_t modifiers, ++ struct hlsl_semantic *semantic, uint32_t semantic_index, bool 
force_align) + { + struct vkd3d_shader_location *loc = &lhs->node.loc; + struct hlsl_type *type = lhs->node.data_type; +@@ -425,12 +447,14 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func + + for (i = 0; i < hlsl_type_element_count(type); ++i) + { +- uint32_t element_modifiers = modifiers; ++ uint32_t element_modifiers; + + if (type->class == HLSL_CLASS_ARRAY) + { + elem_semantic_index = semantic_index + + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; ++ element_modifiers = modifiers; ++ force_align = true; + } + else + { +@@ -444,17 +468,8 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func + semantic = &field->semantic; + elem_semantic_index = semantic->index; + loc = &field->loc; +- element_modifiers |= field->storage_modifiers; +- +- /* TODO: 'sample' modifier is not supported yet */ +- +- /* 'nointerpolation' always takes precedence, next the same is done for 'sample', +- remaining modifiers are combined. 
*/ +- if (element_modifiers & HLSL_STORAGE_NOINTERPOLATION) +- { +- element_modifiers &= ~HLSL_INTERPOLATION_MODIFIERS_MASK; +- element_modifiers |= HLSL_STORAGE_NOINTERPOLATION; +- } ++ element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers); ++ force_align = (i == 0); + } + + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) +@@ -466,12 +481,13 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func + return; + list_add_after(&c->entry, &element_load->node.entry); + +- prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, semantic, elem_semantic_index); ++ prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, ++ semantic, elem_semantic_index, force_align); + } + } + else + { +- prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index); ++ prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index, force_align); + } + } + +@@ -486,11 +502,12 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function + return; + list_add_head(&func->body.instrs, &load->node.entry); + +- prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false); + } + +-static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, +- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ struct hlsl_ir_load *rhs, uint32_t modifiers, ++ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) + { + struct hlsl_type *type = rhs->node.data_type, *vector_type; + struct vkd3d_shader_location *loc = &rhs->node.loc; +@@ -511,6 +528,9 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec 
+ + vector_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); + ++ if (hlsl_type_major_size(type) > 1) ++ force_align = true; ++ + for (i = 0; i < hlsl_type_major_size(type); ++i) + { + struct hlsl_ir_node *store; +@@ -518,7 +538,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec + struct hlsl_ir_load *load; + + if (!(output = add_semantic_var(ctx, func, var, vector_type, +- modifiers, semantic, semantic_index + i, true, loc))) ++ modifiers, semantic, semantic_index + i, true, force_align, loc))) + return; + + if (type->class == HLSL_CLASS_MATRIX) +@@ -546,8 +566,9 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec + } + } + +-static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, +- struct hlsl_ir_load *rhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++static void append_output_copy_recurse(struct hlsl_ctx *ctx, ++ struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers, ++ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) + { + struct vkd3d_shader_location *loc = &rhs->node.loc; + struct hlsl_type *type = rhs->node.data_type; +@@ -563,10 +584,14 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func + + for (i = 0; i < hlsl_type_element_count(type); ++i) + { ++ uint32_t element_modifiers; ++ + if (type->class == HLSL_CLASS_ARRAY) + { + elem_semantic_index = semantic_index + + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; ++ element_modifiers = modifiers; ++ force_align = true; + } + else + { +@@ -577,6 +602,8 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func + semantic = &field->semantic; + elem_semantic_index = semantic->index; + loc = &field->loc; ++ element_modifiers = combine_field_storage_modifiers(modifiers, 
field->storage_modifiers); ++ force_align = (i == 0); + } + + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) +@@ -587,12 +614,13 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func + return; + hlsl_block_add_instr(&func->body, &element_load->node); + +- append_output_copy_recurse(ctx, func, element_load, modifiers, semantic, elem_semantic_index); ++ append_output_copy_recurse(ctx, func, element_load, element_modifiers, ++ semantic, elem_semantic_index, force_align); + } + } + else + { +- append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index); ++ append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index, force_align); + } + } + +@@ -608,7 +636,7 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function + return; + hlsl_block_add_instr(&func->body, &load->node); + +- append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false); + } + + bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), +@@ -4051,6 +4079,44 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + return true; + } + ++static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_node *cond, *cond_cast, *abs, *neg; ++ struct hlsl_type *float_type; ++ struct hlsl_ir_jump *jump; ++ struct hlsl_block block; ++ ++ if (instr->type != HLSL_IR_JUMP) ++ return false; ++ jump = hlsl_ir_jump(instr); ++ if (jump->type != HLSL_IR_JUMP_DISCARD_NZ) ++ return false; ++ ++ cond = jump->condition.node; ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->dimx); ++ ++ hlsl_block_init(&block); ++ ++ if (!(cond_cast = hlsl_new_cast(ctx, cond, float_type, &instr->loc))) ++ return false; ++ 
hlsl_block_add_instr(&block, cond_cast); ++ ++ if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond_cast, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(&block, abs); ++ ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(&block, neg); ++ ++ list_move_tail(&instr->entry, &block.instrs); ++ hlsl_src_remove(&jump->condition); ++ hlsl_src_from_node(&jump->condition, neg); ++ jump->type = HLSL_IR_JUMP_DISCARD_NEG; ++ ++ return true; ++} ++ + static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + switch (instr->type) +@@ -4419,6 +4485,9 @@ struct register_allocator + uint32_t reg; + unsigned int writemask; + unsigned int first_write, last_read; ++ ++ /* Two allocations with different mode can't share the same register. */ ++ int mode; + } *allocations; + size_t count, capacity; + +@@ -4428,10 +4497,17 @@ struct register_allocator + + /* Total number of registers allocated so far. Used to declare sm4 temp count. */ + uint32_t reg_count; ++ ++ /* Special flag so allocations that can share registers prioritize those ++ * that will result in smaller writemasks. ++ * For instance, a single-register allocation would prefer to share a register ++ * whose .xy components are already allocated (becoming .z) instead of a ++ * register whose .xyz components are already allocated (becoming .w). 
*/ ++ bool prioritize_smaller_writemasks; + }; + + static unsigned int get_available_writemask(const struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, uint32_t reg_idx) ++ unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode) + { + unsigned int writemask = VKD3DSP_WRITEMASK_ALL; + size_t i; +@@ -4446,7 +4522,11 @@ static unsigned int get_available_writemask(const struct register_allocator *all + + if (allocation->reg == reg_idx + && first_write < allocation->last_read && last_read > allocation->first_write) ++ { + writemask &= ~allocation->writemask; ++ if (allocation->mode != mode) ++ writemask = 0; ++ } + + if (!writemask) + break; +@@ -4455,8 +4535,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all + return writemask; + } + +-static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, +- uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read) ++static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, ++ unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode) + { + struct allocation *allocation; + +@@ -4469,6 +4549,7 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a + allocation->writemask = writemask; + allocation->first_write = first_write; + allocation->last_read = last_read; ++ allocation->mode = mode; + + allocator->reg_count = max(allocator->reg_count, reg_idx + 1); + } +@@ -4478,26 +4559,35 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a + * register, even if they don't use it completely. 
*/ + static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, unsigned int reg_size, +- unsigned int component_count) ++ unsigned int component_count, int mode, bool force_align) + { ++ unsigned int required_size = force_align ? 4 : reg_size; ++ unsigned int writemask = 0, pref; + struct hlsl_reg ret = {0}; +- unsigned int writemask; + uint32_t reg_idx; + + VKD3D_ASSERT(component_count <= reg_size); + +- for (reg_idx = 0;; ++reg_idx) ++ pref = allocator->prioritize_smaller_writemasks ? 4 : required_size; ++ for (; pref >= required_size; --pref) + { +- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx); +- +- if (vkd3d_popcount(writemask) >= reg_size) ++ for (reg_idx = 0; pref == required_size || reg_idx < allocator->reg_count; ++reg_idx) + { +- writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1); +- break; ++ unsigned int available_writemask = get_available_writemask(allocator, ++ first_write, last_read, reg_idx, mode); ++ ++ if (vkd3d_popcount(available_writemask) >= pref) ++ { ++ writemask = hlsl_combine_writemasks(available_writemask, (1u << reg_size) - 1); ++ break; ++ } + } ++ if (writemask) ++ break; + } + +- record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); ++ VKD3D_ASSERT(vkd3d_popcount(writemask) == reg_size); ++ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); + + ret.id = reg_idx; + ret.allocation_size = 1; +@@ -4508,7 +4598,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + + /* Allocate a register with writemask, while reserving reg_writemask. 
*/ + static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask) ++ unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode) + { + struct hlsl_reg ret = {0}; + uint32_t reg_idx; +@@ -4517,11 +4607,12 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct + + for (reg_idx = 0;; ++reg_idx) + { +- if ((get_available_writemask(allocator, first_write, last_read, reg_idx) & reg_writemask) == reg_writemask) ++ if ((get_available_writemask(allocator, first_write, last_read, ++ reg_idx, mode) & reg_writemask) == reg_writemask) + break; + } + +- record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read); ++ record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode); + + ret.id = reg_idx; + ret.allocation_size = 1; +@@ -4530,8 +4621,8 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct + return ret; + } + +-static bool is_range_available(const struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) ++static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, ++ unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode) + { + unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; + unsigned int writemask; +@@ -4539,18 +4630,18 @@ static bool is_range_available(const struct register_allocator *allocator, + + for (i = 0; i < (reg_size / 4); ++i) + { +- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i); ++ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode); + if (writemask != VKD3DSP_WRITEMASK_ALL) + return false; + } +- writemask = get_available_writemask(allocator, 
first_write, last_read, reg_idx + (reg_size / 4)); ++ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode); + if ((writemask & last_reg_mask) != last_reg_mask) + return false; + return true; + } + + static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, unsigned int reg_size) ++ unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode) + { + struct hlsl_reg ret = {0}; + uint32_t reg_idx; +@@ -4558,14 +4649,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo + + for (reg_idx = 0;; ++reg_idx) + { +- if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size)) ++ if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode)) + break; + } + + for (i = 0; i < reg_size / 4; ++i) +- record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); ++ record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode); + if (reg_size % 4) +- record_allocation(ctx, allocator, reg_idx + (reg_size / 4), (1u << (reg_size % 4)) - 1, first_write, last_read); ++ record_allocation(ctx, allocator, reg_idx + (reg_size / 4), ++ (1u << (reg_size % 4)) - 1, first_write, last_read, mode); + + ret.id = reg_idx; + ret.allocation_size = align(reg_size, 4) / 4; +@@ -4581,9 +4673,9 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, + /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ + + if (type->class <= HLSL_CLASS_VECTOR) +- return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx); ++ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false); + else +- return allocate_range(ctx, allocator, first_write, last_read, reg_size); ++ return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); + } + + static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) +@@ -4762,7 +4854,7 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, + + if (reg_writemask) + instr->reg = allocate_register_with_masks(ctx, allocator, +- instr->index, instr->last_read, reg_writemask, dst_writemask); ++ instr->index, instr->last_read, reg_writemask, dst_writemask, 0); + else + instr->reg = allocate_numeric_registers_for_type(ctx, allocator, + instr->index, instr->last_read, instr->data_type); +@@ -5083,14 +5175,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + { + if (i < bind_count) + { +- if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) ++ if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Overlapping register() reservations on 'c%u'.", reg_idx + i); + } +- record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); ++ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); + } +- record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); ++ record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); + } + + var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; +@@ -5113,7 +5205,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + + if 
(!var->regs[HLSL_REGSET_NUMERIC].allocated) + { +- var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size); ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0); + TRACE("Allocated %s to %s.\n", var->name, + debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + } +@@ -5156,7 +5248,7 @@ uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_d + var = entry_func->parameters.vars[i]; + if (var->is_output_semantic) + { +- record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); ++ record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0); + break; + } + } +@@ -5168,8 +5260,38 @@ uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_d + return allocator.reg_count; + } + ++enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, unsigned int storage_modifiers) ++{ ++ unsigned int i; ++ ++ static const struct ++ { ++ unsigned int modifiers; ++ enum vkd3d_shader_interpolation_mode mode; ++ } ++ modes[] = ++ { ++ {HLSL_STORAGE_CENTROID | HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID}, ++ {HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE}, ++ {HLSL_STORAGE_CENTROID, VKD3DSIM_LINEAR_CENTROID}, ++ {HLSL_STORAGE_CENTROID | HLSL_STORAGE_LINEAR, VKD3DSIM_LINEAR_CENTROID}, ++ }; ++ ++ if ((storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) ++ || base_type_get_semantic_equivalent(type->e.numeric.type) == HLSL_TYPE_UINT) ++ return VKD3DSIM_CONSTANT; ++ ++ for (i = 0; i < ARRAY_SIZE(modes); ++i) ++ { ++ if ((storage_modifiers & modes[i].modifiers) == modes[i].modifiers) ++ return modes[i].mode; ++ } ++ ++ return VKD3DSIM_LINEAR; ++} ++ + static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, +- unsigned int *counter, bool output, bool is_patch_constant_func) ++ struct 
register_allocator *allocator, bool output, bool optimize, bool is_patch_constant_func) + { + static const char *const shader_names[] = + { +@@ -5228,6 +5350,13 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + + if ((builtin = sm4_register_from_semantic_name(&version, var->semantic.name, output, &type, &has_idx))) + reg = has_idx ? var->semantic.index : 0; ++ ++ if (semantic == VKD3D_SHADER_SV_TESS_FACTOR_TRIINT) ++ { ++ /* While SV_InsideTessFactor can be declared as 'float' for "tri" ++ * domains, it is allocated as if it was 'float[1]'. */ ++ var->force_align = true; ++ } + } + + if (builtin) +@@ -5237,28 +5366,39 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + } + else + { +- var->regs[HLSL_REGSET_NUMERIC].allocated = true; +- var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; +- var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; +- var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; +- TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', +- var->regs[HLSL_REGSET_NUMERIC], var->data_type)); ++ int mode = (ctx->profile->major_version < 4) ++ ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); ++ unsigned int reg_size = optimize ? var->data_type->dimx : 4; ++ ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, ++ UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align); ++ ++ TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 
'o' : 'v', ++ var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); + } + } + + static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { ++ struct register_allocator input_allocator = {0}, output_allocator = {0}; ++ bool is_vertex_shader = ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; ++ bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL; + bool is_patch_constant_func = entry_func == ctx->patch_constant_func; +- unsigned int input_counter = 0, output_counter = 0; + struct hlsl_ir_var *var; + ++ input_allocator.prioritize_smaller_writemasks = true; ++ output_allocator.prioritize_smaller_writemasks = true; ++ + LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_input_semantic) +- allocate_semantic_register(ctx, var, &input_counter, false, is_patch_constant_func); ++ allocate_semantic_register(ctx, var, &input_allocator, false, !is_vertex_shader, is_patch_constant_func); + if (var->is_output_semantic) +- allocate_semantic_register(ctx, var, &output_counter, true, is_patch_constant_func); ++ allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader, is_patch_constant_func); + } ++ ++ vkd3d_free(input_allocator.allocations); ++ vkd3d_free(output_allocator.allocations); + } + + static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, +@@ -6282,7 +6422,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + } + + static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, +- struct shader_signature *signature, bool output, struct hlsl_ir_var *var) ++ struct shader_signature *signature, bool output, bool is_patch_constant_func, struct hlsl_ir_var *var) + { + enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; + enum vkd3d_shader_component_type component_type; +@@ -6296,9 +6436,8 @@ static void generate_vsir_signature_entry(struct hlsl_ctx 
*ctx, struct vsir_prog + struct vkd3d_string_buffer *string; + bool has_idx, ret; + +- ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, +- ctx->semantic_compat_mapping, ctx->domain, var->semantic.name, var->semantic.index, +- output, signature == &program->patch_constant_signature); ++ ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ctx->semantic_compat_mapping, ++ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); + VKD3D_ASSERT(ret); + if (sysval == ~0u) + return; +@@ -6306,16 +6445,15 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog + if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) + { + register_index = has_idx ? var->semantic.index : ~0u; ++ mask = (1u << var->data_type->dimx) - 1; + } + else + { + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); + register_index = var->regs[HLSL_REGSET_NUMERIC].id; ++ mask = var->regs[HLSL_REGSET_NUMERIC].writemask; + } + +- /* NOTE: remember to change this to the actually allocated mask once +- * we start optimizing interstage signatures. */ +- mask = (1u << var->data_type->dimx) - 1; + use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ + + switch (var->data_type->e.numeric.type) +@@ -6410,21 +6548,27 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog + static void generate_vsir_signature(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_function_decl *func) + { ++ bool is_domain = program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; ++ bool is_patch_constant_func = func == ctx->patch_constant_func; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (func == ctx->patch_constant_func) ++ if (var->is_input_semantic) + { +- generate_vsir_signature_entry(ctx, program, +- &program->patch_constant_signature, var->is_output_semantic, var); ++ if (is_patch_constant_func) ++ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, true, var); ++ else if (is_domain) ++ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, false, var); ++ else ++ generate_vsir_signature_entry(ctx, program, &program->input_signature, false, false, var); + } +- else ++ if (var->is_output_semantic) + { +- if (var->is_input_semantic) +- generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var); +- if (var->is_output_semantic) +- generate_vsir_signature_entry(ctx, program, &program->output_signature, true, var); ++ if (is_patch_constant_func) ++ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, true, true, var); ++ else ++ generate_vsir_signature_entry(ctx, program, &program->output_signature, true, false, var); + } + } + } +@@ -7366,7 +7510,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; +- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) ++ if (!vsir_program_init(program, NULL, &version, 0, 
VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; +@@ -7404,7 +7548,7 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + +- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) ++ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; +@@ -7715,6 +7859,78 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru + return true; + } + ++static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *call, *rhs, *store; ++ struct hlsl_ir_function_decl *func; ++ unsigned int component_count; ++ struct hlsl_ir_load *load; ++ struct hlsl_ir_expr *expr; ++ struct hlsl_ir_var *lhs; ++ char *body; ++ ++ static const char template[] = ++ "typedef uint%u uintX;\n" ++ "uintX soft_f32tof16(float%u x)\n" ++ "{\n" ++ " uintX v = asuint(x);\n" ++ " uintX v_abs = v & 0x7fffffff;\n" ++ " uintX sign_bit = (v >> 16) & 0x8000;\n" ++ " uintX exp = (v >> 23) & 0xff;\n" ++ " uintX mantissa = v & 0x7fffff;\n" ++ " uintX nan16;\n" ++ " uintX nan = (v & 0x7f800000) == 0x7f800000;\n" ++ " uintX val;\n" ++ "\n" ++ " val = 113 - exp;\n" ++ " val = (mantissa + 0x800000) >> val;\n" ++ " val >>= 13;\n" ++ "\n" ++ " val = (exp - 127) < -38 ? 0 : val;\n" ++ "\n" ++ " val = v_abs < 0x38800000 ? val : (v_abs + 0xc8000000) >> 13;\n" ++ " val = v_abs > 0x47ffe000 ? 0x7bff : val;\n" ++ "\n" ++ " nan16 = (((v >> 13) | (v >> 3) | v) & 0x3ff) + 0x7c00;\n" ++ " val = nan ? 
nan16 : val;\n" ++ "\n" ++ " return (val & 0x7fff) + sign_bit;\n" ++ "}\n"; ++ ++ if (node->type != HLSL_IR_EXPR) ++ return false; ++ ++ expr = hlsl_ir_expr(node); ++ ++ if (expr->op != HLSL_OP1_F32TOF16) ++ return false; ++ ++ rhs = expr->operands[0].node; ++ component_count = hlsl_type_component_count(rhs->data_type); ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) ++ return false; ++ ++ if (!(func = hlsl_compile_internal_function(ctx, "soft_f32tof16", body))) ++ return false; ++ ++ lhs = func->parameters.vars[0]; ++ ++ if (!(store = hlsl_new_simple_store(ctx, lhs, rhs))) ++ return false; ++ hlsl_block_add_instr(block, store); ++ ++ if (!(call = hlsl_new_call(ctx, func, &node->loc))) ++ return false; ++ hlsl_block_add_instr(block, call); ++ ++ if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc))) ++ return false; ++ hlsl_block_add_instr(block, &load->node); ++ ++ return true; ++} ++ + static void process_entry_function(struct hlsl_ctx *ctx, + const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func) + { +@@ -7743,7 +7959,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, + return; + + if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) ++ { + lower_ir(ctx, lower_f16tof32, body); ++ lower_ir(ctx, lower_f32tof16, body); ++ } + + lower_return(ctx, entry_func, body, false); + +@@ -7797,6 +8016,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, + { + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + } ++ else ++ { ++ hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); ++ } + + transform_unroll_loops(ctx, body); + hlsl_run_const_passes(ctx, body); +@@ -7893,6 +8116,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a 
[numthreads] attribute.", entry_func->func->name); ++ else if (profile->type == VKD3D_SHADER_TYPE_DOMAIN && ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID) ++ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, ++ "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name); + + hlsl_block_init(&global_uniform_block); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index b47f12d2188..19dc36d9191 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -75,7 +75,7 @@ static int convert_parameter_info(const struct vkd3d_shader_compile_info *compil + + bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, +- bool normalised_io) ++ enum vsir_normalisation_level normalisation_level) + { + memset(program, 0, sizeof(*program)); + +@@ -98,8 +98,7 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c + + program->shader_version = *version; + program->cf_type = cf_type; +- program->normalised_io = normalised_io; +- program->normalised_hull_cp_io = normalised_io; ++ program->normalisation_level = normalisation_level; + return shader_instruction_array_init(&program->instructions, reserve); + } + +@@ -265,6 +264,13 @@ static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigne + dst->reg.idx[0].offset = idx; + } + ++static void dst_param_init_temp_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx) ++{ ++ vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ dst->reg.idx[0].offset = idx; ++ dst->reg.dimension = VSIR_DIMENSION_VEC4; ++} ++ + static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) + { + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +@@ -693,6 +699,7 @@ static enum 
vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr + + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_CONSTANT_BUFFER: ++ case VKD3DSIH_DCL_GLOBAL_FLAGS: + case VKD3DSIH_DCL_SAMPLER: + case VKD3DSIH_DCL_TEMPS: + case VKD3DSIH_DCL_THREAD_GROUP: +@@ -1135,11 +1142,11 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + enum vkd3d_result ret; + unsigned int i, j; + +- VKD3D_ASSERT(!program->normalised_hull_cp_io); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_NOT_NORMALISED); + + if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) + { +- program->normalised_hull_cp_io = true; ++ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; + return VKD3D_OK; + } + +@@ -1186,7 +1193,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + program->instructions = normaliser.instructions; +- program->normalised_hull_cp_io = true; ++ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; + return VKD3D_OK; + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +@@ -1195,7 +1202,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + ret = control_point_normaliser_emit_hs_input(&normaliser, &program->input_signature, + input_control_point_count, i, &location); + program->instructions = normaliser.instructions; +- program->normalised_hull_cp_io = true; ++ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; + return ret; + default: + break; +@@ -1203,7 +1210,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + } + + program->instructions = normaliser.instructions; +- program->normalised_hull_cp_io = true; ++ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; + return VKD3D_OK; + } + +@@ -1917,7 +1924,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program + struct 
vkd3d_shader_instruction *ins; + unsigned int i; + +- VKD3D_ASSERT(!program->normalised_io); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO); + + normaliser.phase = VKD3DSIH_INVALID; + normaliser.shader_type = program->shader_version.type; +@@ -1975,7 +1982,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program + + program->instructions = normaliser.instructions; + program->use_vocp = normaliser.use_vocp; +- program->normalised_io = true; ++ program->normalisation_level = VSIR_FULLY_NORMALISED_IO; + return VKD3D_OK; + } + +@@ -6133,6 +6140,192 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra + return VKD3D_OK; + } + ++static bool has_texcoord_signature_element(const struct shader_signature *signature) ++{ ++ for (size_t i = 0; i < signature->element_count; ++i) ++ { ++ if (!ascii_strcasecmp(signature->elements[i].semantic_name, "TEXCOORD")) ++ return true; ++ } ++ return false; ++} ++ ++/* Returns true if replacement was done. */ ++static bool replace_texcoord_with_point_coord(struct vsir_program *program, ++ struct vkd3d_shader_src_param *src, unsigned int coord_temp) ++{ ++ uint32_t prev_swizzle = src->swizzle; ++ const struct signature_element *e; ++ ++ /* The input semantic may have a nontrivial mask, which we need to ++ * correct for. E.g. if the mask is .yz, and we read from .y, that needs ++ * to become .x. */ ++ static const uint32_t inverse_swizzles[16] = ++ { ++ /* Use _ for "undefined" components, for clarity. 
*/ ++#define VKD3D_SHADER_SWIZZLE__ VKD3D_SHADER_SWIZZLE_X ++ 0, ++ /* .x */ VKD3D_SHADER_SWIZZLE(X, _, _, _), ++ /* .y */ VKD3D_SHADER_SWIZZLE(_, X, _, _), ++ /* .xy */ VKD3D_SHADER_SWIZZLE(X, Y, _, _), ++ /* .z */ VKD3D_SHADER_SWIZZLE(_, _, X, _), ++ /* .xz */ VKD3D_SHADER_SWIZZLE(X, _, Y, _), ++ /* .yz */ VKD3D_SHADER_SWIZZLE(_, X, Y, _), ++ /* .xyz */ VKD3D_SHADER_SWIZZLE(X, Y, Z, _), ++ /* .w */ VKD3D_SHADER_SWIZZLE(_, _, _, X), ++ /* .xw */ VKD3D_SHADER_SWIZZLE(X, _, _, Y), ++ /* .yw */ VKD3D_SHADER_SWIZZLE(_, X, _, Y), ++ /* .xyw */ VKD3D_SHADER_SWIZZLE(X, Y, _, Z), ++ /* .zw */ VKD3D_SHADER_SWIZZLE(_, _, X, Y), ++ /* .xzw */ VKD3D_SHADER_SWIZZLE(X, _, Y, Z), ++ /* .yzw */ VKD3D_SHADER_SWIZZLE(_, X, Y, Z), ++ /* .xyzw */ VKD3D_SHADER_SWIZZLE(X, Y, Z, W), ++#undef VKD3D_SHADER_SWIZZLE__ ++ }; ++ ++ if (src->reg.type != VKD3DSPR_INPUT) ++ return false; ++ e = &program->input_signature.elements[src->reg.idx[0].offset]; ++ ++ if (ascii_strcasecmp(e->semantic_name, "TEXCOORD")) ++ return false; ++ ++ src->reg.type = VKD3DSPR_TEMP; ++ src->reg.idx[0].offset = coord_temp; ++ ++ /* If the mask is already contiguous and zero-based, no need to remap ++ * the swizzle. 
*/ ++ if (!(e->mask & (e->mask + 1))) ++ return true; ++ ++ src->swizzle = 0; ++ for (unsigned int i = 0; i < 4; ++i) ++ { ++ src->swizzle |= vsir_swizzle_get_component(inverse_swizzles[e->mask], ++ vsir_swizzle_get_component(prev_swizzle, i)) << VKD3D_SHADER_SWIZZLE_SHIFT(i); ++ } ++ ++ return true; ++} ++ ++static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ const struct vkd3d_shader_parameter1 *sprite_parameter = NULL; ++ static const struct vkd3d_shader_location no_loc; ++ struct vkd3d_shader_instruction *ins; ++ bool used_texcoord = false; ++ unsigned int coord_temp; ++ size_t i, insert_pos; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ return VKD3D_OK; ++ ++ for (i = 0; i < program->parameter_count; ++i) ++ { ++ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; ++ ++ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE) ++ sprite_parameter = parameter; ++ } ++ ++ if (!sprite_parameter) ++ return VKD3D_OK; ++ ++ if (sprite_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Unsupported point sprite parameter type %#x.", sprite_parameter->type); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ if (sprite_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) ++ { ++ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid point sprite parameter data type %#x.", sprite_parameter->data_type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ if (!sprite_parameter->u.immediate_constant.u.u32) ++ return VKD3D_OK; ++ ++ if (!has_texcoord_signature_element(&program->input_signature)) ++ return VKD3D_OK; ++ ++ /* VKD3DSPR_POINTCOORD is a two-component value; fill the remaining two ++ * components with zeroes. 
*/ ++ coord_temp = program->temp_count++; ++ ++ /* Construct the new temp after all LABEL, DCL, and NOP instructions. ++ * We need to skip NOP instructions because they might result from removed ++ * DCLs, and there could still be DCLs after NOPs. */ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP) ++ break; ++ } ++ ++ insert_pos = i; ++ ++ /* Replace each texcoord read with a read from the point coord. */ ++ for (; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (vsir_instruction_is_dcl(ins)) ++ continue; ++ ++ for (unsigned int j = 0; j < ins->src_count; ++j) ++ { ++ used_texcoord |= replace_texcoord_with_point_coord(program, &ins->src[j], coord_temp); ++ ++ for (unsigned int k = 0; k < ins->src[j].reg.idx_count; ++k) ++ { ++ if (ins->src[j].reg.idx[k].rel_addr) ++ used_texcoord |= replace_texcoord_with_point_coord(program, ++ ins->src[j].reg.idx[k].rel_addr, coord_temp); ++ } ++ } ++ ++ for (unsigned int j = 0; j < ins->dst_count; ++j) ++ { ++ for (unsigned int k = 0; k < ins->dst[j].reg.idx_count; ++k) ++ { ++ if (ins->dst[j].reg.idx[k].rel_addr) ++ used_texcoord |= replace_texcoord_with_point_coord(program, ++ ins->dst[j].reg.idx[k].rel_addr, coord_temp); ++ } ++ } ++ } ++ ++ if (used_texcoord) ++ { ++ if (!shader_instruction_array_insert_at(&program->instructions, insert_pos, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins = &program->instructions.elements[insert_pos]; ++ ++ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); ++ dst_param_init_temp_float4(&ins->dst[0], coord_temp); ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; ++ vsir_src_param_init(&ins->src[0], VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[0].swizzle = 
VKD3D_SHADER_NO_SWIZZLE; ++ ++ins; ++ ++ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); ++ dst_param_init_temp_float4(&ins->dst[0], coord_temp); ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_2 | VKD3DSP_WRITEMASK_3; ++ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ++ins; ++ ++ program->has_point_coord = true; ++ } ++ ++ return VKD3D_OK; ++} ++ + struct validation_context + { + struct vkd3d_shader_message_context *message_context; +@@ -6234,15 +6427,11 @@ static void vsir_validate_io_register(struct validation_context *ctx, + switch (ctx->program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: +- if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE) ++ if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE ++ || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) + { + signature = &ctx->program->output_signature; +- has_control_point = ctx->program->normalised_hull_cp_io; +- } +- else if (ctx->program->normalised_io) +- { +- signature = &ctx->program->output_signature; +- has_control_point = true; ++ has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; + } + else + { +@@ -6274,7 +6463,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, + vkd3d_unreachable(); + } + +- if (!ctx->program->normalised_io) ++ if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO) + { + /* Indices are [register] or [control point, register]. Both are + * allowed to have a relative address. 
*/ +@@ -7700,8 +7889,10 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c + + switch (program->shader_version.type) + { +- case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_DOMAIN: ++ break; ++ ++ case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_GEOMETRY: + if (program->input_control_point_count == 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, +@@ -7718,9 +7909,6 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c + switch (program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: +- if (program->output_control_point_count == 0) +- validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, +- "Invalid zero output control point count."); + break; + + default: +@@ -7844,6 +8032,7 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t + vsir_transform(&ctx, vsir_program_insert_clip_planes); + vsir_transform(&ctx, vsir_program_insert_point_size); + vsir_transform(&ctx, vsir_program_insert_point_size_clamp); ++ vsir_transform(&ctx, vsir_program_insert_point_coord); + + if (TRACE_ON()) + vsir_program_trace(program); +diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c +index 5baefbc1f44..a0dbb06342d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/msl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/msl.c +@@ -785,6 +785,10 @@ static void msl_generator_generate(struct msl_generator *gen) + + vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); + ++ if (gen->program->global_flags) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)gen->program->global_flags); ++ + vkd3d_string_buffer_printf(gen->buffer, "union vkd3d_vec4\n{\n"); + vkd3d_string_buffer_printf(gen->buffer, " uint4 u;\n"); + vkd3d_string_buffer_printf(gen->buffer, " int4 i;\n"); +@@ -869,8 +873,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) + return ret; + +- VKD3D_ASSERT(program->normalised_io); +- VKD3D_ASSERT(program->normalised_hull_cp_io); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); + + if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) + return ret; +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 6a28e2cd68e..802fe221747 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -3252,6 +3252,9 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s + case VKD3DSPR_WAVELANEINDEX: + snprintf(buffer, buffer_size, "vWaveLaneIndex"); + break; ++ case VKD3DSPR_POINT_COORD: ++ snprintf(buffer, buffer_size, "vPointCoord"); ++ break; + default: + FIXME("Unhandled register %#x.\n", reg->type); + snprintf(buffer, buffer_size, "unrecognized_%#x", reg->type); +@@ -4886,6 +4889,8 @@ vkd3d_register_builtins[] = + + {VKD3DSPR_TESSCOORD, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, + ++ {VKD3DSPR_POINT_COORD, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, ++ + {VKD3DSPR_COVERAGE, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + {VKD3DSPR_SAMPLEMASK, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + +@@ -5907,11 +5912,8 @@ static size_t 
spirv_compiler_get_current_function_location(struct spirv_compiler + return builder->main_function_location; + } + +-static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static void spirv_compiler_emit_global_flags(struct spirv_compiler *compiler, enum vsir_global_flags flags) + { +- enum vkd3d_shader_global_flags flags = instruction->declaration.global_flags; +- + if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL) + { + spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeEarlyFragmentTests, NULL, 0); +@@ -10180,9 +10182,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + + switch (instruction->opcode) + { +- case VKD3DSIH_DCL_GLOBAL_FLAGS: +- spirv_compiler_emit_dcl_global_flags(compiler, instruction); +- break; + case VKD3DSIH_DCL_INDEXABLE_TEMP: + spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); + break; +@@ -10596,6 +10595,14 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) + dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; + spirv_compiler_emit_output_register(compiler, &dst); + } ++ ++ if (compiler->program->has_point_coord) ++ { ++ struct vkd3d_shader_dst_param dst; ++ ++ vsir_dst_param_init(&dst, VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); ++ spirv_compiler_emit_input_register(compiler, &dst); ++ } + } + + static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) +@@ -10650,8 +10657,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + compile_info, compiler->message_context)) < 0) + return result; + +- VKD3D_ASSERT(program->normalised_io); +- VKD3D_ASSERT(program->normalised_hull_cp_io); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); + + max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); + if (!(compiler->output_info = 
vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) +@@ -10663,6 +10669,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + spirv_compiler_allocate_ssa_register_ids(compiler, program->ssa_count); + if (compiler->shader_type == VKD3D_SHADER_TYPE_COMPUTE) + spirv_compiler_emit_thread_group_size(compiler, &program->thread_group_size); ++ spirv_compiler_emit_global_flags(compiler, program->global_flags); + + spirv_compiler_emit_descriptor_declarations(compiler); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 848e78a34d3..f96d300676c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -1212,9 +1212,10 @@ static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction * + } + + static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) + { + ins->declaration.global_flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; ++ sm4->p.program->global_flags = ins->declaration.global_flags; + } + + static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, +@@ -2793,7 +2794,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro + + /* Estimate instruction count to avoid reallocation in most shaders. 
*/ + if (!vsir_program_init(program, compile_info, +- &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, false)) ++ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + return false; + vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); + sm4->ptr = sm4->start; +@@ -3017,6 +3018,9 @@ bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false}, + ++ {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_TESSCOORD, false}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_PRIMID, false}, ++ + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID, false}, + + {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_OUTPOINTID, false}, +@@ -3115,6 +3119,12 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + ++ {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, ++ ++ {"sv_position", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_POSITION}, ++ + {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, +@@ -3179,6 +3189,16 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s + return false; + } + } ++ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) ++ { ++ if (!output) ++ { ++ if (!ascii_strcasecmp(semantic_name, "sv_tessfactor")) ++ return 
get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); ++ if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor")) ++ return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); ++ } ++ } + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { +@@ -3213,18 +3233,37 @@ static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + ctx->result = buffer->status; + } + ++static int signature_element_pointer_compare(const void *x, const void *y) ++{ ++ const struct signature_element *e = *(const struct signature_element **)x; ++ const struct signature_element *f = *(const struct signature_element **)y; ++ int ret; ++ ++ if ((ret = vkd3d_u32_compare(e->register_index, f->register_index))) ++ return ret; ++ return vkd3d_u32_compare(e->mask, f->mask); ++} ++ + static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag) + { +- bool output = tag == TAG_OSGN || tag == TAG_PCSG; ++ bool output = tag == TAG_OSGN || (tag == TAG_PCSG ++ && tpf->program->shader_version.type == VKD3D_SHADER_TYPE_HULL); ++ const struct signature_element **sorted_elements; + struct vkd3d_bytecode_buffer buffer = {0}; + unsigned int i; + + put_u32(&buffer, signature->element_count); + put_u32(&buffer, 8); /* unknown */ + ++ if (!(sorted_elements = vkd3d_calloc(signature->element_count, sizeof(*sorted_elements)))) ++ return; ++ for (i = 0; i < signature->element_count; ++i) ++ sorted_elements[i] = &signature->elements[i]; ++ qsort(sorted_elements, signature->element_count, sizeof(*sorted_elements), signature_element_pointer_compare); ++ + for (i = 0; i < signature->element_count; ++i) + { +- const struct signature_element *element = &signature->elements[i]; ++ const struct signature_element *element = sorted_elements[i]; + enum vkd3d_shader_sysval_semantic sysval; + uint32_t used_mask = element->used_mask; + +@@ -3245,7 +3284,7 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const 
struct shader_si + + for (i = 0; i < signature->element_count; ++i) + { +- const struct signature_element *element = &signature->elements[i]; ++ const struct signature_element *element = sorted_elements[i]; + size_t string_offset; + + string_offset = put_string(&buffer, element->semantic_name); +@@ -3253,6 +3292,7 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si + } + + add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); ++ vkd3d_free(sorted_elements); + } + + static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +@@ -3410,13 +3450,19 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) + + static enum vkd3d_sm4_data_type sm4_data_type(const struct hlsl_type *type) + { +- switch (type->e.resource.format->e.numeric.type) ++ const struct hlsl_type *format = type->e.resource.format; ++ ++ switch (format->e.numeric.type) + { + case HLSL_TYPE_DOUBLE: + return VKD3D_SM4_DATA_DOUBLE; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: ++ if (format->modifiers & HLSL_MODIFIER_UNORM) ++ return VKD3D_SM4_DATA_UNORM; ++ if (format->modifiers & HLSL_MODIFIER_SNORM) ++ return VKD3D_SM4_DATA_SNORM; + return VKD3D_SM4_DATA_FLOAT; + + case HLSL_TYPE_INT: +@@ -4224,7 +4270,11 @@ static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + VKD3D_ASSERT(hlsl_reg.allocated); +- reg->type = VKD3DSPR_INPUT; ++ ++ if (version->type == VKD3D_SHADER_TYPE_DOMAIN) ++ reg->type = VKD3DSPR_PATCHCONST; ++ else ++ reg->type = VKD3DSPR_INPUT; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; +@@ -4818,7 +4868,13 @@ static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, + } + else + { +- instr.dsts[0].reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; ++ if (output) ++ instr.dsts[0].reg.type = VKD3DSPR_OUTPUT; ++ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) ++ instr.dsts[0].reg.type = VKD3DSPR_PATCHCONST; ++ else ++ instr.dsts[0].reg.type = VKD3DSPR_INPUT; ++ + instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + instr.dsts[0].reg.idx_count = 1; + instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; +@@ -4858,38 +4914,9 @@ static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, + + if (version->type == VKD3D_SHADER_TYPE_PIXEL) + { +- enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; +- +- if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) +- { +- mode = VKD3DSIM_CONSTANT; +- } +- else +- { +- static const struct +- { +- unsigned int modifiers; +- enum vkd3d_shader_interpolation_mode mode; +- } +- modes[] = +- { +- { HLSL_STORAGE_CENTROID | HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID }, +- { HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE }, +- { HLSL_STORAGE_CENTROID, VKD3DSIM_LINEAR_CENTROID }, +- { HLSL_STORAGE_CENTROID | HLSL_STORAGE_LINEAR, VKD3DSIM_LINEAR_CENTROID }, +- }; +- unsigned int i; +- +- for (i = 0; i < ARRAY_SIZE(modes); ++i) +- { +- if ((var->storage_modifiers & modes[i].modifiers) == modes[i].modifiers) +- { +- mode = modes[i].mode; +- break; +- } +- } +- } ++ enum vkd3d_shader_interpolation_mode mode; + ++ mode = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); + instr.extra_bits |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; + } + } +@@ -5667,6 +5694,12 @@ static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_ + write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0); + break; + ++ case HLSL_OP1_F32TOF16: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT); ++ VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_F32TOF16, 
&expr->node, arg1, 0); ++ break; ++ + case HLSL_OP1_FLOOR: + VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); +@@ -6592,6 +6625,11 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec + tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); + tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); + } ++ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) ++ { ++ tpf_write_dcl_input_control_point_count(tpf, 0); /* TODO: Obtain from OutputPatch */ ++ tpf_write_dcl_tessellator_domain(tpf, ctx->domain); ++ } + + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { +@@ -6717,6 +6755,7 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { ++ enum vkd3d_shader_type shader_type = program->shader_version.type; + struct tpf_compiler tpf = {0}; + struct sm4_stat stat = {0}; + size_t i; +@@ -6731,7 +6770,7 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, + + tpf_write_signature(&tpf, &program->input_signature, TAG_ISGN); + tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); +- if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL) ++ if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN) + tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); + write_sm4_rdef(ctx, &tpf.dxbc); + tpf_write_shdr(&tpf, entry_func); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 9df538a0da0..d6c68155ee7 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -648,6 +648,7 @@ enum vkd3d_shader_register_type + VKD3DSPR_WAVELANECOUNT, + VKD3DSPR_WAVELANEINDEX, + 
VKD3DSPR_PARAMETER, ++ VKD3DSPR_POINT_COORD, + + VKD3DSPR_COUNT, + +@@ -773,7 +774,7 @@ enum vkd3d_shader_interpolation_mode + VKD3DSIM_COUNT = 8, + }; + +-enum vkd3d_shader_global_flags ++enum vsir_global_flags + { + VKD3DSGF_REFACTORING_ALLOWED = 0x01, + VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = 0x02, +@@ -1246,7 +1247,7 @@ struct vkd3d_shader_instruction + const struct vkd3d_shader_src_param *predicate; + union + { +- enum vkd3d_shader_global_flags global_flags; ++ enum vsir_global_flags global_flags; + struct vkd3d_shader_semantic semantic; + struct vkd3d_shader_register_semantic register_semantic; + struct vkd3d_shader_primitive_type primitive_type; +@@ -1393,6 +1394,13 @@ enum vsir_control_flow_type + VSIR_CF_BLOCKS, + }; + ++enum vsir_normalisation_level ++{ ++ VSIR_NOT_NORMALISED, ++ VSIR_NORMALISED_HULL_CONTROL_POINT_IO, ++ VSIR_FULLY_NORMALISED_IO, ++}; ++ + struct vsir_program + { + struct vkd3d_shader_version shader_version; +@@ -1412,11 +1420,12 @@ struct vsir_program + unsigned int block_count; + unsigned int temp_count; + unsigned int ssa_count; ++ enum vsir_global_flags global_flags; + bool use_vocp; + bool has_point_size; ++ bool has_point_coord; + enum vsir_control_flow_type cf_type; +- bool normalised_io; +- bool normalised_hull_cp_io; ++ enum vsir_normalisation_level normalisation_level; + + const char **block_names; + size_t block_name_count; +@@ -1430,7 +1439,7 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( + const struct vsir_program *program, enum vkd3d_shader_parameter_name name); + bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, +- bool normalised_io); ++ enum vsir_normalisation_level normalisation_level); + enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info 
*compile_info, struct vkd3d_shader_message_context *message_context); + enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c +index 5495809fcb9..ed4cc370639 100644 +--- a/libs/vkd3d/libs/vkd3d/command.c ++++ b/libs/vkd3d/libs/vkd3d/command.c +@@ -2005,6 +2005,8 @@ static void d3d12_command_list_invalidate_bindings(struct d3d12_command_list *li + + vkd3d_array_reserve((void **)&bindings->vk_uav_counter_views, &bindings->vk_uav_counter_views_size, + state->uav_counters.binding_count, sizeof(*bindings->vk_uav_counter_views)); ++ memset(bindings->vk_uav_counter_views, 0, ++ state->uav_counters.binding_count * sizeof(*bindings->vk_uav_counter_views)); + bindings->uav_counters_dirty = true; + } + } +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index 65339c7ba5d..fd0ca20838f 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -1573,6 +1573,111 @@ static HRESULT vkd3d_check_device_extensions(struct d3d12_device *device, + return S_OK; + } + ++static void vkd3d_override_caps(struct d3d12_device *device) ++{ ++ const char *caps_override, *p; ++ ++ static const struct override_value ++ { ++ const char *str; ++ uint32_t value; ++ } ++ feature_level_override_values[] = ++ { ++ {"11.0", D3D_FEATURE_LEVEL_11_0}, ++ {"11.1", D3D_FEATURE_LEVEL_11_1}, ++ {"12.0", D3D_FEATURE_LEVEL_12_0}, ++ {"12.1", D3D_FEATURE_LEVEL_12_1}, ++ {"12.2", D3D_FEATURE_LEVEL_12_2}, ++ }, ++ resource_binding_tier_override_values[] = ++ { ++ {"1", D3D12_RESOURCE_BINDING_TIER_1}, ++ {"2", D3D12_RESOURCE_BINDING_TIER_2}, ++ {"3", D3D12_RESOURCE_BINDING_TIER_3}, ++ }; ++ static const struct override_field ++ { ++ const char *name; ++ size_t offset; ++ const struct override_value *values; ++ size_t value_count; ++ } ++ override_fields[] = ++ { ++ { ++ "feature_level", ++ offsetof(struct d3d12_device, 
vk_info.max_feature_level), ++ feature_level_override_values, ++ ARRAY_SIZE(feature_level_override_values) ++ }, ++ { ++ "resource_binding_tier", ++ offsetof(struct d3d12_device, feature_options.ResourceBindingTier), ++ resource_binding_tier_override_values, ++ ARRAY_SIZE(resource_binding_tier_override_values) ++ }, ++ }; ++ ++ if (!(caps_override = getenv("VKD3D_CAPS_OVERRIDE"))) ++ return; ++ ++ p = caps_override; ++ for (;;) ++ { ++ size_t i; ++ ++ for (i = 0; i < ARRAY_SIZE(override_fields); ++i) ++ { ++ const struct override_field *field = &override_fields[i]; ++ size_t len = strlen(field->name); ++ ++ if (strncmp(p, field->name, len) == 0 && p[len] == '=') ++ { ++ size_t j; ++ ++ p += len + 1; ++ ++ for (j = 0; j < field->value_count; ++j) ++ { ++ const struct override_value *value = &field->values[j]; ++ size_t value_len = strlen(value->str); ++ ++ if (strncmp(p, value->str, value_len) == 0 ++ && (p[value_len] == '\0' || p[value_len] == ',')) ++ { ++ memcpy(&((uint8_t *)device)[field->offset], (uint8_t *)&value->value, sizeof(value->value)); ++ ++ p += value_len; ++ if (p[0] == '\0') ++ { ++ TRACE("Overriding caps with: %s\n", caps_override); ++ return; ++ } ++ p += 1; ++ ++ break; ++ } ++ } ++ ++ if (j == field->value_count) ++ { ++ WARN("Cannot parse the override caps string: %s\n", caps_override); ++ return; ++ } ++ ++ break; ++ } ++ } ++ ++ if (i == ARRAY_SIZE(override_fields)) ++ { ++ WARN("Cannot parse the override caps string: %s\n", caps_override); ++ return; ++ } ++ } ++} ++ + static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + const struct vkd3d_device_create_info *create_info, + struct vkd3d_physical_device_info *physical_device_info, +@@ -1742,6 +1847,9 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + vulkan_info->EXT_shader_viewport_index_layer; + + vkd3d_init_feature_level(vulkan_info, features, &device->feature_options); ++ ++ vkd3d_override_caps(device); ++ + if (vulkan_info->max_feature_level < 
create_info->minimum_feature_level) + { + WARN("Feature level %#x is not supported.\n", create_info->minimum_feature_level); +@@ -1810,6 +1918,26 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + && descriptor_indexing->descriptorBindingUniformTexelBufferUpdateAfterBind + && descriptor_indexing->descriptorBindingStorageTexelBufferUpdateAfterBind; + ++ /* Many Vulkan implementations allow up to 8 descriptor sets. Unfortunately ++ * using vkd3d with Vulkan heaps and push descriptors currently requires up ++ * to 9 descriptor sets (up to one for the push descriptors, up to one for ++ * the static samplers and seven for Vulkan heaps, one for each needed ++ * descriptor type). If we detect such situation, we disable push ++ * descriptors, which allows us to stay within the limits (not doing so is ++ * fatal on many implementations). ++ * ++ * It is possible that a different strategy might be used. For example, we ++ * could move the static samplers to one of the seven Vulkan heaps sets. Or ++ * we could decide whether to create the push descriptor set when creating ++ * the root signature, depending on whether there are static samplers or ++ * not. 
*/ ++ if (device->vk_info.device_limits.maxBoundDescriptorSets == 8 && device->use_vk_heaps ++ && device->vk_info.KHR_push_descriptor) ++ { ++ TRACE("Disabling VK_KHR_push_descriptor to save a descriptor set.\n"); ++ device->vk_info.KHR_push_descriptor = VK_FALSE; ++ } ++ + if (device->use_vk_heaps) + vkd3d_device_vk_heaps_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->descriptor_indexing_properties); +@@ -1817,6 +1945,13 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->properties2.properties.limits); + ++ TRACE("Device %p: using %s descriptor heaps, with%s descriptor indexing, " ++ "with%s push descriptors, with%s mutable descriptors\n", ++ device, device->use_vk_heaps ? "Vulkan" : "virtual", ++ device->vk_info.EXT_descriptor_indexing ? "" : "out", ++ device->vk_info.KHR_push_descriptor ? "" : "out", ++ device->vk_info.EXT_mutable_descriptor_type ? "" : "out"); ++ + vkd3d_chain_physical_device_info_structures(physical_device_info, device); + + return S_OK; +-- +2.45.2 + diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-cd74461d6dabae4e702de61a90533d811aa.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-cd74461d6dabae4e702de61a90533d811aa.patch deleted file mode 100644 index e1770dd8..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-cd74461d6dabae4e702de61a90533d811aa.patch +++ /dev/null @@ -1,1718 +0,0 @@ -From 1652829e1c0845b53db7cc789c6ea1043beb4f55 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 9 Oct 2024 20:40:25 +1100 -Subject: [PATCH] Updated vkd3d to cd74461d6dabae4e702de61a90533d811aa0a3fb. 
- ---- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 59 ++- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 2 - - libs/vkd3d/libs/vkd3d-shader/dxil.c | 10 +- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 254 ++++++++++--- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 1 - - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 103 ++--- - libs/vkd3d/libs/vkd3d-shader/ir.c | 9 +- - libs/vkd3d/libs/vkd3d-shader/msl.c | 357 +++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/preproc.h | 3 +- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 53 +-- - libs/vkd3d/libs/vkd3d-shader/preproc.y | 13 - - libs/vkd3d/libs/vkd3d-shader/spirv.c | 17 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 7 +- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 2 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 10 +- - 16 files changed, 726 insertions(+), 176 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 9fe4b74486a..38d566d9fe0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -2268,7 +2268,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic - } - } - --static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, -+static enum vkd3d_result dump_dxbc_signature(struct vkd3d_d3d_asm_compiler *compiler, - const char *name, const char *register_name, const struct shader_signature *signature) - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; -@@ -2335,21 +2335,21 @@ static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, - return VKD3D_OK; - } - --static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler, -+static enum vkd3d_result dump_dxbc_signatures(struct vkd3d_d3d_asm_compiler *compiler, - const struct vsir_program *program) - { - enum vkd3d_result ret; - -- if ((ret = dump_signature(compiler, ".input", -+ if ((ret = dump_dxbc_signature(compiler, ".input", - 
program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v", - &program->input_signature)) < 0) - return ret; - -- if ((ret = dump_signature(compiler, ".output", "o", -+ if ((ret = dump_dxbc_signature(compiler, ".output", "o", - &program->output_signature)) < 0) - return ret; - -- if ((ret = dump_signature(compiler, ".patch_constant", -+ if ((ret = dump_dxbc_signature(compiler, ".patch_constant", - program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o", - &program->patch_constant_signature)) < 0) - return ret; -@@ -2437,7 +2437,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - * doesn't even have an explicit concept of signature. */ - if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4) - { -- if ((result = dump_signatures(&compiler, program)) < 0) -+ if ((result = dump_dxbc_signatures(&compiler, program)) < 0) - { - vkd3d_string_buffer_cleanup(buffer); - return result; -@@ -2499,12 +2499,57 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - return result; - } - --void vkd3d_shader_trace(const struct vsir_program *program) -+/* This is meant exclusively for development use. Therefore, differently from -+ * dump_dxbc_signature(), it doesn't try particularly hard to make the output -+ * nice or easily parsable, and it dumps all fields, not just the DXBC ones. -+ * This format isn't meant to be stable. */ -+static void trace_signature(const struct shader_signature *signature, const char *signature_type) -+{ -+ struct vkd3d_string_buffer buffer; -+ unsigned int i; -+ -+ TRACE("%s signature:%s\n", signature_type, signature->element_count == 0 ? 
" empty" : ""); -+ -+ vkd3d_string_buffer_init(&buffer); -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ const struct signature_element *element = &signature->elements[i]; -+ -+ vkd3d_string_buffer_clear(&buffer); -+ -+ vkd3d_string_buffer_printf(&buffer, "Element %u: %s %u-%u %s", i, -+ get_component_type_name(element->component_type), -+ element->register_index, element->register_index + element->register_count, -+ element->semantic_name); -+ if (element->semantic_index != -1) -+ vkd3d_string_buffer_printf(&buffer, "%u", element->semantic_index); -+ vkd3d_string_buffer_printf(&buffer, -+ " mask %#x used_mask %#x sysval %s min_precision %s interpolation %u stream %u", -+ element->mask, element->used_mask, get_sysval_semantic_name(element->sysval_semantic), -+ get_minimum_precision_name(element->min_precision), element->interpolation_mode, -+ element->stream_index); -+ if (element->target_location != -1) -+ vkd3d_string_buffer_printf(&buffer, " target %u", element->target_location); -+ else -+ vkd3d_string_buffer_printf(&buffer, " unused"); -+ -+ TRACE("%s\n", buffer.buffer); -+ } -+ -+ vkd3d_string_buffer_cleanup(&buffer); -+} -+ -+void vsir_program_trace(const struct vsir_program *program) - { - const unsigned int flags = VSIR_ASM_FLAG_DUMP_TYPES | VSIR_ASM_FLAG_DUMP_ALL_INDICES; - struct vkd3d_shader_code code; - const char *p, *q, *end; - -+ trace_signature(&program->input_signature, "Input"); -+ trace_signature(&program->output_signature, "Output"); -+ trace_signature(&program->patch_constant_signature, "Patch-constant"); -+ - if (d3d_asm_compile(program, NULL, &code, flags) != VKD3D_OK) - return; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 267cf410cbe..589b800f8c9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1390,7 +1390,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c - WARN("Failed to validate shader 
after parsing, ret %d.\n", ret); - - if (TRACE_ON()) -- vkd3d_shader_trace(program); -+ vsir_program_trace(program); - - vsir_program_cleanup(program); - return ret; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index 93fc993e0d1..f6ac8e0829e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -419,8 +419,6 @@ static int shader_parse_signature(const struct vkd3d_shader_dxbc_section_desc *s - const char *name; - uint32_t mask; - -- e[i].sort_index = i; -- - if (has_stream_index) - e[i].stream_index = read_u32(&ptr); - else -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index d4296ef4bc5..c66b059325a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -4174,6 +4174,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty - const struct sm6_type *type_b, struct sm6_parser *sm6) - { - bool is_int = sm6_type_is_bool_i16_i32_i64(type_a); -+ bool is_double = sm6_type_is_double(type_a); - bool is_bool = sm6_type_is_bool(type_a); - enum vkd3d_shader_opcode op; - bool is_valid; -@@ -4198,7 +4199,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty - case BINOP_ADD: - case BINOP_SUB: - /* NEG is applied later for subtraction. */ -- op = is_int ? VKD3DSIH_IADD : VKD3DSIH_ADD; -+ op = is_int ? VKD3DSIH_IADD : (is_double ? VKD3DSIH_DADD : VKD3DSIH_ADD); - is_valid = !is_bool; - break; - case BINOP_AND: -@@ -4214,7 +4215,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty - is_valid = is_int && !is_bool; - break; - case BINOP_MUL: -- op = is_int ? VKD3DSIH_UMUL : VKD3DSIH_MUL; -+ op = is_int ? VKD3DSIH_UMUL : (is_double ? 
VKD3DSIH_DMUL : VKD3DSIH_MUL); - is_valid = !is_bool; - break; - case BINOP_OR: -@@ -4222,7 +4223,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty - is_valid = is_int; - break; - case BINOP_SDIV: -- op = is_int ? VKD3DSIH_IDIV : VKD3DSIH_DIV; -+ op = is_int ? VKD3DSIH_IDIV : (is_double ? VKD3DSIH_DDIV : VKD3DSIH_DIV); - is_valid = !is_bool; - break; - case BINOP_SREM: -@@ -9637,6 +9638,7 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co - ins->declaration.thread_group_size.x = group_sizes[0]; - ins->declaration.thread_group_size.y = group_sizes[1]; - ins->declaration.thread_group_size.z = group_sizes[2]; -+ sm6->p.program->thread_group_size = ins->declaration.thread_group_size; - - return VKD3D_OK; - } -@@ -10592,7 +10594,7 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co - WARN("Failed to validate shader after parsing, ret %d.\n", ret); - - if (TRACE_ON()) -- vkd3d_shader_trace(program); -+ vsir_program_trace(program); - - sm6_parser_cleanup(&sm6); - vsir_program_cleanup(program); -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index c2fb58c55e6..4dc95899a11 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -62,6 +62,9 @@ struct vkd3d_glsl_generator - const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; - }; - -+static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *rel_addr, unsigned int offset); -+ - static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( - struct vkd3d_glsl_generator *generator, - enum vkd3d_shader_error error, const char *fmt, ...) 
-@@ -263,6 +266,11 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, - gen->prefix, reg->idx[0].offset, reg->idx[2].offset); - break; - -+ case VKD3DSPR_IDXTEMP: -+ vkd3d_string_buffer_printf(buffer, "x%u", reg->idx[0].offset); -+ shader_glsl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); -+ break; -+ - default: - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled register type %#x.", reg->type); -@@ -438,6 +446,26 @@ static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_gener - return write_mask; - } - -+static void shader_glsl_print_subscript(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *rel_addr, unsigned int offset) -+{ -+ struct glsl_src r; -+ -+ if (!rel_addr) -+ { -+ vkd3d_string_buffer_printf(buffer, "[%u]", offset); -+ return; -+ } -+ -+ glsl_src_init(&r, gen, rel_addr, VKD3DSP_WRITEMASK_0); -+ vkd3d_string_buffer_printf(buffer, "[%s", r.str->buffer); -+ if (offset) -+ vkd3d_string_buffer_printf(buffer, " + %u", offset); -+ else -+ vkd3d_string_buffer_printf(buffer, "]"); -+ glsl_src_cleanup(&r, &gen->string_buffers); -+} -+ - static void VKD3D_PRINTF_FUNC(4, 0) shader_glsl_vprint_assignment(struct vkd3d_glsl_generator *gen, - struct glsl_dst *dst, enum vkd3d_data_type data_type, const char *format, va_list args) - { -@@ -903,19 +931,27 @@ static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, st - switch (sysval) - { - case VKD3D_SHADER_SV_POSITION: -- if (version->type == VKD3D_SHADER_TYPE_PIXEL || version->type == VKD3D_SHADER_TYPE_COMPUTE) -+ if (version->type == VKD3D_SHADER_TYPE_COMPUTE) - { - vkd3d_string_buffer_printf(buffer, "", sysval); - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled system value %#x.", sysval); -+ "Internal compiler error: Unhandled SV_POSITION in shader 
type #%x.", version->type); -+ break; - } -+ if (idx) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled SV_POSITION index %u.", idx); -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL) -+ vkd3d_string_buffer_printf(buffer, "gl_FragCoord"); - else -- { - vkd3d_string_buffer_printf(buffer, "gl_Position"); -- if (idx) -- vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled SV_POSITION index %u.", idx); -- } -+ break; -+ -+ case VKD3D_SHADER_SV_VERTEX_ID: -+ if (version->type != VKD3D_SHADER_TYPE_VERTEX) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled SV_VERTEX_ID in shader type #%x.", version->type); -+ vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_VertexID, 0, 0, 0))"); - break; - - case VKD3D_SHADER_SV_IS_FRONT_FACE: -@@ -972,7 +1008,19 @@ static void shader_glsl_shader_prologue(struct vkd3d_glsl_generator *gen) - } - else - { -- vkd3d_string_buffer_printf(buffer, " = shader_in_%u", i); -+ switch (e->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(buffer, " = uintBitsToFloat(shader_in_%u)", i); -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled input component type %#x.", e->component_type); -+ /* fall through */ -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, " = shader_in_%u", i); -+ break; -+ } - } - } - else -@@ -989,6 +1037,7 @@ static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen) - { - const struct shader_signature *signature = &gen->program->output_signature; - struct vkd3d_string_buffer *buffer = gen->buffer; -+ enum vkd3d_shader_component_type type; - const struct signature_element *e; - unsigned int i; - -@@ -999,11 +1048,13 @@ static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen) - if 
(e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) - continue; - -+ type = e->component_type; - shader_glsl_print_indent(buffer, gen->indent); - if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) - { - if (gen->interstage_output) - { -+ type = VKD3D_SHADER_COMPONENT_FLOAT; - vkd3d_string_buffer_printf(buffer, "shader_out.reg_%u", e->target_location); - if (e->target_location >= gen->limits.output_count) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -@@ -1023,7 +1074,19 @@ static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen) - shader_glsl_print_sysval_name(buffer, gen, e->sysval_semantic, e->semantic_index); - } - shader_glsl_print_write_mask(buffer, e->mask); -- vkd3d_string_buffer_printf(buffer, " = %s_out[%u]", gen->prefix, e->register_index); -+ switch (type) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(buffer, " = floatBitsToUint(%s_out[%u])", gen->prefix, e->register_index); -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled output component type %#x.", e->component_type); -+ /* fall through */ -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, " = %s_out[%u]", gen->prefix, e->register_index); -+ break; -+ } - shader_glsl_print_write_mask(buffer, e->mask); - vkd3d_string_buffer_printf(buffer, ";\n"); - } -@@ -1041,6 +1104,15 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d - } - } - -+static void shader_glsl_dcl_indexable_temp(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction *ins) -+{ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "vec4 x%u[%u];\n", -+ ins->declaration.indexable_temp.register_idx, -+ ins->declaration.indexable_temp.register_size); -+} -+ - static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - const struct vkd3d_shader_instruction *ins) - 
{ -@@ -1049,14 +1121,19 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - switch (ins->opcode) - { - case VKD3DSIH_ADD: -+ case VKD3DSIH_IADD: - shader_glsl_binop(gen, ins, "+"); - break; - case VKD3DSIH_AND: - shader_glsl_binop(gen, ins, "&"); - break; -+ case VKD3DSIH_DCL_INDEXABLE_TEMP: -+ shader_glsl_dcl_indexable_temp(gen, ins); -+ break; - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_INPUT_PS: - case VKD3DSIH_DCL_INPUT_PS_SIV: -+ case VKD3DSIH_DCL_INPUT_SGV: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_NOP: -@@ -1079,6 +1156,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_ENDIF: - shader_glsl_endif(gen); - break; -+ case VKD3DSIH_EQO: - case VKD3DSIH_IEQ: - shader_glsl_relop(gen, ins, "==", "equal"); - break; -@@ -1108,26 +1186,30 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_LTO: - shader_glsl_relop(gen, ins, "<", "lessThan"); - break; -- case VKD3DSIH_IMUL: -- shader_glsl_mul_extended(gen, ins); -- break; -- case VKD3DSIH_ISHL: -- shader_glsl_binop(gen, ins, "<<"); -- break; -- case VKD3DSIH_ISHR: -- case VKD3DSIH_USHR: -- shader_glsl_binop(gen, ins, ">>"); -- break; -+ case VKD3DSIH_IMAX: - case VKD3DSIH_MAX: - shader_glsl_intrinsic(gen, ins, "max"); - break; - case VKD3DSIH_MIN: - shader_glsl_intrinsic(gen, ins, "min"); - break; -+ case VKD3DSIH_IMUL: -+ shader_glsl_mul_extended(gen, ins); -+ break; - case VKD3DSIH_INE: - case VKD3DSIH_NEU: - shader_glsl_relop(gen, ins, "!=", "notEqual"); - break; -+ case VKD3DSIH_INEG: -+ shader_glsl_unary_op(gen, ins, "-"); -+ break; -+ case VKD3DSIH_ISHL: -+ shader_glsl_binop(gen, ins, "<<"); -+ break; -+ case VKD3DSIH_ISHR: -+ case VKD3DSIH_USHR: -+ shader_glsl_binop(gen, ins, ">>"); -+ break; - case VKD3DSIH_ITOF: - case VKD3DSIH_UTOF: - shader_glsl_cast(gen, ins, "float", "vec"); -@@ -1485,15 +1567,62 @@ static void 
shader_glsl_generate_descriptor_declarations(struct vkd3d_glsl_gener - vkd3d_string_buffer_printf(gen->buffer, "\n"); - } - --static void shader_glsl_generate_interface_block(struct vkd3d_string_buffer *buffer, -- const char *type, unsigned int count) -+static const struct signature_element *signature_get_element_by_location( -+ const struct shader_signature *signature, unsigned int location) - { -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location != location) -+ continue; -+ -+ return e; -+ } -+ -+ return NULL; -+} -+ -+static const char *shader_glsl_get_interpolation(struct vkd3d_glsl_generator *gen, -+ const struct shader_signature *signature, const char *type, unsigned int location) -+{ -+ enum vkd3d_shader_interpolation_mode m; -+ const struct signature_element *e; -+ -+ if ((e = signature_get_element_by_location(signature, location))) -+ m = e->interpolation_mode; -+ else -+ m = VKD3DSIM_NONE; -+ -+ switch (m) -+ { -+ case VKD3DSIM_NONE: -+ case VKD3DSIM_LINEAR: -+ return ""; -+ case VKD3DSIM_CONSTANT: -+ return "flat "; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled interpolation mode %#x for %s location %u.", m, type, location); -+ return ""; -+ } -+} -+ -+static void shader_glsl_generate_interface_block(struct vkd3d_glsl_generator *gen, -+ const struct shader_signature *signature, const char *type, unsigned int count) -+{ -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const char *interpolation; - unsigned int i; - - vkd3d_string_buffer_printf(buffer, "%s shader_in_out\n{\n", type); - for (i = 0; i < count; ++i) - { -- vkd3d_string_buffer_printf(buffer, " vec4 reg_%u;\n", i); -+ interpolation = shader_glsl_get_interpolation(gen, signature, type, i); -+ vkd3d_string_buffer_printf(buffer, " %svec4 reg_%u;\n", interpolation, i); - } - 
vkd3d_string_buffer_printf(buffer, "} shader_%s;\n", type); - } -@@ -1503,30 +1632,16 @@ static void shader_glsl_generate_input_declarations(struct vkd3d_glsl_generator - const struct shader_signature *signature = &gen->program->input_signature; - struct vkd3d_string_buffer *buffer = gen->buffer; - const struct signature_element *e; -- unsigned int i; -+ unsigned int i, count; - - if (!gen->interstage_input) - { -- for (i = 0; i < signature->element_count; ++i) -+ for (i = 0, count = 0; i < signature->element_count; ++i) - { - e = &signature->elements[i]; - -- if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -- continue; -- -- if (e->sysval_semantic) -- { -- vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); -- continue; -- } -- -- if (e->component_type != VKD3D_SHADER_COMPONENT_FLOAT) -- { -- vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled component type %#x.", e->component_type); -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED || e->sysval_semantic) - continue; -- } - - if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) - { -@@ -1542,15 +1657,32 @@ static void shader_glsl_generate_input_declarations(struct vkd3d_glsl_generator - continue; - } - -- vkd3d_string_buffer_printf(buffer, -- "layout(location = %u) in vec4 shader_in_%u;\n", e->target_location, i); -+ vkd3d_string_buffer_printf(buffer, "layout(location = %u) in ", e->target_location); -+ switch (e->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(buffer, "uvec4"); -+ break; -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "vec4"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "", e->component_type); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled input component type %#x.", 
e->component_type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, " shader_in_%u;\n", i); -+ ++count; - } -+ if (count) -+ vkd3d_string_buffer_printf(buffer, "\n"); - } - else if (gen->limits.input_count) - { -- shader_glsl_generate_interface_block(buffer, "in", gen->limits.input_count); -+ shader_glsl_generate_interface_block(gen, signature, "in", gen->limits.input_count); -+ vkd3d_string_buffer_printf(buffer, "\n"); - } -- vkd3d_string_buffer_printf(buffer, "\n"); - } - - static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator *gen) -@@ -1558,11 +1690,11 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator - const struct shader_signature *signature = &gen->program->output_signature; - struct vkd3d_string_buffer *buffer = gen->buffer; - const struct signature_element *e; -- unsigned int i; -+ unsigned int i, count; - - if (!gen->interstage_output) - { -- for (i = 0; i < signature->element_count; ++i) -+ for (i = 0, count = 0; i < signature->element_count; ++i) - { - e = &signature->elements[i]; - -@@ -1576,13 +1708,6 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator - continue; - } - -- if (e->component_type != VKD3D_SHADER_COMPONENT_FLOAT) -- { -- vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled component type %#x.", e->component_type); -- continue; -- } -- - if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) - { - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -@@ -1597,15 +1722,32 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator - continue; - } - -- vkd3d_string_buffer_printf(buffer, -- "layout(location = %u) out vec4 shader_out_%u;\n", e->target_location, i); -+ vkd3d_string_buffer_printf(buffer, "layout(location = %u) out ", e->target_location); -+ switch (e->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ 
vkd3d_string_buffer_printf(buffer, "uvec4"); -+ break; -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "vec4"); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, "", e->component_type); -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled output component type %#x.", e->component_type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, " shader_out_%u;\n", i); -+ ++count; - } -+ if (count) -+ vkd3d_string_buffer_printf(buffer, "\n"); - } - else if (gen->limits.output_count) - { -- shader_glsl_generate_interface_block(buffer, "out", gen->limits.output_count); -+ shader_glsl_generate_interface_block(gen, signature, "out", gen->limits.output_count); -+ vkd3d_string_buffer_printf(buffer, "\n"); - } -- vkd3d_string_buffer_printf(buffer, "\n"); - } - - static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 9ace1930c1b..b44c0296f69 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -1621,7 +1621,6 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; - -- VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); - return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index ef37eb75f03..2230cd5b919 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -461,6 +461,40 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct - return add_cast(ctx, block, node, dst_type, loc); - } - -+static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_type *dst_type, const struct parse_array_sizes 
*arrays, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *instr = node_from_block(block); -+ struct hlsl_type *src_type = instr->data_type; -+ unsigned int i; -+ -+ for (i = 0; i < arrays->count; ++i) -+ { -+ if (arrays->sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in casts."); -+ dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i]); -+ } -+ -+ if (instr->data_type->class == HLSL_CLASS_ERROR) -+ return true; -+ -+ if (!explicit_compatible_data_types(ctx, src_type, dst_type)) -+ { -+ struct vkd3d_string_buffer *src_string, *dst_string; -+ -+ src_string = hlsl_type_to_string(ctx, src_type); -+ dst_string = hlsl_type_to_string(ctx, dst_type); -+ if (src_string && dst_string) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Can't cast from %s to %s.", -+ src_string->buffer, dst_string->buffer); -+ hlsl_release_string_buffer(ctx, src_string); -+ hlsl_release_string_buffer(ctx, dst_string); -+ return false; -+ } -+ -+ return add_cast(ctx, block, instr, dst_type, loc); -+} -+ - static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t mod, - const struct vkd3d_shader_location *loc) - { -@@ -978,6 +1012,12 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str - const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; - struct hlsl_ir_node *return_index, *cast; - -+ if (array->data_type->class == HLSL_CLASS_ERROR || index->data_type->class == HLSL_CLASS_ERROR) -+ { -+ block->value = ctx->error_instr; -+ return true; -+ } -+ - if ((expr_type->class == HLSL_CLASS_TEXTURE || expr_type->class == HLSL_CLASS_UAV) - && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) - { -@@ -2314,6 +2354,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d - struct hlsl_ir_node *lhs = node_from_block(block); - struct hlsl_ir_node *one; - 
-+ if (lhs->data_type->class == HLSL_CLASS_ERROR) -+ return true; -+ - if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, - "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? "de" : "in"); -@@ -2366,9 +2409,9 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - { - struct hlsl_default_value default_value = {0}; - -- if (hlsl_is_numeric_type(dst_comp_type)) -+ if (src->type == HLSL_IR_COMPILE || src->type == HLSL_IR_SAMPLER_STATE) - { -- if (src->type == HLSL_IR_COMPILE || src->type == HLSL_IR_SAMPLER_STATE) -+ if (hlsl_is_numeric_type(dst_comp_type)) - { - /* Default values are discarded if they contain an object - * literal expression for a numeric component. */ -@@ -2381,17 +2424,17 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - dst->default_values = NULL; - } - } -- else -- { -- if (!hlsl_clone_block(ctx, &block, instrs)) -- return; -- default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); -+ } -+ else -+ { -+ if (!hlsl_clone_block(ctx, &block, instrs)) -+ return; -+ default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); - -- if (dst->default_values) -- dst->default_values[*store_index] = default_value; -+ if (dst->default_values) -+ dst->default_values[*store_index] = default_value; - -- hlsl_block_cleanup(&block); -- } -+ hlsl_block_cleanup(&block); - } - } - else -@@ -8770,7 +8813,6 @@ postfix_expr: - YYABORT; - } - vkd3d_free($3); -- $$ = $1; - } - else if (hlsl_is_numeric_type(node->data_type)) - { -@@ -8784,14 +8826,14 @@ postfix_expr: - } - hlsl_block_add_instr($1, swizzle); - vkd3d_free($3); -- $$ = $1; - } -- else -+ else if (node->data_type->class != HLSL_CLASS_ERROR) - { - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid subscript \"%s\".", $3); - vkd3d_free($3); - YYABORT; - } -+ $$ = $1; - } - | postfix_expr 
'[' expr ']' - { -@@ -8903,10 +8945,6 @@ unary_expr: - /* var_modifiers is necessary to avoid shift/reduce conflicts. */ - | '(' var_modifiers type arrays ')' unary_expr - { -- struct hlsl_type *src_type = node_from_block($6)->data_type; -- struct hlsl_type *dst_type; -- unsigned int i; -- - if ($2) - { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -@@ -8914,36 +8952,13 @@ unary_expr: - YYABORT; - } - -- dst_type = $3; -- for (i = 0; i < $4.count; ++i) -- { -- if ($4.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) -- { -- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Implicit size arrays not allowed in casts."); -- } -- dst_type = hlsl_new_array_type(ctx, dst_type, $4.sizes[i]); -- } -- -- if (!explicit_compatible_data_types(ctx, src_type, dst_type)) -- { -- struct vkd3d_string_buffer *src_string, *dst_string; -- -- src_string = hlsl_type_to_string(ctx, src_type); -- dst_string = hlsl_type_to_string(ctx, dst_type); -- if (src_string && dst_string) -- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Can't cast from %s to %s.", -- src_string->buffer, dst_string->buffer); -- hlsl_release_string_buffer(ctx, src_string); -- hlsl_release_string_buffer(ctx, dst_string); -- YYABORT; -- } -- -- if (!add_cast(ctx, $6, node_from_block($6), dst_type, &@3)) -+ if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) - { - destroy_block($6); -+ vkd3d_free($4.sizes); - YYABORT; - } -+ vkd3d_free($4.sizes); - $$ = $6; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 6a74e2eb8de..14cf23e8d1a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -681,6 +681,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - case VKD3DSIH_DCL_CONSTANT_BUFFER: - case VKD3DSIH_DCL_SAMPLER: - case VKD3DSIH_DCL_TEMPS: -+ case VKD3DSIH_DCL_THREAD_GROUP: - vkd3d_shader_instruction_make_nop(ins); - break; - -@@ -763,7 +764,7 @@ static void 
shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i - } - - for (i = 0; i < ins->dst_count; ++i) -- shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id); -+ shader_register_eliminate_phase_addressing(&ins->dst[i].reg, instance_id); - } - - static const struct vkd3d_shader_varying_map *find_varying_map( -@@ -1470,6 +1471,9 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - return false; - memcpy(elements, s->elements, element_count * sizeof(*elements)); - -+ for (i = 0; i < element_count; ++i) -+ elements[i].sort_index = i; -+ - qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare); - - for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e) -@@ -1667,7 +1671,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - vkd3d_unreachable(); - e = &signature->elements[element_idx]; - -- dst_param->write_mask >>= vsir_write_mask_get_component_idx(e->mask); - if (is_io_dcl) - { - /* Validated in the TPF reader. 
*/ -@@ -7149,7 +7152,7 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t - vsir_transform(&ctx, vsir_program_insert_clip_planes); - - if (TRACE_ON()) -- vkd3d_shader_trace(program); -+ vsir_program_trace(program); - - return ctx.result; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index 6b41363d60e..bfc013959e7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -38,6 +38,7 @@ struct msl_generator - struct vkd3d_shader_location location; - struct vkd3d_shader_message_context *message_context; - unsigned int indent; -+ const char *prefix; - }; - - static void VKD3D_PRINTF_FUNC(3, 4) msl_compiler_error(struct msl_generator *gen, -@@ -50,16 +51,37 @@ static void VKD3D_PRINTF_FUNC(3, 4) msl_compiler_error(struct msl_generator *gen - va_end(args); - } - -+static const char *msl_get_prefix(enum vkd3d_shader_type type) -+{ -+ switch (type) -+ { -+ case VKD3D_SHADER_TYPE_VERTEX: -+ return "vs"; -+ case VKD3D_SHADER_TYPE_HULL: -+ return "hs"; -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ return "ds"; -+ case VKD3D_SHADER_TYPE_GEOMETRY: -+ return "gs"; -+ case VKD3D_SHADER_TYPE_PIXEL: -+ return "ps"; -+ case VKD3D_SHADER_TYPE_COMPUTE: -+ return "cs"; -+ default: -+ return NULL; -+ } -+} -+ - static void msl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) - { - vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); - } - - static void msl_print_register_datatype(struct vkd3d_string_buffer *buffer, -- struct msl_generator *gen, const struct vkd3d_shader_register *reg) -+ struct msl_generator *gen, enum vkd3d_data_type data_type) - { - vkd3d_string_buffer_printf(buffer, "."); -- switch (reg->data_type) -+ switch (data_type) - { - case VKD3D_DATA_FLOAT: - vkd3d_string_buffer_printf(buffer, "f"); -@@ -72,8 +94,8 @@ static void msl_print_register_datatype(struct vkd3d_string_buffer *buffer, - break; - default: - msl_compiler_error(gen, 
VKD3D_SHADER_ERROR_MSL_INTERNAL, -- "Internal compiler error: Unhandled register datatype %#x.", reg->data_type); -- vkd3d_string_buffer_printf(buffer, "", reg->data_type); -+ "Internal compiler error: Unhandled register datatype %#x.", data_type); -+ vkd3d_string_buffer_printf(buffer, "", data_type); - break; - } - } -@@ -85,7 +107,7 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, - { - case VKD3DSPR_TEMP: - vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); -- msl_print_register_datatype(buffer, gen, reg); -+ msl_print_register_datatype(buffer, gen, reg->data_type); - break; - default: - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -@@ -244,6 +266,309 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - } - } - -+static void msl_generate_input_struct_declarations(struct msl_generator *gen) -+{ -+ const struct shader_signature *signature = &gen->program->input_signature; -+ enum vkd3d_shader_type type = gen->program->shader_version.type; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_in\n{\n", gen->prefix); -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ continue; -+ -+ if (e->sysval_semantic) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); -+ continue; -+ } -+ -+ if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); -+ continue; -+ } -+ -+ if (e->interpolation_mode != VKD3DSIM_NONE) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled interpolation mode 
%#x.", e->interpolation_mode); -+ continue; -+ } -+ -+ if(e->register_count > 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled register count %u.", e->register_count); -+ continue; -+ } -+ -+ msl_print_indent(gen->buffer, 1); -+ -+ switch(e->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "float4 "); -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(buffer, "int4 "); -+ break; -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(buffer, "uint4 "); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, " ", e->component_type); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled component type %#x.", e->component_type); -+ break; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "shader_in_%u ", i); -+ -+ switch (type) -+ { -+ case VKD3D_SHADER_TYPE_VERTEX: -+ vkd3d_string_buffer_printf(gen->buffer, "[[attribute(%u)]]", e->target_location); -+ break; -+ case VKD3D_SHADER_TYPE_PIXEL: -+ vkd3d_string_buffer_printf(gen->buffer, "[[user(locn%u)]]", e->target_location); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled shader type %#x.", type); -+ break; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "};\n\n"); -+} -+ -+static void msl_generate_vertex_output_element_attribute(struct msl_generator *gen, const struct signature_element *e) -+{ -+ switch (e->sysval_semantic) -+ { -+ case VKD3D_SHADER_SV_POSITION: -+ vkd3d_string_buffer_printf(gen->buffer, "[[position]]"); -+ break; -+ case VKD3D_SHADER_SV_NONE: -+ vkd3d_string_buffer_printf(gen->buffer, "[[user(locn%u)]]", e->target_location); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled vertex shader system value %#x.", 
e->sysval_semantic); -+ break; -+ } -+} -+ -+static void msl_generate_pixel_output_element_attribute(struct msl_generator *gen, const struct signature_element *e) -+{ -+ switch (e->sysval_semantic) -+ { -+ case VKD3D_SHADER_SV_TARGET: -+ vkd3d_string_buffer_printf(gen->buffer, "[[color(%u)]]", e->target_location); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled pixel shader system value %#x.", e->sysval_semantic); -+ break; -+ } -+} -+ -+static void msl_generate_output_struct_declarations(struct msl_generator *gen) -+{ -+ const struct shader_signature *signature = &gen->program->output_signature; -+ enum vkd3d_shader_type type = gen->program->shader_version.type; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_out\n{\n", gen->prefix); -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ continue; -+ -+ if (e->min_precision != VKD3D_SHADER_MINIMUM_PRECISION_NONE) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled minimum precision %#x.", e->min_precision); -+ continue; -+ } -+ -+ if (e->interpolation_mode != VKD3DSIM_NONE) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); -+ continue; -+ } -+ -+ if(e->register_count > 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled register count %u.", e->register_count); -+ continue; -+ } -+ -+ msl_print_indent(gen->buffer, 1); -+ -+ switch(e->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "float4 "); -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ 
vkd3d_string_buffer_printf(buffer, "int4 "); -+ break; -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(buffer, "uint4 "); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, " ", e->component_type); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled component type %#x.", e->component_type); -+ break; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "shader_out_%u ", i); -+ -+ switch (type) -+ { -+ case VKD3D_SHADER_TYPE_VERTEX: -+ msl_generate_vertex_output_element_attribute(gen, e); -+ break; -+ case VKD3D_SHADER_TYPE_PIXEL: -+ msl_generate_pixel_output_element_attribute(gen, e); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled shader type %#x.", type); -+ break; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "};\n\n"); -+} -+ -+static void msl_generate_entrypoint_prologue(struct msl_generator *gen) -+{ -+ const struct shader_signature *signature = &gen->program->input_signature; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ continue; -+ -+ vkd3d_string_buffer_printf(buffer, " %s_in[%u]", gen->prefix, e->register_index); -+ if (e->sysval_semantic == VKD3D_SHADER_SV_NONE) -+ { -+ msl_print_register_datatype(buffer, gen, vkd3d_data_type_from_component_type(e->component_type)); -+ msl_print_write_mask(buffer, e->mask); -+ vkd3d_string_buffer_printf(buffer, " = input.shader_in_%u", i); -+ msl_print_write_mask(buffer, e->mask); -+ } -+ else -+ { -+ vkd3d_string_buffer_printf(buffer, " = ", e->sysval_semantic); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled system value %#x input.", e->sysval_semantic); -+ 
} -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+ } -+} -+ -+static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) -+{ -+ const struct shader_signature *signature = &gen->program->output_signature; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ -+ if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) -+ continue; -+ -+ switch (e->sysval_semantic) -+ { -+ case VKD3D_SHADER_SV_NONE: -+ case VKD3D_SHADER_SV_TARGET: -+ case VKD3D_SHADER_SV_POSITION: -+ vkd3d_string_buffer_printf(buffer, " output.shader_out_%u", i); -+ msl_print_write_mask(buffer, e->mask); -+ vkd3d_string_buffer_printf(buffer, " = %s_out", gen->prefix); -+ msl_print_register_datatype(buffer, gen, vkd3d_data_type_from_component_type(e->component_type)); -+ msl_print_write_mask(buffer, e->mask); -+ break; -+ default: -+ vkd3d_string_buffer_printf(buffer, " ", e->sysval_semantic); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled system value %#x input.", e->sysval_semantic); -+ } -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+ } -+} -+ -+static void msl_generate_entrypoint(struct msl_generator *gen) -+{ -+ enum vkd3d_shader_type type = gen->program->shader_version.type; -+ -+ switch (type) -+ { -+ case VKD3D_SHADER_TYPE_VERTEX: -+ vkd3d_string_buffer_printf(gen->buffer, "vertex "); -+ break; -+ case VKD3D_SHADER_TYPE_PIXEL: -+ vkd3d_string_buffer_printf(gen->buffer, "fragment "); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled shader type %#x.", type); -+ return; -+ } -+ -+ vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_out shader_entry(\n", gen->prefix); -+ -+ /* TODO: descriptor declaration */ -+ -+ msl_print_indent(gen->buffer, 1); -+ vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_in input 
[[stage_in]])\n{\n", gen->prefix); -+ -+ /* TODO: declare #maximum_register + 1 */ -+ vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_in[%u];\n", gen->prefix, 32); -+ vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32); -+ vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix); -+ -+ msl_generate_entrypoint_prologue(gen); -+ -+ vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out);\n", gen->prefix, gen->prefix, gen->prefix); -+ -+ msl_generate_entrypoint_epilogue(gen); -+ -+ vkd3d_string_buffer_printf(gen->buffer, " return output;\n}\n"); -+} -+ - static void msl_generator_generate(struct msl_generator *gen) - { - const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; -@@ -258,7 +583,13 @@ static void msl_generator_generate(struct msl_generator *gen) - vkd3d_string_buffer_printf(gen->buffer, " int4 i;\n"); - vkd3d_string_buffer_printf(gen->buffer, " float4 f;\n};\n\n"); - -- vkd3d_string_buffer_printf(gen->buffer, "void shader_main()\n{\n"); -+ msl_generate_input_struct_declarations(gen); -+ msl_generate_output_struct_declarations(gen); -+ -+ vkd3d_string_buffer_printf(gen->buffer, -+ "void %s_main(thread vkd3d_vec4 *v, " -+ "thread vkd3d_vec4 *o)\n{\n", -+ gen->prefix); - - ++gen->indent; - -@@ -273,7 +604,11 @@ static void msl_generator_generate(struct msl_generator *gen) - msl_handle_instruction(gen, &instructions->elements[i]); - } - -- vkd3d_string_buffer_printf(gen->buffer, "}\n"); -+ --gen->indent; -+ -+ vkd3d_string_buffer_printf(gen->buffer, "}\n\n"); -+ -+ msl_generate_entrypoint(gen); - - if (TRACE_ON()) - vkd3d_string_buffer_trace(gen->buffer); -@@ -288,6 +623,8 @@ static void msl_generator_cleanup(struct msl_generator *gen) - static int msl_generator_init(struct msl_generator *gen, struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) - { -+ enum vkd3d_shader_type type = program->shader_version.type; -+ - 
memset(gen, 0, sizeof(*gen)); - gen->program = program; - vkd3d_string_buffer_cache_init(&gen->string_buffers); -@@ -297,6 +634,12 @@ static int msl_generator_init(struct msl_generator *gen, struct vsir_program *pr - return VKD3D_ERROR_OUT_OF_MEMORY; - } - gen->message_context = message_context; -+ if (!(gen->prefix = msl_get_prefix(type))) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled shader type %#x.", type); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } - - return VKD3D_OK; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h -index 9806614a35b..a98c8ae3df5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.h -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h -@@ -60,6 +60,7 @@ struct preproc_expansion - { - struct preproc_buffer buffer; - const struct preproc_text *text; -+ struct preproc_text *arg_values; - /* Back-pointer to the macro, if this expansion a macro body. This is - * necessary so that argument tokens can be correctly replaced. 
*/ - struct preproc_macro *macro; -@@ -72,7 +73,6 @@ struct preproc_macro - - char **arg_names; - size_t arg_count; -- struct preproc_text *arg_values; - - struct preproc_text body; - }; -@@ -117,6 +117,7 @@ struct preproc_ctx - STATE_ARGS, - } state; - unsigned int paren_depth; -+ struct preproc_text *arg_values; - } text_func, directive_func; - - int current_directive; -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index 41c21cca1f5..d167415c356 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -30,11 +30,11 @@ - - #define YY_DECL static int preproc_lexer_lex(YYSTYPE *yylval_param, YYLTYPE *yylloc_param, yyscan_t yyscanner) - --static struct preproc_macro *preproc_get_top_macro(struct preproc_ctx *ctx) -+static struct preproc_expansion *preproc_get_top_expansion(struct preproc_ctx *ctx) - { - if (!ctx->expansion_count) - return NULL; -- return ctx->expansion_stack[ctx->expansion_count - 1].macro; -+ return &ctx->expansion_stack[ctx->expansion_count - 1]; - } - - static void update_location(struct preproc_ctx *ctx); -@@ -133,14 +133,14 @@ INT_SUFFIX [uUlL]{0,2} - - if (!ctx->last_was_newline) - { -- struct preproc_macro *macro; -+ struct preproc_expansion *exp; - - /* Stringification is only done for function-like macro bodies. - * Anywhere else, we need to parse it as two separate tokens. - * We could use a state for this, but yyless() is easier and cheap. 
- */ - -- if ((macro = preproc_get_top_macro(ctx)) && macro->arg_count) -+ if ((exp = preproc_get_top_expansion(ctx)) && exp->macro && exp->macro->arg_count) - return T_HASHSTRING; - - yyless(1); -@@ -259,6 +259,12 @@ static void preproc_pop_buffer(struct preproc_ctx *ctx) - - yy_delete_buffer(exp->buffer.lexer_buffer, ctx->scanner); - -+ if (exp->macro) -+ { -+ for (unsigned int i = 0; i < exp->macro->arg_count; ++i) -+ vkd3d_string_buffer_cleanup(&exp->arg_values[i].text); -+ free(exp->arg_values); -+ } - --ctx->expansion_count; - TRACE("Expansion stack size is now %zu.\n", ctx->expansion_count); - } -@@ -311,15 +317,15 @@ static int return_token(int token, YYSTYPE *lval, const char *text) - - static const struct preproc_text *find_arg_expansion(struct preproc_ctx *ctx, const char *s) - { -- struct preproc_macro *macro; -+ struct preproc_expansion *exp; - unsigned int i; - -- if ((macro = preproc_get_top_macro(ctx))) -+ if ((exp = preproc_get_top_expansion(ctx)) && exp->macro) - { -- for (i = 0; i < macro->arg_count; ++i) -+ for (i = 0; i < exp->macro->arg_count; ++i) - { -- if (!strcmp(s, macro->arg_names[i])) -- return ¯o->arg_values[i]; -+ if (!strcmp(s, exp->macro->arg_names[i])) -+ return &exp->arg_values[i]; - } - } - return NULL; -@@ -331,7 +337,7 @@ static void preproc_text_add(struct preproc_text *text, const char *string) - } - - static bool preproc_push_expansion(struct preproc_ctx *ctx, -- const struct preproc_text *text, struct preproc_macro *macro) -+ const struct preproc_text *text, struct preproc_macro *macro, struct preproc_text *arg_values) - { - struct preproc_expansion *exp; - -@@ -343,6 +349,7 @@ static bool preproc_push_expansion(struct preproc_ctx *ctx, - exp->buffer.lexer_buffer = yy_scan_bytes(text->text.buffer, text->text.content_size, ctx->scanner); - exp->buffer.location = text->location; - exp->macro = macro; -+ exp->arg_values = arg_values; - TRACE("Expansion stack size is now %zu.\n", ctx->expansion_count); - return true; - } -@@ 
-543,7 +550,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - - if ((expansion = find_arg_expansion(ctx, text))) - { -- preproc_push_expansion(ctx, expansion, NULL); -+ preproc_push_expansion(ctx, expansion, NULL, NULL); - continue; - } - -@@ -551,7 +558,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - { - if (!macro->arg_count) - { -- preproc_push_expansion(ctx, ¯o->body, macro); -+ preproc_push_expansion(ctx, ¯o->body, macro, NULL); - } - else - { -@@ -617,16 +624,19 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - case STATE_IDENTIFIER: - if (token == '(') - { -- struct preproc_text *first_arg = &func_state->macro->arg_values[0]; -- unsigned int i; -+ struct preproc_text *arg_values; -+ -+ if (!(arg_values = calloc(func_state->macro->arg_count, sizeof(*arg_values)))) -+ return 0; -+ -+ for (unsigned int i = 0; i < func_state->macro->arg_count; ++i) -+ vkd3d_string_buffer_init(&arg_values[i].text); -+ arg_values[0].location = *lloc; - - func_state->arg_count = 0; - func_state->paren_depth = 1; - func_state->state = STATE_ARGS; -- for (i = 0; i < func_state->macro->arg_count; ++i) -- func_state->macro->arg_values[i].text.content_size = 0; -- -- first_arg->location = *lloc; -+ func_state->arg_values = arg_values; - } - else - { -@@ -650,7 +660,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - VKD3D_ASSERT(func_state->macro->arg_count); - - if (func_state->arg_count < func_state->macro->arg_count) -- current_arg = &func_state->macro->arg_values[func_state->arg_count]; -+ current_arg = &func_state->arg_values[func_state->arg_count]; - - switch (token) - { -@@ -665,7 +675,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - - if ((expansion = find_arg_expansion(ctx, text))) - { -- preproc_push_expansion(ctx, expansion, NULL); -+ preproc_push_expansion(ctx, expansion, NULL, NULL); - continue; - } - -@@ -701,7 +711,8 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - { - if 
(++func_state->arg_count == func_state->macro->arg_count) - { -- preproc_push_expansion(ctx, &func_state->macro->body, func_state->macro); -+ preproc_push_expansion(ctx, &func_state->macro->body, -+ func_state->macro, func_state->arg_values); - } - else - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y -index 366e351e3b5..c6be17bd230 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.y -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y -@@ -91,7 +91,6 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati - size_t arg_count, const struct vkd3d_shader_location *body_loc, struct vkd3d_string_buffer *body) - { - struct preproc_macro *macro; -- unsigned int i; - int ret; - - if ((macro = preproc_find_macro(ctx, name))) -@@ -108,14 +107,6 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati - macro->name = name; - macro->arg_names = arg_names; - macro->arg_count = arg_count; -- macro->arg_values = NULL; -- if (arg_count && !(macro->arg_values = vkd3d_calloc(arg_count, sizeof(*macro->arg_values)))) -- { -- vkd3d_free(macro); -- return false; -- } -- for (i = 0; i < arg_count; ++i) -- vkd3d_string_buffer_init(¯o->arg_values[i].text); - macro->body.text = *body; - macro->body.location = *body_loc; - ret = rb_put(&ctx->macros, name, ¯o->entry); -@@ -129,12 +120,8 @@ void preproc_free_macro(struct preproc_macro *macro) - - vkd3d_free(macro->name); - for (i = 0; i < macro->arg_count; ++i) -- { -- vkd3d_string_buffer_cleanup(¯o->arg_values[i].text); - vkd3d_free(macro->arg_names[i]); -- } - vkd3d_free(macro->arg_names); -- vkd3d_free(macro->arg_values); - vkd3d_string_buffer_cleanup(¯o->body.text); - vkd3d_free(macro); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 11c054a28f5..cb610c929b6 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -5510,7 +5510,7 @@ static void 
spirv_compiler_emit_output(struct spirv_compiler *compiler, - const struct shader_signature *shader_signature; - const struct vkd3d_spirv_builtin *builtin; - enum vkd3d_shader_sysval_semantic sysval; -- uint32_t write_mask, reg_write_mask; -+ uint32_t write_mask; - bool use_private_variable = false; - struct vkd3d_symbol reg_symbol; - SpvStorageClass storage_class; -@@ -5561,7 +5561,6 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - use_private_variable = true; - } - -- reg_write_mask = write_mask >> component_idx; - vkd3d_symbol_make_io(®_symbol, reg_type, element_idx); - - if (rb_get(&compiler->symbol_table, ®_symbol)) -@@ -5639,7 +5638,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - - vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, - use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, -- use_private_variable ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); -+ use_private_variable ? VKD3DSP_WRITEMASK_ALL : write_mask); - reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; - VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); - -@@ -5650,7 +5649,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - if (use_private_variable) - { - compiler->private_output_variable[element_idx] = var_id; -- compiler->private_output_variable_write_mask[element_idx] |= reg_write_mask; -+ compiler->private_output_variable_write_mask[element_idx] |= write_mask >> component_idx; - if (!compiler->epilogue_function_id) - compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder); - } -@@ -6891,10 +6890,9 @@ static void spirv_compiler_emit_tessellator_partitioning(struct spirv_compiler * - spirv_compiler_emit_execution_mode(compiler, mode, NULL, 0); - } - --static void spirv_compiler_emit_dcl_thread_group(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) 
-+static void spirv_compiler_emit_thread_group_size(struct spirv_compiler *compiler, -+ const struct vsir_thread_group_size *group_size) - { -- const struct vkd3d_shader_thread_group_size *group_size = &instruction->declaration.thread_group_size; - const uint32_t local_size[] = {group_size->x, group_size->y, group_size->z}; - - spirv_compiler_emit_execution_mode(compiler, -@@ -10228,9 +10226,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - spirv_compiler_emit_tessellator_partitioning(compiler, - instruction->declaration.tessellator_partitioning); - break; -- case VKD3DSIH_DCL_THREAD_GROUP: -- spirv_compiler_emit_dcl_thread_group(compiler, instruction); -- break; - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -@@ -10650,6 +10645,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - spirv_compiler_emit_temps(compiler, program->temp_count); - if (program->ssa_count) - spirv_compiler_allocate_ssa_register_ids(compiler, program->ssa_count); -+ if (compiler->shader_type == VKD3D_SHADER_TYPE_COMPUTE) -+ spirv_compiler_emit_thread_group_size(compiler, &program->thread_group_size); - - spirv_compiler_emit_descriptor_declarations(compiler); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 00a525c9ac3..75bdb06fe0e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -1289,11 +1289,14 @@ static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instructio - } - - static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) - { -+ struct vsir_program *program = sm4->p.program; -+ - 
ins->declaration.thread_group_size.x = *tokens++; - ins->declaration.thread_group_size.y = *tokens++; - ins->declaration.thread_group_size.z = *tokens++; -+ program->thread_group_size = ins->declaration.thread_group_size; - } - - static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -@@ -2959,7 +2962,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - WARN("Failed to validate shader after parsing, ret %d.\n", ret); - - if (TRACE_ON()) -- vkd3d_shader_trace(program); -+ vsir_program_trace(program); - - vsir_program_cleanup(program); - return ret; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index ee8a633431a..9b320106340 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -1489,7 +1489,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - descriptor_info1, combined_sampler_info, message_context); - - if (TRACE_ON()) -- vkd3d_shader_trace(program); -+ vsir_program_trace(program); - - for (i = 0; i < program->instructions.count; ++i) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index eab164cc848..1a42f385fc0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1061,6 +1061,9 @@ enum vkd3d_shader_input_sysval_semantic - - struct signature_element - { -+ /* sort_index is not a property of the signature element, it is just a -+ * convenience field used to retain the original order in a signature and -+ * recover it after having permuted the signature itself. 
*/ - unsigned int sort_index; - const char *semantic_name; - unsigned int semantic_index; -@@ -1164,7 +1167,7 @@ struct vkd3d_shader_tgsm_structured - bool zero_init; - }; - --struct vkd3d_shader_thread_group_size -+struct vsir_thread_group_size - { - unsigned int x, y, z; - }; -@@ -1243,7 +1246,7 @@ struct vkd3d_shader_instruction - struct vkd3d_shader_structured_resource structured_resource; - struct vkd3d_shader_tgsm_raw tgsm_raw; - struct vkd3d_shader_tgsm_structured tgsm_structured; -- struct vkd3d_shader_thread_group_size thread_group_size; -+ struct vsir_thread_group_size thread_group_size; - enum vkd3d_tessellator_domain tessellator_domain; - enum vkd3d_shader_tessellator_output_primitive tessellator_output_primitive; - enum vkd3d_shader_tessellator_partitioning tessellator_partitioning; -@@ -1390,6 +1393,7 @@ struct vsir_program - bool free_parameters; - - unsigned int input_control_point_count, output_control_point_count; -+ struct vsir_thread_group_size thread_group_size; - unsigned int flat_constant_count[3]; - unsigned int block_count; - unsigned int temp_count; -@@ -1474,7 +1478,7 @@ struct vkd3d_shader_scan_descriptor_info1 - unsigned int descriptor_count; - }; - --void vkd3d_shader_trace(const struct vsir_program *program); -+void vsir_program_trace(const struct vsir_program *program); - - const char *shader_get_type_prefix(enum vkd3d_shader_type type); - --- -2.45.2 - diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-9cb4207c92ec3ee05fce15580c89f2e5146.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-9cb4207c92ec3ee05fce15580c89f2e5146.patch deleted file mode 100644 index dc4c9ab6..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-9cb4207c92ec3ee05fce15580c89f2e5146.patch +++ /dev/null @@ -1,386 +0,0 @@ -From 4b1bc5d293eca9de7f1f00f3afac32e329364515 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 10 Oct 2024 07:16:15 +1100 -Subject: [PATCH] Updated vkd3d to 9cb4207c92ec3ee05fce15580c89f2e5146354db. 
- ---- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 15 +++ - libs/vkd3d/libs/vkd3d-shader/glsl.c | 101 ++++++++++++++++-- - libs/vkd3d/libs/vkd3d-shader/ir.c | 40 +++++++ - libs/vkd3d/libs/vkd3d-shader/spirv.c | 24 ++--- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 15 +++ - .../libs/vkd3d-shader/vkd3d_shader_main.c | 2 + - .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + - 7 files changed, 173 insertions(+), 25 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index c66b059325a..5db9d6da063 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -10315,6 +10315,21 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro - sm6->ptr = &sm6->start[1]; - sm6->bitpos = 2; - -+ switch (program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_HULL: -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ break; -+ -+ default: -+ if (program->patch_constant_signature.element_count != 0) -+ { -+ WARN("The patch constant signature only makes sense for Hull and Domain Shaders, ignoring it.\n"); -+ shader_signature_cleanup(&program->patch_constant_signature); -+ } -+ break; -+ } -+ - input_signature = &program->input_signature; - output_signature = &program->output_signature; - patch_constant_signature = &program->patch_constant_signature; -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 4dc95899a11..91ee355ed39 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -651,6 +651,20 @@ static void shader_glsl_cast(struct vkd3d_glsl_generator *gen, const struct vkd3 - glsl_dst_cleanup(&dst, &gen->string_buffers); - } - -+static void shader_glsl_end_block(struct vkd3d_glsl_generator *gen) -+{ -+ --gen->indent; -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "}\n"); -+} -+ -+static void shader_glsl_begin_block(struct vkd3d_glsl_generator *gen) -+{ -+ 
shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "{\n"); -+ ++gen->indent; -+} -+ - static void shader_glsl_if(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { - const char *condition; -@@ -664,23 +678,65 @@ static void shader_glsl_if(struct vkd3d_glsl_generator *gen, const struct vkd3d_ - - glsl_src_cleanup(&src, &gen->string_buffers); - -- shader_glsl_print_indent(gen->buffer, gen->indent); -- vkd3d_string_buffer_printf(gen->buffer, "{\n"); -- ++gen->indent; -+ shader_glsl_begin_block(gen); - } - - static void shader_glsl_else(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { -- unsigned int i = 4 * (gen->indent - 1); -+ shader_glsl_end_block(gen); -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "else\n"); -+ shader_glsl_begin_block(gen); -+} - -- vkd3d_string_buffer_printf(gen->buffer, "%*s}\n%*selse\n%*s{\n", i, "", i, "", i, ""); -+static void shader_glsl_loop(struct vkd3d_glsl_generator *gen) -+{ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "for (;;)\n"); -+ shader_glsl_begin_block(gen); - } - --static void shader_glsl_endif(struct vkd3d_glsl_generator *gen) -+static void shader_glsl_break(struct vkd3d_glsl_generator *gen) - { -- --gen->indent; - shader_glsl_print_indent(gen->buffer, gen->indent); -- vkd3d_string_buffer_printf(gen->buffer, "}\n"); -+ vkd3d_string_buffer_printf(gen->buffer, "break;\n"); -+} -+ -+static void shader_glsl_continue(struct vkd3d_glsl_generator *gen) -+{ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "continue;\n"); -+} -+ -+static void shader_glsl_switch(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ struct glsl_src src; -+ -+ glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); -+ -+ shader_glsl_print_indent(gen->buffer, 
gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "switch (%s)\n", src.str->buffer); -+ shader_glsl_begin_block(gen); -+ -+ glsl_src_cleanup(&src, &gen->string_buffers); -+} -+ -+static void shader_glsl_case(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ struct glsl_src src; -+ -+ glsl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); -+ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "case %s:\n", src.str->buffer); -+ -+ glsl_src_cleanup(&src, &gen->string_buffers); -+} -+ -+static void shader_glsl_default(struct vkd3d_glsl_generator *gen) -+{ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "default:\n"); - } - - static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -@@ -1013,6 +1069,9 @@ static void shader_glsl_shader_prologue(struct vkd3d_glsl_generator *gen) - case VKD3D_SHADER_COMPONENT_UINT: - vkd3d_string_buffer_printf(buffer, " = uintBitsToFloat(shader_in_%u)", i); - break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(buffer, " = intBitsToFloat(shader_in_%u)", i); -+ break; - default: - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled input component type %#x.", e->component_type); -@@ -1127,6 +1186,15 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_AND: - shader_glsl_binop(gen, ins, "&"); - break; -+ case VKD3DSIH_BREAK: -+ shader_glsl_break(gen); -+ break; -+ case VKD3DSIH_CASE: -+ shader_glsl_case(gen, ins); -+ break; -+ case VKD3DSIH_CONTINUE: -+ shader_glsl_continue(gen); -+ break; - case VKD3DSIH_DCL_INDEXABLE_TEMP: - shader_glsl_dcl_indexable_temp(gen, ins); - break; -@@ -1138,6 +1206,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_NOP: - break; -+ case 
VKD3DSIH_DEFAULT: -+ shader_glsl_default(gen); -+ break; - case VKD3DSIH_DIV: - shader_glsl_binop(gen, ins, "/"); - break; -@@ -1154,7 +1225,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - shader_glsl_else(gen, ins); - break; - case VKD3DSIH_ENDIF: -- shader_glsl_endif(gen); -+ case VKD3DSIH_ENDLOOP: -+ case VKD3DSIH_ENDSWITCH: -+ shader_glsl_end_block(gen); - break; - case VKD3DSIH_EQO: - case VKD3DSIH_IEQ: -@@ -1184,6 +1257,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - break; - case VKD3DSIH_ILT: - case VKD3DSIH_LTO: -+ case VKD3DSIH_ULT: - shader_glsl_relop(gen, ins, "<", "lessThan"); - break; - case VKD3DSIH_IMAX: -@@ -1220,6 +1294,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_LOG: - shader_glsl_intrinsic(gen, ins, "log2"); - break; -+ case VKD3DSIH_LOOP: -+ shader_glsl_loop(gen); -+ break; - case VKD3DSIH_MOV: - shader_glsl_mov(gen, ins); - break; -@@ -1259,6 +1336,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_SQRT: - shader_glsl_intrinsic(gen, ins, "sqrt"); - break; -+ case VKD3DSIH_SWITCH: -+ shader_glsl_switch(gen, ins); -+ break; - default: - shader_glsl_unhandled(gen, ins); - break; -@@ -1663,6 +1743,9 @@ static void shader_glsl_generate_input_declarations(struct vkd3d_glsl_generator - case VKD3D_SHADER_COMPONENT_UINT: - vkd3d_string_buffer_printf(buffer, "uvec4"); - break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(buffer, "ivec4"); -+ break; - case VKD3D_SHADER_COMPONENT_FLOAT: - vkd3d_string_buffer_printf(buffer, "vec4"); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 14cf23e8d1a..affbae3ea4e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -6505,6 +6505,30 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, - return true; - } - -+static 
void vsir_validate_signature_element(struct validation_context *ctx, -+ const struct shader_signature *signature, const char *signature_type, -+ unsigned int idx) -+{ -+ const struct signature_element *element = &signature->elements[idx]; -+ -+ if (element->register_count == 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid zero register count.", idx, signature_type); -+ -+ if (element->mask == 0 || (element->mask & ~0xf)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid mask %#x.", idx, signature_type, element->mask); -+} -+ -+static void vsir_validate_signature(struct validation_context *ctx, -+ const struct shader_signature *signature, const char *signature_type) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ vsir_validate_signature_element(ctx, signature, signature_type, i); -+} -+ - static const char *name_from_cf_type(enum vsir_control_flow_type type) - { - switch (type) -@@ -7042,6 +7066,22 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c - if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) - return VKD3D_OK; - -+ switch (program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_HULL: -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ break; -+ -+ default: -+ if (program->patch_constant_signature.element_count != 0) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Patch constant signature is only valid for hull and domain shaders."); -+ } -+ -+ vsir_validate_signature(&ctx, &program->input_signature, "input"); -+ vsir_validate_signature(&ctx, &program->output_signature, "output"); -+ vsir_validate_signature(&ctx, &program->patch_constant_signature, "patch constant"); -+ - if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) - goto fail; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c 
-index cb610c929b6..692432d5513 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -3267,18 +3267,6 @@ static void spirv_compiler_emit_register_debug_name(struct vkd3d_spirv_builder * - vkd3d_spirv_build_op_name(builder, id, "%s", debug_name); - } - --static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, -- struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, -- enum vkd3d_shader_component_type component_type, unsigned int component_count) --{ -- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- uint32_t type_id, ptr_type_id; -- -- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); -- ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); -- return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); --} -- - static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, - struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, - enum vkd3d_shader_component_type component_type, unsigned int component_count, -@@ -3288,10 +3276,6 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil - uint32_t type_id, length_id, ptr_type_id; - unsigned int i; - -- if (!length_count) -- return spirv_compiler_emit_variable(compiler, -- stream, storage_class, component_type, component_count); -- - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - for (i = 0; i < length_count; ++i) - { -@@ -3305,6 +3289,14 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil - return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); - } - -+static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, -+ struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, -+ enum vkd3d_shader_component_type component_type, unsigned int component_count) -+{ -+ return 
spirv_compiler_emit_array_variable(compiler, stream, storage_class, -+ component_type, component_count, NULL, 0); -+} -+ - static const struct vkd3d_spec_constant_info - { - enum vkd3d_shader_parameter_name name; -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 75bdb06fe0e..f79e97e92d4 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -2914,6 +2914,21 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) - uninvert_used_masks(&program->patch_constant_signature); - -+ switch (program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_HULL: -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ break; -+ -+ default: -+ if (program->patch_constant_signature.element_count != 0) -+ { -+ WARN("The patch constant signature only makes sense for Hull and Domain Shaders, ignoring it.\n"); -+ shader_signature_cleanup(&program->patch_constant_signature); -+ } -+ break; -+ } -+ - if (!shader_sm4_parser_validate_signature(&sm4, &program->input_signature, - sm4.input_register_masks, "Input") - || !shader_sm4_parser_validate_signature(&sm4, &program->output_signature, -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 9b320106340..f84ac551272 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -1846,6 +1846,8 @@ void shader_signature_cleanup(struct shader_signature *signature) - } - vkd3d_free(signature->elements); - signature->elements = NULL; -+ signature->elements_capacity = 0; -+ signature->element_count = 0; - } - - int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 1a42f385fc0..41b879af4b4 100644 ---- 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -245,6 +245,7 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_VSIR_INVALID_GS = 9019, - VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER = 9020, - VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC = 9021, -+ VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9022, - - VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, - --- -2.45.2 - diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-cd249a47b86545fe0b3a4b477f854965e85.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-cd249a47b86545fe0b3a4b477f854965e85.patch deleted file mode 100644 index d24c8625..00000000 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-cd249a47b86545fe0b3a4b477f854965e85.patch +++ /dev/null @@ -1,1533 +0,0 @@ -From 52de9a875b2a75a0d32486b990e685fed075bcab Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 11 Oct 2024 07:30:59 +1100 -Subject: [PATCH] Updated vkd3d to cd249a47b86545fe0b3a4b477f854965e858b744. - ---- - libs/vkd3d/include/vkd3d_shader.h | 52 +++ - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 26 +- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 297 +++++++++++++-- - libs/vkd3d/libs/vkd3d-shader/ir.c | 349 ++++++++++++++++-- - libs/vkd3d/libs/vkd3d-shader/msl.c | 226 +++++++++++- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 35 +- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 5 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 11 + - 8 files changed, 921 insertions(+), 80 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index d08ee74a3a0..e22f236ecd1 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -648,6 +648,58 @@ enum vkd3d_shader_parameter_name - VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_5, - VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_6, - VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_7, -+ /** -+ * Point size. 
-+ * -+ * When this parameter is provided to a vertex, tessellation, or geometry -+ * shader, and the source shader does not write point size, it specifies a -+ * uniform value which will be written to point size. -+ * If the source shader writes point size, this parameter is ignored. -+ * -+ * This parameter can be used to implement fixed function point size, as -+ * present in Direct3D versions 8 and 9, if the target environment does not -+ * support point size as part of its own fixed-function API (as Vulkan and -+ * core OpenGL). -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. -+ * -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, -+ /** -+ * Minimum point size. -+ * -+ * When this parameter is provided to a vertex, tessellation, or geometry -+ * shader, and the source shader writes point size or uses the -+ * VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE parameter, the point size will -+ * be clamped to the provided minimum value. -+ * If point size is not written in one of these ways, -+ * this parameter is ignored. -+ * If this parameter is not provided, the point size will not be clamped -+ * to a minimum size by vkd3d-shader. -+ * -+ * This parameter can be used to implement fixed function point size, as -+ * present in Direct3D versions 8 and 9, if the target environment does not -+ * support point size as part of its own fixed-function API (as Vulkan and -+ * core OpenGL). -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. -+ * -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, -+ /** -+ * Maximum point size. -+ * -+ * This parameter has identical behaviour to -+ * VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, except that it provides -+ * the maximum size rather than the minimum. 
-+ * -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), - }; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 589b800f8c9..44b1714b56b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -110,13 +110,6 @@ enum vkd3d_sm1_misc_register - VKD3D_SM1_MISC_FACE = 0x1, - }; - --enum vkd3d_sm1_rastout_register --{ -- VKD3D_SM1_RASTOUT_POSITION = 0x0, -- VKD3D_SM1_RASTOUT_FOG = 0x1, -- VKD3D_SM1_RASTOUT_POINT_SIZE = 0x2, --}; -- - enum vkd3d_sm1_opcode - { - VKD3D_SM1_OP_NOP = 0x00, -@@ -957,6 +950,9 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const - shader_sm1_parse_src_param(addr_token, NULL, dst_rel_addr); - } - shader_sm1_parse_dst_param(token, dst_rel_addr, dst_param); -+ -+ if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) -+ sm1->p.program->has_point_size = true; - } - - static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, -@@ -1434,17 +1430,17 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, - {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, VKD3D_SM1_MISC_POSITION}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, 
VKD3DSPR_RASTOUT, VSIR_RASTOUT_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VKD3D_SM1_RASTOUT_POSITION}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, VSIR_RASTOUT_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, - }; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 91ee355ed39..c8efdae3386 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -22,7 +22,7 @@ struct glsl_resource_type_info - { - size_t coord_size; - bool shadow; -- const char *sampler_type; -+ const char *type_suffix; - }; - - struct glsl_src -@@ -102,17 +102,17 @@ static const struct glsl_resource_type_info *shader_glsl_get_resource_type_info( - { - static const struct glsl_resource_type_info info[] = - { -- {0, 0, "samplerNone"}, /* VKD3D_SHADER_RESOURCE_NONE */ -- {1, 0, "samplerBuffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ -- {1, 1, "sampler1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ -- {2, 1, "sampler2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ -- {2, 0, "sampler2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ -- {3, 0, "sampler3D"}, /* 
VKD3D_SHADER_RESOURCE_TEXTURE_3D */ -- {3, 1, "samplerCube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ -- {2, 1, "sampler1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ -- {3, 1, "sampler2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ -- {3, 0, "sampler2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ -- {4, 1, "samplerCubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ -+ {0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */ -+ {1, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ -+ {1, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ -+ {2, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ -+ {2, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ -+ {3, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ -+ {3, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ -+ {2, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ -+ {3, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ -+ {3, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ -+ {4, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ - }; - - if (!t || t >= ARRAY_SIZE(info)) -@@ -173,6 +173,14 @@ static void shader_glsl_print_combined_sampler_name(struct vkd3d_string_buffer * - } - } - -+static void shader_glsl_print_image_name(struct vkd3d_string_buffer *buffer, -+ struct vkd3d_glsl_generator *gen, unsigned int idx, unsigned int space) -+{ -+ vkd3d_string_buffer_printf(buffer, "%s_image_%u", gen->prefix, idx); -+ if (space) -+ vkd3d_string_buffer_printf(buffer, "_%u", space); -+} -+ - static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, - struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_register *reg) - { -@@ -363,15 +371,14 @@ static void shader_glsl_print_bitcast(struct vkd3d_string_buffer *dst, struct vk - vkd3d_string_buffer_printf(dst, "%s", src); - } - --static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, -- const struct vkd3d_shader_src_param *vsir_src, uint32_t 
mask) -+static void shader_glsl_print_src(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *vsir_src, uint32_t mask, enum vkd3d_data_type data_type) - { - const struct vkd3d_shader_register *reg = &vsir_src->reg; - struct vkd3d_string_buffer *register_name, *str; - enum vkd3d_data_type src_data_type; - unsigned int size; - -- glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); - register_name = vkd3d_string_buffer_get(&gen->string_buffers); - - if (reg->non_uniform) -@@ -386,12 +393,12 @@ static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator - shader_glsl_print_register_name(register_name, gen, reg); - - if (!vsir_src->modifiers) -- str = glsl_src->str; -+ str = buffer; - else - str = vkd3d_string_buffer_get(&gen->string_buffers); - - size = reg->dimension == VSIR_DIMENSION_VEC4 ? 4 : 1; -- shader_glsl_print_bitcast(str, gen, register_name->buffer, reg->data_type, src_data_type, size); -+ shader_glsl_print_bitcast(str, gen, register_name->buffer, data_type, src_data_type, size); - if (reg->dimension == VSIR_DIMENSION_VEC4) - shader_glsl_print_swizzle(str, vsir_src->swizzle, mask); - -@@ -400,24 +407,31 @@ static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator - case VKD3DSPSM_NONE: - break; - case VKD3DSPSM_NEG: -- vkd3d_string_buffer_printf(glsl_src->str, "-%s", str->buffer); -+ vkd3d_string_buffer_printf(buffer, "-%s", str->buffer); - break; - case VKD3DSPSM_ABS: -- vkd3d_string_buffer_printf(glsl_src->str, "abs(%s)", str->buffer); -+ vkd3d_string_buffer_printf(buffer, "abs(%s)", str->buffer); - break; - default: -- vkd3d_string_buffer_printf(glsl_src->str, "(%s)", -+ vkd3d_string_buffer_printf(buffer, "(%s)", - vsir_src->modifiers, str->buffer); - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); - break; - } - -- if (str != glsl_src->str) 
-+ if (str != buffer) - vkd3d_string_buffer_release(&gen->string_buffers, str); - vkd3d_string_buffer_release(&gen->string_buffers, register_name); - } - -+static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) -+{ -+ glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); -+ shader_glsl_print_src(glsl_src->str, gen, vsir_src, mask, vsir_src->reg.data_type); -+} -+ - static void glsl_dst_cleanup(struct glsl_dst *dst, struct vkd3d_string_buffer_cache *cache) - { - vkd3d_string_buffer_release(cache, dst->mask); -@@ -892,6 +906,85 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - glsl_dst_cleanup(&dst, &gen->string_buffers); - } - -+static void shader_glsl_store_uav_typed(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ const struct glsl_resource_type_info *resource_type_info; -+ enum vkd3d_shader_component_type component_type; -+ const struct vkd3d_shader_descriptor_info1 *d; -+ enum vkd3d_shader_resource_type resource_type; -+ unsigned int uav_id, uav_idx, uav_space; -+ struct vkd3d_string_buffer *image_data; -+ struct glsl_src image_coord; -+ uint32_t coord_mask; -+ -+ if (ins->dst[0].reg.idx[0].rel_addr || ins->dst[0].reg.idx[1].rel_addr) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -+ "Descriptor indexing is not supported."); -+ -+ uav_id = ins->dst[0].reg.idx[0].offset; -+ uav_idx = ins->dst[0].reg.idx[1].offset; -+ if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_id))) -+ { -+ resource_type = d->resource_type; -+ uav_space = d->register_space; -+ component_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Undeclared UAV descriptor %u.", uav_id); -+ uav_space = 0; -+ resource_type = 
VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ } -+ -+ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) -+ { -+ coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled UAV type %#x.", resource_type); -+ coord_mask = vkd3d_write_mask_from_component_count(2); -+ } -+ -+ glsl_src_init(&image_coord, gen, &ins->src[0], coord_mask); -+ image_data = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ if (ins->src[1].reg.dimension == VSIR_DIMENSION_SCALAR) -+ { -+ switch (component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ vkd3d_string_buffer_printf(image_data, "uvec4("); -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(image_data, "ivec4("); -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled component type %#x.", component_type); -+ /* fall through */ -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ vkd3d_string_buffer_printf(image_data, "vec4("); -+ break; -+ } -+ } -+ shader_glsl_print_src(image_data, gen, &ins->src[1], VKD3DSP_WRITEMASK_ALL, -+ vkd3d_data_type_from_component_type(component_type)); -+ if (ins->src[1].reg.dimension == VSIR_DIMENSION_SCALAR) -+ vkd3d_string_buffer_printf(image_data, ", 0, 0, 0)"); -+ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "imageStore("); -+ shader_glsl_print_image_name(gen->buffer, gen, uav_idx, uav_space); -+ vkd3d_string_buffer_printf(gen->buffer, ", %s, %s);\n", image_coord.str->buffer, image_data->buffer); -+ -+ vkd3d_string_buffer_release(&gen->string_buffers, image_data); -+ glsl_src_cleanup(&image_coord, &gen->string_buffers); -+} -+ - static void shader_glsl_unary_op(struct vkd3d_glsl_generator *gen, - const struct vkd3d_shader_instruction *ins, 
const char *op) - { -@@ -1336,6 +1429,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_SQRT: - shader_glsl_intrinsic(gen, ins, "sqrt"); - break; -+ case VKD3DSIH_STORE_UAV_TYPED: -+ shader_glsl_store_uav_typed(gen, ins); -+ break; - case VKD3DSIH_SWITCH: - shader_glsl_switch(gen, ins); - break; -@@ -1372,6 +1468,137 @@ static bool shader_glsl_check_shader_visibility(const struct vkd3d_glsl_generato - } - } - -+static bool shader_glsl_get_uav_binding(const struct vkd3d_glsl_generator *gen, unsigned int register_space, -+ unsigned int register_idx, enum vkd3d_shader_resource_type resource_type, unsigned int *binding_idx) -+{ -+ const struct vkd3d_shader_interface_info *interface_info = gen->interface_info; -+ const struct vkd3d_shader_resource_binding *binding; -+ enum vkd3d_shader_binding_flag resource_type_flag; -+ unsigned int i; -+ -+ if (!interface_info) -+ return false; -+ -+ resource_type_flag = resource_type == VKD3D_SHADER_RESOURCE_BUFFER -+ ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; -+ -+ for (i = 0; i < interface_info->binding_count; ++i) -+ { -+ binding = &interface_info->bindings[i]; -+ -+ if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -+ continue; -+ if (binding->register_space != register_space) -+ continue; -+ if (binding->register_index != register_idx) -+ continue; -+ if (!shader_glsl_check_shader_visibility(gen, binding->shader_visibility)) -+ continue; -+ if (!(binding->flags & resource_type_flag)) -+ continue; -+ *binding_idx = i; -+ return true; -+ } -+ -+ return false; -+} -+ -+static void shader_glsl_generate_uav_declaration(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_descriptor_info1 *uav) -+{ -+ const struct glsl_resource_type_info *resource_type_info; -+ const char *image_type_prefix, *image_type, *read_format; -+ const struct vkd3d_shader_descriptor_binding *binding; -+ const struct vkd3d_shader_descriptor_offset *offset; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ enum vkd3d_shader_component_type component_type; -+ unsigned int binding_idx; -+ -+ if (uav->count != 1) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -+ "UAV %u has unsupported descriptor array size %u.", uav->register_id, uav->count); -+ return; -+ } -+ -+ if (!shader_glsl_get_uav_binding(gen, uav->register_space, -+ uav->register_index, uav->resource_type, &binding_idx)) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ "No descriptor binding specified for UAV %u.", uav->register_id); -+ return; -+ } -+ -+ binding = &gen->interface_info->bindings[binding_idx].binding; -+ -+ if (binding->set != 0) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ "Unsupported binding set %u specified for UAV %u.", binding->set, uav->register_id); -+ return; -+ } -+ -+ if (binding->count != 1) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ 
"Unsupported binding count %u specified for UAV %u.", binding->count, uav->register_id); -+ return; -+ } -+ -+ if (gen->offset_info && gen->offset_info->binding_offsets) -+ { -+ offset = &gen->offset_info->binding_offsets[binding_idx]; -+ if (offset->static_offset || offset->dynamic_offset_index != ~0u) -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled descriptor offset specified for UAV %u.", -+ uav->register_id); -+ return; -+ } -+ } -+ -+ if ((resource_type_info = shader_glsl_get_resource_type_info(uav->resource_type))) -+ { -+ image_type = resource_type_info->type_suffix; -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled UAV type %#x.", uav->resource_type); -+ image_type = ""; -+ } -+ -+ switch ((component_type = vkd3d_component_type_from_resource_data_type(uav->resource_data_type))) -+ { -+ case VKD3D_SHADER_COMPONENT_UINT: -+ image_type_prefix = "u"; -+ read_format = "r32ui"; -+ break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ image_type_prefix = "i"; -+ read_format = "r32i"; -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled component type %#x for UAV %u.", -+ component_type, uav->register_id); -+ /* fall through */ -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ image_type_prefix = ""; -+ read_format = "r32f"; -+ break; -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "layout(binding = %u", binding->binding); -+ if (uav->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ) -+ vkd3d_string_buffer_printf(buffer, ", %s) ", read_format); -+ else -+ vkd3d_string_buffer_printf(buffer, ") writeonly "); -+ vkd3d_string_buffer_printf(buffer, "uniform %simage%s ", image_type_prefix, image_type); -+ shader_glsl_print_image_name(buffer, gen, uav->register_index, uav->register_space); -+ vkd3d_string_buffer_printf(buffer, ";\n"); -+} -+ - static bool 
shader_glsl_get_cbv_binding(const struct vkd3d_glsl_generator *gen, - unsigned int register_space, unsigned int register_idx, unsigned int *binding_idx) - { -@@ -1415,7 +1642,7 @@ static void shader_glsl_generate_cbv_declaration(struct vkd3d_glsl_generator *ge - - if (cbv->count != 1) - { -- vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, - "Constant buffer %u has unsupported descriptor array size %u.", cbv->register_id, cbv->count); - return; - } -@@ -1540,7 +1767,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator - - if ((resource_type_info = shader_glsl_get_resource_type_info(srv->resource_type))) - { -- sampler_type = resource_type_info->sampler_type; -+ sampler_type = resource_type_info->type_suffix; - if (shadow && !resource_type_info->shadow) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, - "Comparison samplers are not supported with resource type %#x.", srv->resource_type); -@@ -1603,7 +1830,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator - return; - } - -- vkd3d_string_buffer_printf(buffer, "layout(binding = %u) uniform %s%s%s ", -+ vkd3d_string_buffer_printf(buffer, "layout(binding = %u) uniform %ssampler%s%s ", - binding->binding, sampler_type_prefix, sampler_type, shadow ? 
"Shadow" : ""); - shader_glsl_print_combined_sampler_name(buffer, gen, crs->resource_index, - crs->resource_space, crs->sampler_index, crs->sampler_space); -@@ -1628,6 +1855,10 @@ static void shader_glsl_generate_descriptor_declarations(struct vkd3d_glsl_gener - /* GLSL uses combined resource/sampler descriptors.*/ - break; - -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: -+ shader_glsl_generate_uav_declaration(gen, descriptor); -+ break; -+ - case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: - shader_glsl_generate_cbv_declaration(gen, descriptor); - break; -@@ -1820,7 +2051,7 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator - "Internal compiler error: Unhandled output component type %#x.", e->component_type); - break; - } -- vkd3d_string_buffer_printf(buffer, " shader_out_%u;\n", i); -+ vkd3d_string_buffer_printf(buffer, " shader_out_%u;\n", e->semantic_index); - ++count; - } - if (count) -@@ -1837,6 +2068,14 @@ static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) - { - const struct vsir_program *program = gen->program; - struct vkd3d_string_buffer *buffer = gen->buffer; -+ const struct vsir_thread_group_size *group_size; -+ -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) -+ { -+ group_size = &program->thread_group_size; -+ vkd3d_string_buffer_printf(buffer, "layout(local_size_x = %u, local_size_y = %u, local_size_z = %u) in;\n\n", -+ group_size->x, group_size->y, group_size->z); -+ } - - shader_glsl_generate_descriptor_declarations(gen); - shader_glsl_generate_input_declarations(gen); -@@ -1918,6 +2157,10 @@ static void shader_glsl_init_limits(struct vkd3d_glsl_generator *gen, const stru - limits->input_count = 32; - limits->output_count = 8; - break; -+ case VKD3D_SHADER_TYPE_COMPUTE: -+ limits->input_count = 0; -+ limits->output_count = 0; -+ break; - default: - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled shader type %#x.", version->type); 
-@@ -1948,8 +2191,8 @@ static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, - gen->prefix = "unknown"; - } - shader_glsl_init_limits(gen, &program->shader_version); -- gen->interstage_input = type != VKD3D_SHADER_TYPE_VERTEX; -- gen->interstage_output = type != VKD3D_SHADER_TYPE_PIXEL; -+ gen->interstage_input = type != VKD3D_SHADER_TYPE_VERTEX && type != VKD3D_SHADER_TYPE_COMPUTE; -+ gen->interstage_output = type != VKD3D_SHADER_TYPE_PIXEL && type != VKD3D_SHADER_TYPE_COMPUTE; - - gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); - gen->offset_info = vkd3d_find_struct(compile_info->next, DESCRIPTOR_OFFSET_INFO); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index affbae3ea4e..9d24126fba8 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -203,6 +203,12 @@ static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned - src->reg.idx[0].offset = idx; - } - -+static void src_param_init_ssa_float(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); -+ src->reg.idx[0].offset = idx; -+} -+ - static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) - { - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -@@ -244,6 +250,12 @@ static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned - dst->reg.idx[0].offset = idx; - } - -+static void dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+{ -+ vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); -+ dst->reg.idx[0].offset = idx; -+} -+ - static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) - { - vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -@@ -254,7 +266,6 @@ static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigne - { - 
vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - dst->reg.idx[0].offset = idx; -- dst->write_mask = VKD3DSP_WRITEMASK_0; - } - - void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -@@ -682,6 +693,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - case VKD3DSIH_DCL_SAMPLER: - case VKD3DSIH_DCL_TEMPS: - case VKD3DSIH_DCL_THREAD_GROUP: -+ case VKD3DSIH_DCL_UAV_TYPED: - vkd3d_shader_instruction_make_nop(ins); - break; - -@@ -1221,7 +1233,7 @@ static bool shader_signature_find_element_for_reg(const struct shader_signature - unsigned int reg_idx, unsigned int write_mask, unsigned int *element_idx) - { - const struct signature_element *e; -- unsigned int i, base_write_mask; -+ unsigned int i; - - for (i = 0; i < signature->element_count; ++i) - { -@@ -1234,14 +1246,6 @@ static bool shader_signature_find_element_for_reg(const struct shader_signature - } - } - -- /* Validated in the TPF reader, but failure in signature_element_range_expand_mask() -- * can land us here on an unmatched vector mask. */ -- FIXME("Failed to find signature element for register index %u, mask %#x; using scalar mask.\n", -- reg_idx, write_mask); -- base_write_mask = 1u << vsir_write_mask_get_component_idx(write_mask); -- if (base_write_mask != write_mask) -- return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask, element_idx); -- - return false; - } - -@@ -1655,6 +1659,9 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - break; - - case VKD3DSPR_RASTOUT: -+ /* Leave point size as a system value for the backends to consume. 
*/ -+ if (reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) -+ return true; - reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset; - signature = normaliser->output_signature; - reg->type = VKD3DSPR_OUTPUT; -@@ -5777,11 +5784,12 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog - return VKD3D_OK; - } - --static bool find_position_signature_idx(const struct shader_signature *signature, uint32_t *idx) -+static bool find_sysval_signature_idx(const struct shader_signature *signature, -+ enum vkd3d_shader_sysval_semantic sysval, uint32_t *idx) - { - for (unsigned int i = 0; i < signature->element_count; ++i) - { -- if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_POSITION) -+ if (signature->elements[i].sysval_semantic == sysval) - { - *idx = i; - return true; -@@ -5846,7 +5854,7 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr - } - } - -- if (!find_position_signature_idx(signature, &position_signature_idx)) -+ if (!find_sysval_signature_idx(signature, VKD3D_SHADER_SV_POSITION, &position_signature_idx)) - { - vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, - "Shader does not write position."); -@@ -5923,6 +5931,191 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr - return VKD3D_OK; - } - -+static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *program, -+ const struct vkd3d_shader_instruction *ret, size_t *ret_pos) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ size_t pos = ret - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins = &program->instructions.elements[pos]; -+ -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, 
VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; -+ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT); -+ -+ *ret_pos = pos + 1; -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ const struct vkd3d_shader_parameter1 *size_parameter = NULL; -+ static const struct vkd3d_shader_location no_loc; -+ -+ if (program->has_point_size) -+ return VKD3D_OK; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX -+ && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY -+ && program->shader_version.type != VKD3D_SHADER_TYPE_HULL -+ && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) -+ return VKD3D_OK; -+ -+ for (unsigned int i = 0; i < program->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; -+ -+ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE) -+ size_parameter = parameter; -+ } -+ -+ if (!size_parameter) -+ return VKD3D_OK; -+ -+ if (size_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) -+ { -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid point size parameter data type %#x.", size_parameter->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ -+ program->has_point_size = true; -+ -+ /* Append a point size write before each ret. 
*/ -+ for (size_t i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ -+ if (ins->opcode == VKD3DSIH_RET) -+ { -+ size_t new_pos; -+ int ret; -+ -+ if ((ret = insert_point_size_before_ret(program, ins, &new_pos)) < 0) -+ return ret; -+ i = new_pos; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ const struct vkd3d_shader_parameter1 *min_parameter = NULL, *max_parameter = NULL; -+ static const struct vkd3d_shader_location no_loc; -+ -+ if (!program->has_point_size) -+ return VKD3D_OK; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX -+ && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY -+ && program->shader_version.type != VKD3D_SHADER_TYPE_HULL -+ && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) -+ return VKD3D_OK; -+ -+ for (unsigned int i = 0; i < program->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; -+ -+ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN) -+ min_parameter = parameter; -+ else if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX) -+ max_parameter = parameter; -+ } -+ -+ if (!min_parameter && !max_parameter) -+ return VKD3D_OK; -+ -+ if (min_parameter && min_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) -+ { -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid minimum point size parameter data type %#x.", min_parameter->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ -+ if (max_parameter && max_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) -+ { -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid maximum point size parameter data type %#x.", 
max_parameter->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ -+ /* Replace writes to the point size by inserting a clamp before each write. */ -+ -+ for (size_t i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ const struct vkd3d_shader_location *loc; -+ unsigned int ssa_value; -+ bool clamp = false; -+ -+ if (vsir_instruction_is_dcl(ins)) -+ continue; -+ -+ for (size_t j = 0; j < ins->dst_count; ++j) -+ { -+ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; -+ -+ /* Note we run after I/O normalization. */ -+ if (dst->reg.type == VKD3DSPR_RASTOUT) -+ { -+ dst_param_init_ssa_float(dst, program->ssa_count); -+ ssa_value = program->ssa_count++; -+ clamp = true; -+ } -+ } -+ -+ if (!clamp) -+ continue; -+ -+ if (!shader_instruction_array_insert_at(&program->instructions, i + 1, !!min_parameter + !!max_parameter)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ loc = &program->instructions.elements[i].location; -+ ins = &program->instructions.elements[i + 1]; -+ -+ if (min_parameter) -+ { -+ vsir_instruction_init_with_params(program, ins, loc, VKD3DSIH_MAX, 1, 2); -+ src_param_init_ssa_float(&ins->src[0], ssa_value); -+ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MIN, VKD3D_DATA_FLOAT); -+ if (max_parameter) -+ { -+ dst_param_init_ssa_float(&ins->dst[0], program->ssa_count); -+ ssa_value = program->ssa_count++; -+ } -+ else -+ { -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; -+ } -+ ++ins; -+ ++i; -+ } -+ -+ if (max_parameter) -+ { -+ vsir_instruction_init_with_params(program, ins, loc, VKD3DSIH_MIN, 1, 2); -+ src_param_init_ssa_float(&ins->src[0], ssa_value); -+ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, VKD3D_DATA_FLOAT); -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); -+ 
ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; -+ -+ ++i; -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ - struct validation_context - { - struct vkd3d_shader_message_context *message_context; -@@ -6395,23 +6588,18 @@ static void vsir_validate_dst_param(struct validation_context *ctx, - break; - - case VKD3DSPR_IMMCONST: -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid IMMCONST register used as destination parameter."); -- break; -- - case VKD3DSPR_IMMCONST64: -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid IMMCONST64 register used as destination parameter."); -- break; -- - case VKD3DSPR_SAMPLER: -+ case VKD3DSPR_RESOURCE: -+ case VKD3DSPR_INPUT: - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid SAMPLER register used as destination parameter."); -+ "Invalid %#x register used as destination parameter.", dst->reg.type); - break; - -- case VKD3DSPR_RESOURCE: -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid RESOURCE register used as destination parameter."); -+ case VKD3DSPR_PATCHCONST: -+ if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "PATCHCONST register used as destination parameters are only allowed in Hull Shaders."); - break; - - default: -@@ -6454,6 +6642,20 @@ static void vsir_validate_src_param(struct validation_context *ctx, - "Invalid NULL register used as source parameter."); - break; - -+ case VKD3DSPR_OUTPUT: -+ if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL -+ || (ctx->phase != VKD3DSIH_HS_FORK_PHASE && ctx->phase != VKD3DSIH_HS_JOIN_PHASE)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid OUTPUT register used as source parameter."); -+ break; -+ -+ case VKD3DSPR_PATCHCONST: -+ if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN -+ && 
ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "PATCHCONST register used as source parameters are only allowed in Hull and Domain Shaders."); -+ break; -+ - default: - break; - } -@@ -6510,6 +6712,7 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - unsigned int idx) - { - const struct signature_element *element = &signature->elements[idx]; -+ bool integer_type = false; - - if (element->register_count == 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -@@ -6518,6 +6721,97 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - if (element->mask == 0 || (element->mask & ~0xf)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid mask %#x.", idx, signature_type, element->mask); -+ -+ /* Here we'd likely want to validate that the usage mask is a subset of the -+ * signature mask. Unfortunately the D3DBC parser sometimes violates this. -+ * For example I've seen a shader like this: -+ * ps_3_0 -+ * [...] -+ * dcl_texcoord0 v0 -+ * [...] -+ * texld r2.xyzw, v0.xyzw, s1.xyzw -+ * [...] -+ * -+ * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to -+ * compute the signature mask, but the texld instruction apparently uses all -+ * the components. Of course the last two components are ignored, but -+ * formally they seem to be used. So we end up with a signature element with -+ * mask .xy and usage mask .xyzw. -+ * -+ * The correct fix would probably be to make the D3DBC parser aware of which -+ * components are really used for each instruction, but that would take some -+ * time. 
*/ -+ if (element->used_mask & ~0xf) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid usage mask %#x.", -+ idx, signature_type, element->used_mask); -+ -+ switch (element->sysval_semantic) -+ { -+ case VKD3D_SHADER_SV_NONE: -+ case VKD3D_SHADER_SV_POSITION: -+ case VKD3D_SHADER_SV_CLIP_DISTANCE: -+ case VKD3D_SHADER_SV_CULL_DISTANCE: -+ case VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX: -+ case VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX: -+ case VKD3D_SHADER_SV_VERTEX_ID: -+ case VKD3D_SHADER_SV_PRIMITIVE_ID: -+ case VKD3D_SHADER_SV_INSTANCE_ID: -+ case VKD3D_SHADER_SV_IS_FRONT_FACE: -+ case VKD3D_SHADER_SV_SAMPLE_INDEX: -+ case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: -+ case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: -+ case VKD3D_SHADER_SV_TARGET: -+ case VKD3D_SHADER_SV_DEPTH: -+ case VKD3D_SHADER_SV_COVERAGE: -+ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: -+ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: -+ case VKD3D_SHADER_SV_STENCIL_REF: -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid system value semantic %#x.", -+ idx, signature_type, element->sysval_semantic); -+ break; -+ } -+ -+ switch (element->component_type) -+ { -+ case VKD3D_SHADER_COMPONENT_INT: -+ case VKD3D_SHADER_COMPONENT_UINT: -+ integer_type = true; -+ break; -+ -+ case VKD3D_SHADER_COMPONENT_FLOAT: -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid component type %#x.", -+ idx, signature_type, element->component_type); -+ break; -+ } -+ -+ if (element->min_precision >= VKD3D_SHADER_MINIMUM_PRECISION_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid minimum 
precision %#x.", -+ idx, signature_type, element->min_precision); -+ -+ if (element->interpolation_mode >= VKD3DSIM_COUNT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid interpolation mode %#x.", -+ idx, signature_type, element->interpolation_mode); -+ -+ if (integer_type && element->interpolation_mode != VKD3DSIM_NONE -+ && element->interpolation_mode != VKD3DSIM_CONSTANT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid interpolation mode %#x for integer component type.", -+ idx, signature_type, element->interpolation_mode); - } - - static void vsir_validate_signature(struct validation_context *ctx, -@@ -7060,6 +7354,7 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c - .null_location = {.source_name = source_name}, - .status = VKD3D_OK, - .phase = VKD3DSIH_INVALID, -+ .invalid_instruction_idx = true, - }; - unsigned int i; - -@@ -7088,6 +7383,8 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c - if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) - goto fail; - -+ ctx.invalid_instruction_idx = false; -+ - for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count - && ctx.status != VKD3D_ERROR_OUT_OF_MEMORY; ++ctx.instruction_idx) - vsir_validate_instruction(&ctx); -@@ -7190,6 +7487,8 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t - - vsir_transform(&ctx, vsir_program_insert_alpha_test); - vsir_transform(&ctx, vsir_program_insert_clip_planes); -+ vsir_transform(&ctx, vsir_program_insert_point_size); -+ vsir_transform(&ctx, vsir_program_insert_point_size_clamp); - - if (TRACE_ON()) - vsir_program_trace(program); -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index bfc013959e7..36750de1fd8 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ 
b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -39,6 +39,8 @@ struct msl_generator - struct vkd3d_shader_message_context *message_context; - unsigned int indent; - const char *prefix; -+ const struct vkd3d_shader_interface_info *interface_info; -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; - }; - - static void VKD3D_PRINTF_FUNC(3, 4) msl_compiler_error(struct msl_generator *gen, -@@ -109,6 +111,64 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, - vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); - msl_print_register_datatype(buffer, gen, reg->data_type); - break; -+ -+ case VKD3DSPR_INPUT: -+ if (reg->idx_count != 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled input register index count %u.", reg->idx_count); -+ vkd3d_string_buffer_printf(buffer, "", reg->type); -+ break; -+ } -+ if (reg->idx[0].rel_addr) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled input register indirect addressing."); -+ vkd3d_string_buffer_printf(buffer, "", reg->type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, "v[%u]", reg->idx[0].offset); -+ msl_print_register_datatype(buffer, gen, reg->data_type); -+ break; -+ -+ case VKD3DSPR_OUTPUT: -+ if (reg->idx_count != 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled output register index count %u.", reg->idx_count); -+ vkd3d_string_buffer_printf(buffer, "", reg->type); -+ break; -+ } -+ if (reg->idx[0].rel_addr) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled output register indirect addressing."); -+ vkd3d_string_buffer_printf(buffer, "", reg->type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, "o[%u]", reg->idx[0].offset); -+ msl_print_register_datatype(buffer, gen, reg->data_type); -+ break; -+ -+ case VKD3DSPR_CONSTBUFFER: -+ if 
(reg->idx_count != 3) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled constant buffer register index count %u.", reg->idx_count); -+ vkd3d_string_buffer_printf(buffer, "", reg->type); -+ break; -+ } -+ if (reg->idx[0].rel_addr || reg->idx[2].rel_addr) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled constant buffer register indirect addressing."); -+ vkd3d_string_buffer_printf(buffer, "", reg->type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, "descriptors.cb_%u[%u]", reg->idx[0].offset, reg->idx[2].offset); -+ msl_print_register_datatype(buffer, gen, reg->data_type); -+ break; -+ - default: - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled register type %#x.", reg->type); -@@ -266,6 +326,144 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - } - } - -+static bool msl_check_shader_visibility(const struct msl_generator *gen, -+ enum vkd3d_shader_visibility visibility) -+{ -+ enum vkd3d_shader_type t = gen->program->shader_version.type; -+ -+ switch (visibility) -+ { -+ case VKD3D_SHADER_VISIBILITY_ALL: -+ return true; -+ case VKD3D_SHADER_VISIBILITY_VERTEX: -+ return t == VKD3D_SHADER_TYPE_VERTEX; -+ case VKD3D_SHADER_VISIBILITY_HULL: -+ return t == VKD3D_SHADER_TYPE_HULL; -+ case VKD3D_SHADER_VISIBILITY_DOMAIN: -+ return t == VKD3D_SHADER_TYPE_DOMAIN; -+ case VKD3D_SHADER_VISIBILITY_GEOMETRY: -+ return t == VKD3D_SHADER_TYPE_GEOMETRY; -+ case VKD3D_SHADER_VISIBILITY_PIXEL: -+ return t == VKD3D_SHADER_TYPE_PIXEL; -+ case VKD3D_SHADER_VISIBILITY_COMPUTE: -+ return t == VKD3D_SHADER_TYPE_COMPUTE; -+ default: -+ WARN("Invalid shader visibility %#x.\n", visibility); -+ return false; -+ } -+} -+ -+static bool msl_get_cbv_binding(const struct msl_generator *gen, -+ unsigned int register_space, unsigned int register_idx, unsigned int *binding_idx) -+{ -+ const struct 
vkd3d_shader_interface_info *interface_info = gen->interface_info; -+ const struct vkd3d_shader_resource_binding *binding; -+ unsigned int i; -+ -+ if (!interface_info) -+ return false; -+ -+ for (i = 0; i < interface_info->binding_count; ++i) -+ { -+ binding = &interface_info->bindings[i]; -+ -+ if (binding->type != VKD3D_SHADER_DESCRIPTOR_TYPE_CBV) -+ continue; -+ if (binding->register_space != register_space) -+ continue; -+ if (binding->register_index != register_idx) -+ continue; -+ if (!msl_check_shader_visibility(gen, binding->shader_visibility)) -+ continue; -+ if (!(binding->flags & VKD3D_SHADER_BINDING_FLAG_BUFFER)) -+ continue; -+ *binding_idx = i; -+ return true; -+ } -+ -+ return false; -+} -+ -+static void msl_generate_cbv_declaration(struct msl_generator *gen, -+ const struct vkd3d_shader_descriptor_info1 *cbv) -+{ -+ const struct vkd3d_shader_descriptor_binding *binding; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ unsigned int binding_idx; -+ size_t size; -+ -+ if (cbv->count != 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, -+ "Constant buffer %u has unsupported descriptor array size %u.", cbv->register_id, cbv->count); -+ return; -+ } -+ -+ if (!msl_get_cbv_binding(gen, cbv->register_space, cbv->register_index, &binding_idx)) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, -+ "No descriptor binding specified for constant buffer %u.", cbv->register_id); -+ return; -+ } -+ -+ binding = &gen->interface_info->bindings[binding_idx].binding; -+ -+ if (binding->set != 0) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, -+ "Unsupported binding set %u specified for constant buffer %u.", binding->set, cbv->register_id); -+ return; -+ } -+ -+ if (binding->count != 1) -+ { -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND, -+ "Unsupported binding count %u specified for constant buffer %u.", binding->count, cbv->register_id); -+ return; -+ } -+ -+ 
size = align(cbv->buffer_size, VKD3D_VEC4_SIZE * sizeof(uint32_t)); -+ size /= VKD3D_VEC4_SIZE * sizeof(uint32_t); -+ -+ vkd3d_string_buffer_printf(buffer, -+ "constant vkd3d_vec4 (&cb_%u)[%zu] [[id(%u)]];", cbv->register_id, size, binding->binding); -+}; -+ -+static void msl_generate_descriptor_struct_declarations(struct msl_generator *gen) -+{ -+ const struct vkd3d_shader_scan_descriptor_info1 *info = gen->descriptor_info; -+ const struct vkd3d_shader_descriptor_info1 *descriptor; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ unsigned int i; -+ -+ if (!info->descriptor_count) -+ return; -+ -+ vkd3d_string_buffer_printf(buffer, "struct vkd3d_%s_descriptors\n{\n", gen->prefix); -+ -+ for (i = 0; i < info->descriptor_count; ++i) -+ { -+ descriptor = &info->descriptors[i]; -+ -+ msl_print_indent(buffer, 1); -+ switch (descriptor->type) -+ { -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: -+ msl_generate_cbv_declaration(gen, descriptor); -+ break; -+ -+ default: -+ vkd3d_string_buffer_printf(buffer, "/* */", descriptor->type); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled descriptor type %#x.", descriptor->type); -+ break; -+ } -+ vkd3d_string_buffer_printf(buffer, "\n"); -+ } -+ -+ vkd3d_string_buffer_printf(buffer, "};\n\n"); -+} -+ - static void msl_generate_input_struct_declarations(struct msl_generator *gen) - { - const struct shader_signature *signature = &gen->program->input_signature; -@@ -550,9 +748,15 @@ static void msl_generate_entrypoint(struct msl_generator *gen) - - vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_out shader_entry(\n", gen->prefix); - -- /* TODO: descriptor declaration */ -+ if (gen->descriptor_info->descriptor_count) -+ { -+ msl_print_indent(gen->buffer, 2); -+ /* TODO: Configurable argument buffer binding location. 
*/ -+ vkd3d_string_buffer_printf(gen->buffer, -+ "constant vkd3d_%s_descriptors& descriptors [[buffer(0)]],\n", gen->prefix); -+ } - -- msl_print_indent(gen->buffer, 1); -+ msl_print_indent(gen->buffer, 2); - vkd3d_string_buffer_printf(gen->buffer, "vkd3d_%s_in input [[stage_in]])\n{\n", gen->prefix); - - /* TODO: declare #maximum_register + 1 */ -@@ -562,7 +766,10 @@ static void msl_generate_entrypoint(struct msl_generator *gen) - - msl_generate_entrypoint_prologue(gen); - -- vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out);\n", gen->prefix, gen->prefix, gen->prefix); -+ vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix); -+ if (gen->descriptor_info->descriptor_count) -+ vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); -+ vkd3d_string_buffer_printf(gen->buffer, ");\n"); - - msl_generate_entrypoint_epilogue(gen); - -@@ -583,13 +790,17 @@ static void msl_generator_generate(struct msl_generator *gen) - vkd3d_string_buffer_printf(gen->buffer, " int4 i;\n"); - vkd3d_string_buffer_printf(gen->buffer, " float4 f;\n};\n\n"); - -+ msl_generate_descriptor_struct_declarations(gen); - msl_generate_input_struct_declarations(gen); - msl_generate_output_struct_declarations(gen); - - vkd3d_string_buffer_printf(gen->buffer, - "void %s_main(thread vkd3d_vec4 *v, " -- "thread vkd3d_vec4 *o)\n{\n", -+ "thread vkd3d_vec4 *o", - gen->prefix); -+ if (gen->descriptor_info->descriptor_count) -+ vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); -+ vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); - - ++gen->indent; - -@@ -621,6 +832,8 @@ static void msl_generator_cleanup(struct msl_generator *gen) - } - - static int msl_generator_init(struct msl_generator *gen, struct vsir_program *program, -+ const struct vkd3d_shader_compile_info *compile_info, -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, - struct vkd3d_shader_message_context 
*message_context) - { - enum vkd3d_shader_type type = program->shader_version.type; -@@ -640,11 +853,14 @@ static int msl_generator_init(struct msl_generator *gen, struct vsir_program *pr - "Internal compiler error: Unhandled shader type %#x.", type); - return VKD3D_ERROR_INVALID_SHADER; - } -+ gen->interface_info = vkd3d_find_struct(compile_info->next, INTERFACE_INFO); -+ gen->descriptor_info = descriptor_info; - - return VKD3D_OK; - } - - int msl_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) - { - struct msl_generator generator; -@@ -653,7 +869,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, - if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) - return ret; - -- if ((ret = msl_generator_init(&generator, program, message_context)) < 0) -+ if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) - return ret; - msl_generator_generate(&generator); - msl_generator_cleanup(&generator); -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 692432d5513..1efd717e970 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -2662,8 +2662,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p - if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) - { - compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); -- compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count -- && compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY; - - compiler->shader_interface = *shader_interface; - if (shader_interface->push_constant_buffer_count) -@@ -2690,6 +2688,11 @@ static struct 
spirv_compiler *spirv_compiler_create(const struct vsir_program *p - } - } - -+ if (compiler->shader_type == VKD3D_SHADER_TYPE_VERTEX) -+ compiler->emit_point_size = true; -+ else if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY) -+ compiler->emit_point_size = compiler->xfb_info && compiler->xfb_info->element_count; -+ - compiler->scan_descriptor_info = scan_descriptor_info; - - compiler->phase = VKD3DSIH_INVALID; -@@ -4860,6 +4863,10 @@ static const struct vkd3d_spirv_builtin vkd3d_pixel_shader_position_builtin = - { - VKD3D_SHADER_COMPONENT_FLOAT, 4, SpvBuiltInFragCoord, frag_coord_fixup, - }; -+static const struct vkd3d_spirv_builtin vkd3d_output_point_size_builtin = -+{ -+ VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize, -+}; - static const struct - { - enum vkd3d_shader_register_type reg_type; -@@ -5449,7 +5456,11 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, - VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); - VKD3D_ASSERT(reg->idx_count < 2); - -- if (!(builtin = get_spirv_builtin_for_register(reg->type))) -+ if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) -+ { -+ builtin = &vkd3d_output_point_size_builtin; -+ } -+ else if (!(builtin = get_spirv_builtin_for_register(reg->type))) - { - FIXME("Unhandled register %#x.\n", reg->type); - return; -@@ -6746,7 +6757,8 @@ static void spirv_compiler_emit_dcl_input_primitive(struct spirv_compiler *compi - - static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) - { -- static const struct vkd3d_spirv_builtin point_size = {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInPointSize}; -+ if (compiler->program->has_point_size) -+ return; - - /* Set the point size. Point sprites are not supported in d3d10+, but - * point primitives can still be used with e.g. stream output. 
Vulkan -@@ -6760,7 +6772,8 @@ static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) - || compiler->write_tess_geom_point_size) - { - vkd3d_spirv_build_op_store(&compiler->spirv_builder, -- spirv_compiler_emit_builtin_variable(compiler, &point_size, SpvStorageClassOutput, 0), -+ spirv_compiler_emit_builtin_variable(compiler, -+ &vkd3d_output_point_size_builtin, SpvStorageClassOutput, 0), - spirv_compiler_get_constant_float(compiler, 1.0f), SpvMemoryAccessMaskNone); - } - } -@@ -7427,7 +7440,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, - uint32_t components[VKD3D_VEC4_SIZE]; - - if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA -- || dst->modifiers || src->modifiers) -+ || src->reg.type == VKD3DSPR_PARAMETER || dst->modifiers || src->modifiers) - goto general_implementation; - - spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); -@@ -10538,7 +10551,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_RESOURCE_STRUCTURED: - case VKD3DSIH_DCL_UAV_RAW: - case VKD3DSIH_DCL_UAV_STRUCTURED: -- case VKD3DSIH_DCL_UAV_TYPED: - case VKD3DSIH_HS_DECLS: - case VKD3DSIH_NOP: - /* nothing to do */ -@@ -10575,6 +10587,15 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) - else - spirv_compiler_emit_input(compiler, VKD3DSPR_PATCHCONST, i); - } -+ -+ if (compiler->program->has_point_size) -+ { -+ struct vkd3d_shader_dst_param dst; -+ -+ vsir_dst_param_init(&dst, VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); -+ dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; -+ spirv_compiler_emit_output_register(compiler, &dst); -+ } - } - - static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index f84ac551272..cde8dc3146c 100644 ---- 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -1655,7 +1655,10 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, - break; - - case VKD3D_SHADER_TARGET_MSL: -- ret = msl_compile(program, config_flags, compile_info, message_context); -+ if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) -+ return ret; -+ ret = msl_compile(program, config_flags, &scan_descriptor_info, compile_info, message_context); -+ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - break; - - default: -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 41b879af4b4..9ca3c328147 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -60,6 +60,7 @@ - #define VKD3D_DVEC2_SIZE 2 - - #define VKD3D_SHADER_COMPONENT_TYPE_COUNT (VKD3D_SHADER_COMPONENT_UINT64 + 1) -+#define VKD3D_SHADER_MINIMUM_PRECISION_COUNT (VKD3D_SHADER_MINIMUM_PRECISION_UINT_16 + 1) - - enum vkd3d_shader_error - { -@@ -250,6 +251,7 @@ enum vkd3d_shader_error - VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, - - VKD3D_SHADER_ERROR_MSL_INTERNAL = 10000, -+ VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND = 10001, - }; - - enum vkd3d_shader_opcode -@@ -648,6 +650,13 @@ enum vkd3d_shader_register_type - VKD3DSPR_INVALID = ~0u, - }; - -+enum vsir_rastout_register -+{ -+ VSIR_RASTOUT_POSITION = 0x0, -+ VSIR_RASTOUT_FOG = 0x1, -+ VSIR_RASTOUT_POINT_SIZE = 0x2, -+}; -+ - enum vkd3d_shader_register_precision - { - VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, -@@ -1400,6 +1409,7 @@ struct vsir_program - unsigned int temp_count; - unsigned int ssa_count; - bool use_vocp; -+ bool has_point_size; - enum vsir_control_flow_type cf_type; - - const char **block_names; -@@ -1617,6 +1627,7 @@ int spirv_compile(struct vsir_program *program, uint64_t 
config_flags, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - - int msl_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); - - enum vkd3d_md5_variant --- -2.45.2 - diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-9dd42d15ddca66458042b5e4b7775fa054b.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-9dd42d15ddca66458042b5e4b7775fa054b.patch deleted file mode 100644 index 15cd3ee5..00000000 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-9dd42d15ddca66458042b5e4b7775fa054b.patch +++ /dev/null @@ -1,1465 +0,0 @@ -From b3447b3e9de88f8796756d62f595ad31e04bf7da Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Tue, 15 Oct 2024 07:31:45 +1100 -Subject: [PATCH] Updated vkd3d to 9dd42d15ddca66458042b5e4b7775fa054b4b0a2. - ---- - libs/vkd3d/include/vkd3d_shader.h | 5 + - libs/vkd3d/libs/vkd3d-shader/fx.c | 504 ++++++++++++- - libs/vkd3d/libs/vkd3d-shader/ir.c | 678 +++++++++++------- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 14 + - .../libs/vkd3d-shader/vkd3d_shader_private.h | 5 + - 5 files changed, 909 insertions(+), 297 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index e22f236ecd1..5c0d13ea9e2 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -1181,6 +1181,11 @@ enum vkd3d_shader_source_type - * the format used for Direct3D shader model 6 shaders. \since 1.9 - */ - VKD3D_SHADER_SOURCE_DXBC_DXIL, -+ /** -+ * Binary format used by Direct3D 9/10.x/11 effects. -+ * Input is a raw FX section without container. 
\since 1.14 -+ */ -+ VKD3D_SHADER_SOURCE_FX, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), - }; -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index cc18857a010..e98dfcf4f32 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -470,26 +470,48 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type) - return type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float) * elements_count; - } - --static const uint32_t fx_4_numeric_base_type[] = -+enum fx_4_type_constants -+{ -+ /* Numeric types encoding */ -+ FX_4_NUMERIC_TYPE_FLOAT = 1, -+ FX_4_NUMERIC_TYPE_INT = 2, -+ FX_4_NUMERIC_TYPE_UINT = 3, -+ FX_4_NUMERIC_TYPE_BOOL = 4, -+ -+ FX_4_NUMERIC_CLASS_SCALAR = 1, -+ FX_4_NUMERIC_CLASS_VECTOR = 2, -+ FX_4_NUMERIC_CLASS_MATRIX = 3, -+ -+ FX_4_NUMERIC_BASE_TYPE_SHIFT = 3, -+ FX_4_NUMERIC_ROWS_SHIFT = 8, -+ FX_4_NUMERIC_COLUMNS_SHIFT = 11, -+ FX_4_NUMERIC_COLUMN_MAJOR_MASK = 0x4000, -+ -+ /* Object types */ -+ FX_4_OBJECT_TYPE_STRING = 1, -+ -+ /* Types */ -+ FX_4_TYPE_CLASS_NUMERIC = 1, -+ FX_4_TYPE_CLASS_OBJECT = 2, -+ FX_4_TYPE_CLASS_STRUCT = 3, -+}; -+ -+static const uint32_t fx_4_numeric_base_types[] = - { -- [HLSL_TYPE_HALF] = 1, -- [HLSL_TYPE_FLOAT] = 1, -- [HLSL_TYPE_INT ] = 2, -- [HLSL_TYPE_UINT ] = 3, -- [HLSL_TYPE_BOOL ] = 4, -+ [HLSL_TYPE_HALF ] = FX_4_NUMERIC_TYPE_FLOAT, -+ [HLSL_TYPE_FLOAT] = FX_4_NUMERIC_TYPE_FLOAT, -+ [HLSL_TYPE_INT ] = FX_4_NUMERIC_TYPE_INT, -+ [HLSL_TYPE_UINT ] = FX_4_NUMERIC_TYPE_UINT, -+ [HLSL_TYPE_BOOL ] = FX_4_NUMERIC_TYPE_BOOL, - }; - - static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, struct fx_write_context *fx) - { -- static const unsigned int NUMERIC_BASE_TYPE_SHIFT = 3; -- static const unsigned int NUMERIC_ROWS_SHIFT = 8; -- static const unsigned int NUMERIC_COLUMNS_SHIFT = 11; -- static const unsigned int NUMERIC_COLUMN_MAJOR_MASK = 0x4000; - static const uint32_t numeric_type_class[] = - { -- 
[HLSL_CLASS_SCALAR] = 1, -- [HLSL_CLASS_VECTOR] = 2, -- [HLSL_CLASS_MATRIX] = 3, -+ [HLSL_CLASS_SCALAR] = FX_4_NUMERIC_CLASS_SCALAR, -+ [HLSL_CLASS_VECTOR] = FX_4_NUMERIC_CLASS_VECTOR, -+ [HLSL_CLASS_MATRIX] = FX_4_NUMERIC_CLASS_MATRIX, - }; - struct hlsl_ctx *ctx = fx->ctx; - uint32_t value = 0; -@@ -513,17 +535,17 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: -- value |= (fx_4_numeric_base_type[type->e.numeric.type] << NUMERIC_BASE_TYPE_SHIFT); -+ value |= (fx_4_numeric_base_types[type->e.numeric.type] << FX_4_NUMERIC_BASE_TYPE_SHIFT); - break; - default: - hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->e.numeric.type); - return 0; - } - -- value |= (type->dimy & 0x7) << NUMERIC_ROWS_SHIFT; -- value |= (type->dimx & 0x7) << NUMERIC_COLUMNS_SHIFT; -+ value |= (type->dimy & 0x7) << FX_4_NUMERIC_ROWS_SHIFT; -+ value |= (type->dimx & 0x7) << FX_4_NUMERIC_COLUMNS_SHIFT; - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) -- value |= NUMERIC_COLUMN_MAJOR_MASK; -+ value |= FX_4_NUMERIC_COLUMN_MAJOR_MASK; - - return value; - } -@@ -651,7 +673,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: -- put_u32_unaligned(buffer, 1); -+ put_u32_unaligned(buffer, FX_4_TYPE_CLASS_NUMERIC); - break; - - case HLSL_CLASS_DEPTH_STENCIL_STATE: -@@ -669,11 +691,11 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: - case HLSL_CLASS_STRING: -- put_u32_unaligned(buffer, 2); -+ put_u32_unaligned(buffer, FX_4_TYPE_CLASS_OBJECT); - break; - - case HLSL_CLASS_STRUCT: -- put_u32_unaligned(buffer, 3); -+ put_u32_unaligned(buffer, FX_4_TYPE_CLASS_STRUCT); - break; - - case HLSL_CLASS_ARRAY: -@@ -794,7 +816,7 @@ static uint32_t write_fx_4_type(const struct 
hlsl_type *type, struct fx_write_co - } - else if (element_type->class == HLSL_CLASS_STRING) - { -- put_u32_unaligned(buffer, 1); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_STRING); - } - else if (hlsl_is_numeric_type(element_type)) - { -@@ -1543,7 +1565,7 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: -- type = fx_4_numeric_base_type[data_type->e.numeric.type]; -+ type = fx_4_numeric_base_types[data_type->e.numeric.type]; - break; - default: - type = 0; -@@ -2814,3 +2836,441 @@ int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - vkd3d_unreachable(); - } - } -+ -+struct fx_parser -+{ -+ const uint8_t *ptr, *start, *end; -+ struct vkd3d_shader_message_context *message_context; -+ struct vkd3d_string_buffer buffer; -+ struct -+ { -+ const uint8_t *ptr; -+ const uint8_t *end; -+ uint32_t size; -+ } unstructured; -+ uint32_t buffer_count; -+ uint32_t object_count; -+ bool failed; -+}; -+ -+static uint32_t fx_parser_read_u32(struct fx_parser *parser) -+{ -+ uint32_t ret; -+ -+ if ((parser->end - parser->ptr) < sizeof(uint32_t)) -+ { -+ parser->failed = true; -+ return 0; -+ } -+ -+ ret = *(uint32_t *)parser->ptr; -+ parser->ptr += sizeof(uint32_t); -+ -+ return ret; -+} -+ -+static void fx_parser_read_u32s(struct fx_parser *parser, void *dst, size_t size) -+{ -+ uint32_t *ptr = dst; -+ size_t i; -+ -+ for (i = 0; i < size / sizeof(uint32_t); ++i) -+ ptr[i] = fx_parser_read_u32(parser); -+} -+ -+static void fx_parser_skip(struct fx_parser *parser, size_t size) -+{ -+ if ((parser->end - parser->ptr) < size) -+ { -+ parser->ptr = parser->end; -+ parser->failed = true; -+ return; -+ } -+ parser->ptr += size; -+} -+ -+static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, enum vkd3d_shader_error error, -+ const char *format, ...) 
-+{ -+ va_list args; -+ -+ va_start(args, format); -+ vkd3d_shader_verror(parser->message_context, NULL, error, format, args); -+ va_end(args); -+ -+ parser->failed = true; -+} -+ -+static int fx_2_parse(struct fx_parser *parser) -+{ -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n"); -+ -+ return -1; -+} -+ -+static void fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size) -+{ -+ const uint8_t *ptr = parser->unstructured.ptr; -+ -+ memset(dst, 0, size); -+ if (offset >= parser->unstructured.size -+ || size > parser->unstructured.size - offset) -+ { -+ parser->failed = true; -+ return; -+ } -+ -+ ptr += offset; -+ memcpy(dst, ptr, size); -+} -+ -+static const char *fx_4_get_string(struct fx_parser *parser, uint32_t offset) -+{ -+ const uint8_t *ptr = parser->unstructured.ptr; -+ const uint8_t *end = parser->unstructured.end; -+ -+ if (offset >= parser->unstructured.size) -+ { -+ parser->failed = true; -+ return ""; -+ } -+ -+ ptr += offset; -+ -+ while (ptr < end && *ptr) -+ ++ptr; -+ -+ if (*ptr) -+ { -+ parser->failed = true; -+ return ""; -+ } -+ -+ return (const char *)(parser->unstructured.ptr + offset); -+} -+ -+static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t count) -+{ -+ struct fx_4_numeric_variable -+ { -+ uint32_t name; -+ uint32_t type; -+ uint32_t semantic; -+ uint32_t offset; -+ uint32_t value; -+ uint32_t flags; -+ } var; -+ struct fx_4_type -+ { -+ uint32_t name; -+ uint32_t class; -+ uint32_t element_count; -+ uint32_t unpacked_size; -+ uint32_t stride; -+ uint32_t packed_size; -+ uint32_t typeinfo; -+ } type; -+ const char *name, *semantic, *type_name; -+ uint32_t i; -+ -+ for (i = 0; i < count; ++i) -+ { -+ fx_parser_read_u32s(parser, &var, sizeof(var)); -+ fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); -+ -+ name = fx_4_get_string(parser, var.name); -+ type_name = fx_4_get_string(parser, 
type.name); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, " %s %s", type_name, name); -+ if (type.element_count) -+ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); -+ if (var.semantic) -+ { -+ semantic = fx_4_get_string(parser, var.semantic); -+ vkd3d_string_buffer_printf(&parser->buffer, " : %s", semantic); -+ } -+ if (var.value) -+ { -+ unsigned int base_type, comp_count; -+ size_t j; -+ -+ if (type.class == FX_4_TYPE_CLASS_NUMERIC) -+ base_type = (type.typeinfo >> FX_4_NUMERIC_BASE_TYPE_SHIFT) & 0xf; -+ else -+ base_type = 0; -+ -+ vkd3d_string_buffer_printf(&parser->buffer, " = { "); -+ -+ comp_count = type.unpacked_size / sizeof(uint32_t); -+ for (j = 0; j < comp_count; ++j) -+ { -+ union hlsl_constant_value_component value; -+ -+ fx_parser_read_unstructured(parser, &value, var.value + j * sizeof(uint32_t), sizeof(uint32_t)); -+ -+ if (base_type == FX_4_NUMERIC_TYPE_FLOAT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%f", value.f); -+ else if (base_type == FX_4_NUMERIC_TYPE_INT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i); -+ else if (base_type == FX_4_NUMERIC_TYPE_UINT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); -+ else if (base_type == FX_4_NUMERIC_TYPE_BOOL) -+ vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? 
"true" : "false" ); -+ else -+ vkd3d_string_buffer_printf(&parser->buffer, "%#x", value.u); -+ -+ if (j < comp_count - 1) -+ vkd3d_string_buffer_printf(&parser->buffer, ", "); -+ } -+ -+ vkd3d_string_buffer_printf(&parser->buffer, " }"); -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, "; // Offset: %u, size %u.\n", var.offset, type.unpacked_size); -+ -+ if (fx_parser_read_u32(parser)) -+ { -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing annotations is not implemented.\n"); -+ return; -+ } -+ } -+} -+ -+static void fx_parse_buffers(struct fx_parser *parser) -+{ -+ struct fx_buffer -+ { -+ uint32_t name; -+ uint32_t size; -+ uint32_t flags; -+ uint32_t count; -+ uint32_t bind_point; -+ } buffer; -+ const char *name; -+ uint32_t i; -+ -+ if (parser->failed) -+ return; -+ -+ for (i = 0; i < parser->buffer_count; ++i) -+ { -+ fx_parser_read_u32s(parser, &buffer, sizeof(buffer)); -+ -+ name = fx_4_get_string(parser, buffer.name); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "cbuffer %s\n", name); -+ vkd3d_string_buffer_printf(&parser->buffer, "{\n"); -+ -+ if (fx_parser_read_u32(parser)) -+ { -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing annotations is not implemented.\n"); -+ return; -+ } -+ -+ fx_parse_fx_4_numeric_variables(parser, buffer.count); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); -+ } -+} -+ -+static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t offset) -+{ -+ const char *str = fx_4_get_string(parser, offset); -+ vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str); -+} -+ -+static void fx_4_parse_objects(struct fx_parser *parser) -+{ -+ struct fx_4_object_variable -+ { -+ uint32_t name; -+ uint32_t type; -+ uint32_t semantic; -+ uint32_t bind_point; -+ } var; -+ struct fx_4_type -+ { -+ uint32_t name; -+ uint32_t class; -+ uint32_t element_count; -+ uint32_t unpacked_size; -+ uint32_t stride; -+ uint32_t packed_size; -+ uint32_t typeinfo; -+ } 
type; -+ uint32_t i, j, value, element_count; -+ const char *name, *type_name; -+ -+ if (parser->failed) -+ return; -+ -+ for (i = 0; i < parser->object_count; ++i) -+ { -+ fx_parser_read_u32s(parser, &var, sizeof(var)); -+ fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); -+ -+ name = fx_4_get_string(parser, var.name); -+ type_name = fx_4_get_string(parser, type.name); -+ vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name); -+ if (type.element_count) -+ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); -+ vkd3d_string_buffer_printf(&parser->buffer, " = {\n"); -+ -+ element_count = max(type.element_count, 1); -+ for (j = 0; j < element_count; ++j) -+ { -+ switch (type.typeinfo) -+ { -+ case FX_4_OBJECT_TYPE_STRING: -+ vkd3d_string_buffer_printf(&parser->buffer, " "); -+ value = fx_parser_read_u32(parser); -+ fx_4_parse_string_initializer(parser, value); -+ break; -+ default: -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -+ "Parsing object type %u is not implemented.\n", type.typeinfo); -+ return; -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, ",\n"); -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, "};\n"); -+ } -+} -+ -+static int fx_4_parse(struct fx_parser *parser) -+{ -+ struct fx_4_header -+ { -+ uint32_t version; -+ uint32_t buffer_count; -+ uint32_t numeric_variable_count; -+ uint32_t object_count; -+ uint32_t shared_buffer_count; -+ uint32_t shared_numeric_variable_count; -+ uint32_t shared_object_count; -+ uint32_t technique_count; -+ uint32_t unstructured_size; -+ uint32_t string_count; -+ uint32_t texture_count; -+ uint32_t depth_stencil_state_count; -+ uint32_t blend_state_count; -+ uint32_t rasterizer_state_count; -+ uint32_t sampler_state_count; -+ uint32_t rtv_count; -+ uint32_t dsv_count; -+ uint32_t shader_count; -+ uint32_t inline_shader_count; -+ } header; -+ -+ fx_parser_read_u32s(parser, &header, sizeof(header)); -+ parser->buffer_count = header.buffer_count; 
-+ parser->object_count = header.object_count; -+ -+ if (parser->end - parser->ptr < header.unstructured_size) -+ { -+ parser->failed = true; -+ return -1; -+ } -+ -+ parser->unstructured.ptr = parser->ptr; -+ parser->unstructured.end = parser->ptr + header.unstructured_size; -+ parser->unstructured.size = header.unstructured_size; -+ fx_parser_skip(parser, header.unstructured_size); -+ -+ fx_parse_buffers(parser); -+ fx_4_parse_objects(parser); -+ -+ return parser->failed ? - 1 : 0; -+} -+ -+static int fx_5_parse(struct fx_parser *parser) -+{ -+ struct fx_5_header -+ { -+ uint32_t version; -+ uint32_t buffer_count; -+ uint32_t numeric_variable_count; -+ uint32_t object_count; -+ uint32_t shared_buffer_count; -+ uint32_t shared_numeric_variable_count; -+ uint32_t shared_object_count; -+ uint32_t technique_count; -+ uint32_t unstructured_size; -+ uint32_t string_count; -+ uint32_t texture_count; -+ uint32_t depth_stencil_state_count; -+ uint32_t blend_state_count; -+ uint32_t rasterizer_state_count; -+ uint32_t sampler_state_count; -+ uint32_t rtv_count; -+ uint32_t dsv_count; -+ uint32_t shader_count; -+ uint32_t inline_shader_count; -+ uint32_t group_count; -+ uint32_t uav_count; -+ uint32_t interface_variable_count; -+ uint32_t interface_variable_element_count; -+ uint32_t class_instance_element_count; -+ } header; -+ -+ fx_parser_read_u32s(parser, &header, sizeof(header)); -+ parser->buffer_count = header.buffer_count; -+ parser->object_count = header.object_count; -+ -+ if (parser->end - parser->ptr < header.unstructured_size) -+ { -+ parser->failed = true; -+ return -1; -+ } -+ -+ parser->unstructured.ptr = parser->ptr; -+ parser->unstructured.end = parser->ptr + header.unstructured_size; -+ parser->unstructured.size = header.unstructured_size; -+ fx_parser_skip(parser, header.unstructured_size); -+ -+ fx_parse_buffers(parser); -+ fx_4_parse_objects(parser); -+ -+ return parser->failed ? 
- 1 : 0; -+} -+ -+int fx_parse(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -+{ -+ struct fx_parser parser = -+ { -+ .start = compile_info->source.code, -+ .ptr = compile_info->source.code, -+ .end = (uint8_t *)compile_info->source.code + compile_info->source.size, -+ .message_context = message_context, -+ }; -+ uint32_t version; -+ int ret; -+ -+ vkd3d_string_buffer_init(&parser.buffer); -+ -+ if (parser.end - parser.start < sizeof(version)) -+ return -1; -+ version = *(uint32_t *)parser.ptr; -+ -+ switch (version) -+ { -+ case 0xfeff0901: -+ ret = fx_2_parse(&parser); -+ break; -+ case 0xfeff1001: -+ case 0xfeff1011: -+ ret = fx_4_parse(&parser); -+ break; -+ case 0xfeff2001: -+ ret = fx_5_parse(&parser); -+ break; -+ default: -+ fx_parser_error(&parser, VKD3D_SHADER_ERROR_FX_INVALID_VERSION, -+ "Invalid effect binary version value 0x%08x.", version); -+ ret = -1; -+ } -+ -+ vkd3d_shader_code_from_string_buffer(out, &parser.buffer); -+ -+ return ret; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 9d24126fba8..0bcc3d0a1f7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -6180,6 +6180,287 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c - ctx->status = VKD3D_ERROR_INVALID_SHADER; - } - -+static void vsir_validate_register_without_indices(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->idx_count != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a register of type %#x.", -+ reg->idx_count, reg->type); -+} -+ -+static void vsir_validate_temp_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ struct validation_context_temp_data *data; -+ -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a TEMP register.", -+ reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for a TEMP register."); -+ -+ if (reg->idx[0].offset >= ctx->program->temp_count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "TEMP register index %u exceeds the maximum count %u.", -+ reg->idx[0].offset, ctx->program->temp_count); -+ return; -+ } -+ -+ data = &ctx->temps[reg->idx[0].offset]; -+ -+ if (reg->dimension == VSIR_DIMENSION_NONE) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension NONE for a TEMP register."); -+ return; -+ } -+ -+ /* TEMP registers can be scalar or vec4, provided that -+ * each individual register always appears with the same -+ * dimension. */ -+ if (data->dimension == VSIR_DIMENSION_NONE) -+ { -+ data->dimension = reg->dimension; -+ data->first_seen = ctx->instruction_idx; -+ } -+ else if (data->dimension != reg->dimension) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x for a TEMP register: " -+ "it has already been seen with dimension %#x at instruction %zu.", -+ reg->dimension, data->dimension, data->first_seen); -+ } -+} -+ -+static void vsir_validate_rastout_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a RASTOUT register.", -+ reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for a RASTOUT register."); -+ -+ if (reg->idx[0].offset >= 3) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Invalid offset for a RASTOUT register."); -+} -+ -+static void 
vsir_validate_misctype_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a MISCTYPE register.", -+ reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for a MISCTYPE register."); -+ -+ if (reg->idx[0].offset >= 2) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Invalid offset for a MISCTYPE register."); -+} -+ -+static void vsir_validate_label_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a LABEL register.", reg->precision); -+ -+ if (reg->data_type != VKD3D_DATA_UNUSED) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a LABEL register.", reg->data_type); -+ -+ if (reg->dimension != VSIR_DIMENSION_NONE) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x for a LABEL register.", reg->dimension); -+ -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a LABEL register.", reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for a LABEL register."); -+ -+ /* Index == 0 is invalid, but it is temporarily allowed -+ * for intermediate stages. Once we support validation -+ * dialects we can selectively check for that. 
*/ -+ if (reg->idx[0].offset > ctx->program->block_count) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "LABEL register index %u exceeds the maximum count %u.", -+ reg->idx[0].offset, ctx->program->block_count); -+} -+ -+static void vsir_validate_sampler_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a SAMPLER register.", reg->precision); -+ -+ if (reg->data_type != VKD3D_DATA_UNUSED) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a SAMPLER register.", reg->data_type); -+ -+ /* VEC4 is allowed in gather operations. */ -+ if (reg->dimension == VSIR_DIMENSION_SCALAR) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension SCALAR for a SAMPLER register."); -+ -+ if (reg->idx_count != 2) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a SAMPLER register.", reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for the descriptor index of a SAMPLER register."); -+} -+ -+static void vsir_validate_resource_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a RESOURCE register.", reg->precision); -+ -+ if (reg->data_type != VKD3D_DATA_UNUSED) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a RESOURCE register.", reg->data_type); -+ -+ if (reg->dimension != VSIR_DIMENSION_VEC4) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x 
for a RESOURCE register.", reg->dimension); -+ -+ if (reg->idx_count != 2) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a RESOURCE register.", reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for the descriptor index of a RESOURCE register."); -+} -+ -+static void vsir_validate_uav_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -+ "Invalid precision %#x for a UAV register.", -+ reg->precision); -+ -+ if (reg->data_type != VKD3D_DATA_UNUSED) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a UAV register.", -+ reg->data_type); -+ -+ /* NONE is allowed in counter operations. */ -+ if (reg->dimension == VSIR_DIMENSION_SCALAR) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x for a UAV register.", -+ reg->dimension); -+ -+ if (reg->idx_count != 2) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a UAV register.", -+ reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for the descriptor index of a UAV register."); -+} -+ -+static void vsir_validate_ssa_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ struct validation_context_ssa_data *data; -+ -+ if (reg->idx_count != 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a SSA register.", -+ reg->idx_count); -+ return; -+ } -+ -+ if (reg->idx[0].rel_addr) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative 
address for a SSA register."); -+ -+ if (reg->idx[0].offset >= ctx->program->ssa_count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "SSA register index %u exceeds the maximum count %u.", -+ reg->idx[0].offset, ctx->program->ssa_count); -+ return; -+ } -+ -+ data = &ctx->ssas[reg->idx[0].offset]; -+ -+ if (reg->dimension == VSIR_DIMENSION_NONE) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension NONE for a SSA register."); -+ return; -+ } -+ -+ /* SSA registers can be scalar or vec4, provided that each -+ * individual register always appears with the same -+ * dimension. */ -+ if (data->dimension == VSIR_DIMENSION_NONE) -+ { -+ data->dimension = reg->dimension; -+ data->data_type = reg->data_type; -+ data->first_seen = ctx->instruction_idx; -+ } -+ else -+ { -+ if (data->dimension != reg->dimension) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -+ "Invalid dimension %#x for a SSA register: " -+ "it has already been seen with dimension %#x at instruction %zu.", -+ reg->dimension, data->dimension, data->first_seen); -+ -+ if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid data type %#x for a SSA register: " -+ "it has already been seen with data type %#x at instruction %zu.", -+ reg->data_type, data->data_type, data->first_seen); -+ } -+} -+ - static void vsir_validate_src_param(struct validation_context *ctx, - const struct vkd3d_shader_src_param *src); - -@@ -6218,298 +6499,59 @@ static void vsir_validate_register(struct validation_context *ctx, - switch (reg->type) - { - case VKD3DSPR_TEMP: -- { -- struct validation_context_temp_data *data; -- -- if (reg->idx_count != 1) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a TEMP register.", -- reg->idx_count); -- break; -- } -- -- if (reg->idx[0].rel_addr) -- 
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register."); -- -- if (reg->idx[0].offset >= ctx->program->temp_count) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.", -- reg->idx[0].offset, ctx->program->temp_count); -- break; -- } -- -- data = &ctx->temps[reg->idx[0].offset]; -- -- if (reg->dimension == VSIR_DIMENSION_NONE) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a TEMP register."); -- break; -- } -- -- /* TEMP registers can be scalar or vec4, provided that -- * each individual register always appears with the same -- * dimension. */ -- if (data->dimension == VSIR_DIMENSION_NONE) -- { -- data->dimension = reg->dimension; -- data->first_seen = ctx->instruction_idx; -- } -- else if (data->dimension != reg->dimension) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a TEMP register: " -- "it has already been seen with dimension %#x at instruction %zu.", -- reg->dimension, data->dimension, data->first_seen); -- } -+ vsir_validate_temp_register(ctx, reg); - break; -- } -- -- case VKD3DSPR_SSA: -- { -- struct validation_context_ssa_data *data; -- -- if (reg->idx_count != 1) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a SSA register.", -- reg->idx_count); -- break; -- } -- -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a SSA register."); - -- if (reg->idx[0].offset >= ctx->program->ssa_count) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "SSA register index %u exceeds the maximum count %u.", -- reg->idx[0].offset, ctx->program->ssa_count); -- break; -- } -- -- data = &ctx->ssas[reg->idx[0].offset]; -- -- if (reg->dimension == VSIR_DIMENSION_NONE) -- { -- validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a SSA register."); -- break; -- } -- -- /* SSA registers can be scalar or vec4, provided that each -- * individual register always appears with the same -- * dimension. */ -- if (data->dimension == VSIR_DIMENSION_NONE) -- { -- data->dimension = reg->dimension; -- data->data_type = reg->data_type; -- data->first_seen = ctx->instruction_idx; -- } -- else -- { -- if (data->dimension != reg->dimension) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a SSA register: " -- "it has already been seen with dimension %#x at instruction %zu.", -- reg->dimension, data->dimension, data->first_seen); -- -- if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type)) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a SSA register: " -- "it has already been seen with data type %#x at instruction %zu.", -- reg->data_type, data->data_type, data->first_seen); -- } -+ case VKD3DSPR_RASTOUT: -+ vsir_validate_rastout_register(ctx, reg); - break; -- } - -- case VKD3DSPR_LABEL: -- if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.", -- reg->precision); -- -- if (reg->data_type != VKD3D_DATA_UNUSED) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.", -- reg->data_type); -- -- if (reg->dimension != VSIR_DIMENSION_NONE) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a LABEL register.", -- reg->dimension); -- -- if (reg->idx_count != 1) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a LABEL register.", -- reg->idx_count); -- break; -- } -+ case VKD3DSPR_DEPTHOUT: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; - -- if 
(reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a LABEL register."); -- -- /* Index == 0 is invalid, but it is temporarily allowed -- * for intermediate stages. Once we support validation -- * dialects we can selectively check for that. */ -- if (reg->idx[0].offset > ctx->program->block_count) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "LABEL register index %u exceeds the maximum count %u.", -- reg->idx[0].offset, ctx->program->block_count); -+ case VKD3DSPR_MISCTYPE: -+ vsir_validate_misctype_register(ctx, reg); - break; - -- case VKD3DSPR_NULL: -- if (reg->idx_count != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a NULL register.", -- reg->idx_count); -+ case VKD3DSPR_LABEL: -+ vsir_validate_label_register(ctx, reg); - break; - - case VKD3DSPR_IMMCONST: -- if (reg->idx_count != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST register.", -- reg->idx_count); -+ vsir_validate_register_without_indices(ctx, reg); - break; - - case VKD3DSPR_IMMCONST64: -- if (reg->idx_count != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST64 register.", -- reg->idx_count); -+ vsir_validate_register_without_indices(ctx, reg); - break; - -- case VKD3DSPR_SAMPLER: -- if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -- "Invalid precision %#x for a SAMPLER register.", -- reg->precision); -- -- if (reg->data_type != VKD3D_DATA_UNUSED) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -- "Invalid data type %#x for a SAMPLER register.", -- reg->data_type); -- -- /* VEC4 is allowed in gather operations. 
*/ -- if (reg->dimension == VSIR_DIMENSION_SCALAR) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -- "Invalid dimension SCALAR for a SAMPLER register."); -- -- if (reg->idx_count != 2) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -- "Invalid index count %u for a SAMPLER register.", -- reg->idx_count); -- break; -- } -+ case VKD3DSPR_NULL: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; - -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "Non-NULL relative address for the descriptor index of a SAMPLER register."); -+ case VKD3DSPR_SAMPLER: -+ vsir_validate_sampler_register(ctx, reg); - break; - - case VKD3DSPR_RESOURCE: -- if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -- "Invalid precision %#x for a RESOURCE register.", -- reg->precision); -- -- if (reg->data_type != VKD3D_DATA_UNUSED) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -- "Invalid data type %#x for a RESOURCE register.", -- reg->data_type); -- -- if (reg->dimension != VSIR_DIMENSION_VEC4) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -- "Invalid dimension %#x for a RESOURCE register.", -- reg->dimension); -- -- if (reg->idx_count != 2) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -- "Invalid index count %u for a RESOURCE register.", -- reg->idx_count); -- break; -- } -- -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "Non-NULL relative address for the descriptor index of a RESOURCE register."); -+ vsir_validate_resource_register(ctx, reg); - break; - - case VKD3DSPR_UAV: -- if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, -- "Invalid precision %#x for a UAV register.", -- reg->precision); -- -- if (reg->data_type != 
VKD3D_DATA_UNUSED) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -- "Invalid data type %#x for a UAV register.", -- reg->data_type); -- -- /* NONE is allowed in counter operations. */ -- if (reg->dimension == VSIR_DIMENSION_SCALAR) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, -- "Invalid dimension %#x for a UAV register.", -- reg->dimension); -- -- if (reg->idx_count != 2) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -- "Invalid index count %u for a UAV register.", -- reg->idx_count); -- break; -- } -- -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "Non-NULL relative address for the descriptor index of a UAV register."); -- break; -- -- case VKD3DSPR_DEPTHOUT: -- if (reg->idx_count != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -- "Invalid index count %u for a DEPTHOUT register.", -- reg->idx_count); -+ vsir_validate_uav_register(ctx, reg); - break; - - case VKD3DSPR_DEPTHOUTGE: -- if (reg->idx_count != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -- "Invalid index count %u for a DEPTHOUTGE register.", -- reg->idx_count); -+ vsir_validate_register_without_indices(ctx, reg); - break; - - case VKD3DSPR_DEPTHOUTLE: -- if (reg->idx_count != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -- "Invalid index count %u for a DEPTHOUTLE register.", -- reg->idx_count); -- break; -- -- case VKD3DSPR_RASTOUT: -- if (reg->idx_count != 1) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -- "Invalid index count %u for a RASTOUT register.", -- reg->idx_count); -- break; -- } -- -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "Non-NULL relative address for a RASTOUT register."); -- -- if (reg->idx[0].offset >= 3) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "Invalid offset for a RASTOUT 
register."); -+ vsir_validate_register_without_indices(ctx, reg); - break; - -- case VKD3DSPR_MISCTYPE: -- if (reg->idx_count != 1) -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -- "Invalid index count %u for a MISCTYPE register.", -- reg->idx_count); -- break; -- } -- -- if (reg->idx[0].rel_addr) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "Non-NULL relative address for a MISCTYPE register."); -- -- if (reg->idx[0].offset >= 2) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -- "Invalid offset for a MISCTYPE register."); -+ case VKD3DSPR_SSA: -+ vsir_validate_ssa_register(ctx, reg); - break; - - default: -@@ -6707,20 +6749,60 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, - return true; - } - -+enum vsir_signature_type -+{ -+ SIGNATURE_TYPE_INPUT, -+ SIGNATURE_TYPE_OUTPUT, -+ SIGNATURE_TYPE_PATCH_CONSTANT, -+}; -+ -+static const char * const signature_type_names[] = -+{ -+ [SIGNATURE_TYPE_INPUT] = "input", -+ [SIGNATURE_TYPE_OUTPUT] = "output", -+ [SIGNATURE_TYPE_PATCH_CONSTANT] = "patch constant", -+}; -+ -+#define PS_BIT (1u << VKD3D_SHADER_TYPE_PIXEL) -+#define VS_BIT (1u << VKD3D_SHADER_TYPE_VERTEX) -+#define GS_BIT (1u << VKD3D_SHADER_TYPE_GEOMETRY) -+#define HS_BIT (1u << VKD3D_SHADER_TYPE_HULL) -+#define DS_BIT (1u << VKD3D_SHADER_TYPE_DOMAIN) -+#define CS_BIT (1u << VKD3D_SHADER_TYPE_COMPUTE) -+ -+static const struct sysval_validation_data_element -+{ -+ unsigned int input; -+ unsigned int output; -+ unsigned int patch_constant; -+ enum vkd3d_shader_component_type data_type; -+ unsigned int component_count; -+} -+sysval_validation_data[] = -+{ -+ [VKD3D_SHADER_SV_POSITION] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, -+ VKD3D_SHADER_COMPONENT_FLOAT, 4}, -+ [VKD3D_SHADER_SV_CLIP_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, -+ VKD3D_SHADER_COMPONENT_FLOAT, 4}, -+ 
[VKD3D_SHADER_SV_CULL_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, -+ VKD3D_SHADER_COMPONENT_FLOAT, 4}, -+}; -+ - static void vsir_validate_signature_element(struct validation_context *ctx, -- const struct shader_signature *signature, const char *signature_type, -+ const struct shader_signature *signature, enum vsir_signature_type signature_type, - unsigned int idx) - { -+ const char *signature_type_name = signature_type_names[signature_type]; - const struct signature_element *element = &signature->elements[idx]; - bool integer_type = false; - - if (element->register_count == 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -- "element %u of %s signature: Invalid zero register count.", idx, signature_type); -+ "element %u of %s signature: Invalid zero register count.", idx, signature_type_name); - - if (element->mask == 0 || (element->mask & ~0xf)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -- "element %u of %s signature: Invalid mask %#x.", idx, signature_type, element->mask); -+ "element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask); - - /* Here we'd likely want to validate that the usage mask is a subset of the - * signature mask. Unfortunately the D3DBC parser sometimes violates this. 
-@@ -6744,7 +6826,7 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - if (element->used_mask & ~0xf) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid usage mask %#x.", -- idx, signature_type, element->used_mask); -+ idx, signature_type_name, element->used_mask); - - switch (element->sysval_semantic) - { -@@ -6776,10 +6858,56 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - default: - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid system value semantic %#x.", -- idx, signature_type, element->sysval_semantic); -+ idx, signature_type_name, element->sysval_semantic); - break; - } - -+ if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) -+ { -+ const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic]; -+ -+ if (data->input || data->output || data->patch_constant) -+ { -+ unsigned int mask; -+ -+ switch (signature_type) -+ { -+ case SIGNATURE_TYPE_INPUT: -+ mask = data->input; -+ break; -+ -+ case SIGNATURE_TYPE_OUTPUT: -+ mask = data->output; -+ break; -+ -+ case SIGNATURE_TYPE_PATCH_CONSTANT: -+ mask = data->patch_constant; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (!(mask & (1u << ctx->program->shader_version.type))) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid system value semantic %#x.", -+ idx, signature_type_name, element->sysval_semantic); -+ } -+ -+ if (data->component_count != 0) -+ { -+ if (element->component_type != data->data_type) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid data type %#x for system value semantic %#x.", -+ idx, signature_type_name, element->component_type, element->sysval_semantic); -+ -+ if (vsir_write_mask_component_count(element->mask) > data->component_count) -+ 
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid mask %#x for system value semantic %#x.", -+ idx, signature_type_name, element->mask, element->sysval_semantic); -+ } -+ } -+ - switch (element->component_type) - { - case VKD3D_SHADER_COMPONENT_INT: -@@ -6793,29 +6921,29 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - default: - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid component type %#x.", -- idx, signature_type, element->component_type); -+ idx, signature_type_name, element->component_type); - break; - } - - if (element->min_precision >= VKD3D_SHADER_MINIMUM_PRECISION_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid minimum precision %#x.", -- idx, signature_type, element->min_precision); -+ idx, signature_type_name, element->min_precision); - - if (element->interpolation_mode >= VKD3DSIM_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid interpolation mode %#x.", -- idx, signature_type, element->interpolation_mode); -+ idx, signature_type_name, element->interpolation_mode); - - if (integer_type && element->interpolation_mode != VKD3DSIM_NONE - && element->interpolation_mode != VKD3DSIM_CONSTANT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid interpolation mode %#x for integer component type.", -- idx, signature_type, element->interpolation_mode); -+ idx, signature_type_name, element->interpolation_mode); - } - - static void vsir_validate_signature(struct validation_context *ctx, -- const struct shader_signature *signature, const char *signature_type) -+ const struct shader_signature *signature, enum vsir_signature_type signature_type) - { - unsigned int i; - -@@ -7373,9 +7501,9 @@ enum vkd3d_result vsir_program_validate(struct 
vsir_program *program, uint64_t c - "Patch constant signature is only valid for hull and domain shaders."); - } - -- vsir_validate_signature(&ctx, &program->input_signature, "input"); -- vsir_validate_signature(&ctx, &program->output_signature, "output"); -- vsir_validate_signature(&ctx, &program->patch_constant_signature, "patch constant"); -+ vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT); -+ vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); -+ vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); - - if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) - goto fail; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index cde8dc3146c..ca012d4948a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -1710,6 +1710,10 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - { - ret = compile_hlsl(compile_info, out, &message_context); - } -+ else if (compile_info->source_type == VKD3D_SHADER_SOURCE_FX) -+ { -+ ret = fx_parse(compile_info, out, &message_context); -+ } - else - { - uint64_t config_flags = vkd3d_shader_init_config_flags(); -@@ -1942,6 +1946,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns - #ifdef VKD3D_SHADER_UNSUPPORTED_DXIL - VKD3D_SHADER_SOURCE_DXBC_DXIL, - #endif -+ VKD3D_SHADER_SOURCE_FX, - }; - - TRACE("count %p.\n", count); -@@ -2000,6 +2005,11 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - }; - #endif - -+ static const enum vkd3d_shader_target_type fx_types[] = -+ { -+ VKD3D_SHADER_TARGET_D3D_ASM, -+ }; -+ - TRACE("source_type %#x, count %p.\n", source_type, count); - - switch (source_type) -@@ -2022,6 +2032,10 @@ const enum vkd3d_shader_target_type 
*vkd3d_shader_get_supported_target_types( - return dxbc_dxil_types; - #endif - -+ case VKD3D_SHADER_SOURCE_FX: -+ *count = ARRAY_SIZE(fx_types); -+ return fx_types; -+ - default: - *count = 0; - return NULL; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 9ca3c328147..54b87373ed1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -252,6 +252,9 @@ enum vkd3d_shader_error - - VKD3D_SHADER_ERROR_MSL_INTERNAL = 10000, - VKD3D_SHADER_ERROR_MSL_BINDING_NOT_FOUND = 10001, -+ -+ VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED = 11000, -+ VKD3D_SHADER_ERROR_FX_INVALID_VERSION = 11001, - }; - - enum vkd3d_shader_opcode -@@ -1605,6 +1608,8 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co - struct vkd3d_shader_message_context *message_context, struct vsir_program *program); - int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, - struct vkd3d_shader_message_context *message_context, struct vsir_program *program); -+int fx_parse(const struct vkd3d_shader_compile_info *compile_info, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - - void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); - --- -2.45.2 - diff --git a/patches/vkd3d-latest/0006-Updated-vkd3d-to-c89e547e3ef767da28be46bc37ac2ba71ea.patch b/patches/vkd3d-latest/0006-Updated-vkd3d-to-c89e547e3ef767da28be46bc37ac2ba71ea.patch deleted file mode 100644 index 42c1f4b5..00000000 --- a/patches/vkd3d-latest/0006-Updated-vkd3d-to-c89e547e3ef767da28be46bc37ac2ba71ea.patch +++ /dev/null @@ -1,2764 +0,0 @@ -From 6bcdf76e80406cdbc80e1fb9596b65db4f5fa1a4 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 17 Oct 2024 06:55:39 +1100 -Subject: [PATCH] Updated vkd3d to c89e547e3ef767da28be46bc37ac2ba71eabdbbe. 
- ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 10 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 2 + - libs/vkd3d/libs/vkd3d-shader/glsl.c | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 35 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 109 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 483 ++++++++---- - libs/vkd3d/libs/vkd3d-shader/ir.c | 50 ++ - libs/vkd3d/libs/vkd3d-shader/tpf.c | 691 +++++++++++------- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 12 +- - 10 files changed, 951 insertions(+), 443 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 44b1714b56b..8f7e5cb666b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1395,7 +1395,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c - return VKD3D_OK; - } - --bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, -+bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, - unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) - { - unsigned int i; -@@ -1463,7 +1463,7 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, - return false; - } - --bool hlsl_sm1_usage_from_semantic(const char *semantic_name, -+bool sm1_usage_from_semantic_name(const char *semantic_name, - uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx) - { - static const struct -@@ -1780,7 +1780,7 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff - size_offset = put_u32(buffer, 0); - ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); - -- ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); -+ ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. 
*/ - creator_offset = put_u32(buffer, 0); - put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - put_u32(buffer, uniform_count); -@@ -2289,7 +2289,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, - uint32_t token, usage_idx; - bool ret; - -- if (hlsl_sm1_register_from_semantic(version, element->semantic_name, -+ if (sm1_register_from_semantic_name(version, element->semantic_name, - element->semantic_index, output, ®.type, ®.reg)) - { - usage = 0; -@@ -2297,7 +2297,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, - } - else - { -- ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); -+ ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx); - VKD3D_ASSERT(ret); - reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; - reg.reg = element->register_index; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 5db9d6da063..59494d2500d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -5872,6 +5872,8 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr - return; - } - e = &signature->elements[row_index]; -+ if (!e->sysval_semantic) -+ column_index += vsir_write_mask_get_component_idx(e->mask); - - if (column_index >= VKD3D_VEC4_SIZE) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index c8efdae3386..1e17de54ede 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -1293,6 +1293,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - break; - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_INPUT_PS: -+ case VKD3DSIH_DCL_INPUT_PS_SGV: - case VKD3DSIH_DCL_INPUT_PS_SIV: - case VKD3DSIH_DCL_INPUT_SGV: - case VKD3DSIH_DCL_OUTPUT: -diff --git 
a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index b44c0296f69..6ad0117fd5c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -2545,6 +2545,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, - decl->return_type = return_type; - decl->parameters = *parameters; - decl->loc = *loc; -+ list_init(&decl->extern_vars); - - if (!hlsl_types_are_equal(return_type, ctx->builtin_types.Void)) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 4082b14fe04..efe3aec024b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -488,6 +488,8 @@ struct hlsl_ir_var - * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 - * means function entry. */ - unsigned int first_write, last_read; -+ /* Whether the variable is read in any entry function. */ -+ bool is_read; - /* Offset where the variable's value is stored within its buffer in numeric register components. - * This in case the variable is uniform. */ - unsigned int buffer_offset; -@@ -611,6 +613,12 @@ struct hlsl_ir_function_decl - * executed. Needed to deal with return statements in non-uniform control - * flow, since some backends can't handle them. */ - struct hlsl_ir_var *early_return_var; -+ -+ /* List of all the extern semantic variables; linked by the -+ * hlsl_ir_var.extern_entry fields. This exists as a convenience because -+ * it is often necessary to iterate all extern variables and these can be -+ * declared in as function parameters, or as the function return value. */ -+ struct list extern_vars; - }; - - struct hlsl_ir_call -@@ -1019,10 +1027,11 @@ struct hlsl_ctx - struct hlsl_scope *dummy_scope; - /* List of all the scopes in the program; linked by the hlsl_scope.entry fields. */ - struct list scopes; -- /* List of all the extern variables; linked by the hlsl_ir_var.extern_entry fields. 
-- * This exists as a convenience because it is often necessary to iterate all extern variables -- * and these can be declared in global scope, as function parameters, or as the function -- * return value. */ -+ -+ /* List of all the extern variables, excluding semantic variables; linked -+ * by the hlsl_ir_var.extern_entry fields. This exists as a convenience -+ * because it is often necessary to iterate all extern variables declared -+ * in the global scope or as function parameters. */ - struct list extern_vars; - - /* List containing both the built-in HLSL buffers ($Globals and $Params) and the ones declared -@@ -1080,9 +1089,6 @@ struct hlsl_ctx - } constant_defs; - /* 'c' registers where the constants expected by SM2 sincos are stored. */ - struct hlsl_reg d3dsincosconst1, d3dsincosconst2; -- /* Number of temp. registers required for the shader to run, i.e. the largest temp register -- * index that will be used in the output bytecode (+1). */ -- uint32_t temp_count; - - /* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in - * compute shader profiles. It is set using the numthreads() attribute in the entry point. 
*/ -@@ -1421,6 +1427,9 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, - - void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body); - void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); -+uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); -+void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); -+void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); - int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); -@@ -1615,21 +1624,15 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, - - D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); - D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); --bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, -- unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); --bool hlsl_sm1_usage_from_semantic(const char *semantic_name, -- uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); - - void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); - int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); - --bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, -- struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output); --bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, 
-- bool output, enum vkd3d_shader_register_type *type, bool *has_idx); --int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); -+int tpf_compile(struct vsir_program *program, uint64_t config_flags, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -+ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); - - struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 2230cd5b919..ee13e193d49 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -1208,6 +1208,32 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - return true; - } - -+static bool add_record_access_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ const char *name, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *record = node_from_block(block); -+ const struct hlsl_type *type = record->data_type; -+ const struct hlsl_struct_field *field, *base; -+ -+ if ((field = get_struct_field(type->e.record.fields, type->e.record.field_count, name))) -+ { -+ unsigned int field_idx = field - type->e.record.fields; -+ -+ return add_record_access(ctx, block, record, field_idx, loc); -+ } -+ else if ((base = get_struct_field(type->e.record.fields, type->e.record.field_count, "$super"))) -+ { -+ unsigned int base_idx = base - type->e.record.fields; -+ -+ if (!add_record_access(ctx, block, record, base_idx, loc)) -+ return false; -+ return add_record_access_recurse(ctx, block, name, loc); -+ } -+ -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", name); -+ return false; -+} -+ - static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, struct list *list) - { - struct parse_variable_def *v, 
*v_next; -@@ -4260,6 +4286,35 @@ static bool intrinsic_min(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, params->args[0], params->args[1], loc); - } - -+static bool intrinsic_modf(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_type *type; -+ char *body; -+ -+ static const char template[] = -+ "%s modf(%s x, out %s ip)\n" -+ "{\n" -+ " ip = trunc(x);\n" -+ " return x - ip;\n" -+ "}"; -+ -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -+ return false; -+ type = params->args[0]->data_type; -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, -+ type->name, type->name, type->name))) -+ return false; -+ func = hlsl_compile_internal_function(ctx, "modf", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return !!add_user_call(ctx, func, params, false, loc); -+} -+ - static bool intrinsic_mul(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -5147,6 +5202,7 @@ intrinsic_functions[] = - {"mad", 3, true, intrinsic_mad}, - {"max", 2, true, intrinsic_max}, - {"min", 2, true, intrinsic_min}, -+ {"modf", 2, true, intrinsic_modf}, - {"mul", 2, true, intrinsic_mul}, - {"normalize", 1, true, intrinsic_normalize}, - {"pow", 2, true, intrinsic_pow}, -@@ -6560,6 +6616,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - - %type switch_case - -+%type base_optional - %type field_type - %type named_struct_spec - %type unnamed_struct_spec -@@ -6774,11 +6831,28 @@ struct_spec: - | unnamed_struct_spec - - named_struct_spec: -- KW_STRUCT any_identifier '{' fields_list '}' -+ KW_STRUCT any_identifier base_optional '{' fields_list '}' - { - bool ret; - -- $$ = hlsl_new_struct_type(ctx, $2, $4.fields, $4.count); -+ if ($3) -+ { -+ char *name; -+ -+ if (!(name = hlsl_strdup(ctx, "$super"))) -+ YYABORT; 
-+ if (!hlsl_array_reserve(ctx, (void **)&$5.fields, &$5.capacity, 1 + $5.count, sizeof(*$5.fields))) -+ YYABORT; -+ memmove(&$5.fields[1], $5.fields, $5.count * sizeof(*$5.fields)); -+ ++$5.count; -+ -+ memset(&$5.fields[0], 0, sizeof($5.fields[0])); -+ $5.fields[0].type = $3; -+ $5.fields[0].loc = @3; -+ $5.fields[0].name = name; -+ } -+ -+ $$ = hlsl_new_struct_type(ctx, $2, $5.fields, $5.count); - - if (hlsl_get_var(ctx->cur_scope, $2)) - { -@@ -6805,6 +6879,23 @@ any_identifier: - | TYPE_IDENTIFIER - | NEW_IDENTIFIER - -+/* TODO: Multiple inheritance support for interfaces. */ -+base_optional: -+ %empty -+ { -+ $$ = NULL; -+ } -+ | ':' TYPE_IDENTIFIER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); -+ if ($$->class != HLSL_CLASS_STRUCT) -+ { -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Base type \"%s\" is not a struct.", $2); -+ YYABORT; -+ } -+ vkd3d_free($2); -+ } -+ - fields_list: - %empty - { -@@ -8795,19 +8886,7 @@ postfix_expr: - - if (node->data_type->class == HLSL_CLASS_STRUCT) - { -- struct hlsl_type *type = node->data_type; -- const struct hlsl_struct_field *field; -- unsigned int field_idx = 0; -- -- if (!(field = get_struct_field(type->e.record.fields, type->e.record.field_count, $3))) -- { -- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", $3); -- vkd3d_free($3); -- YYABORT; -- } -- -- field_idx = field - type->e.record.fields; -- if (!add_record_access(ctx, $1, node, field_idx, &@2)) -+ if (!add_record_access_recurse(ctx, $1, $3, &@2)) - { - vkd3d_free($3); - YYABORT; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index c5dd5e71e02..4cf9d5eb84a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -276,8 +276,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls - == base_type_get_semantic_equivalent(type2->e.numeric.type); - } - 
--static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -- struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, -+static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -+ struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, - uint32_t index, bool output, const struct vkd3d_shader_location *loc) - { - struct hlsl_semantic new_semantic; -@@ -287,7 +287,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? "output" : "input", semantic->name, index))) - return NULL; - -- LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ LIST_FOR_EACH_ENTRY(ext_var, &func->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (!ascii_strcasecmp(ext_var->name, new_name)) - { -@@ -339,12 +339,12 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - ext_var->is_input_semantic = 1; - ext_var->is_param = var->is_param; - list_add_before(&var->scope_entry, &ext_var->scope_entry); -- list_add_tail(&ctx->extern_vars, &ext_var->extern_entry); -+ list_add_tail(&func->extern_vars, &ext_var->extern_entry); - - return ext_var; - } - --static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, -+static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, - uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; -@@ -375,7 +375,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - struct hlsl_ir_var *input; - struct hlsl_ir_load *load; - -- if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, -+ if (!(input = 
add_semantic_var(ctx, func, var, vector_type_src, modifiers, semantic, - semantic_index + i, false, loc))) - return; - -@@ -408,8 +408,8 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - } - } - --static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, -- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -+ struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct vkd3d_shader_location *loc = &lhs->node.loc; - struct hlsl_type *type = lhs->node.data_type; -@@ -466,30 +466,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * - return; - list_add_after(&c->entry, &element_load->node.entry); - -- prepend_input_copy_recurse(ctx, block, element_load, element_modifiers, semantic, elem_semantic_index); -+ prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, semantic, elem_semantic_index); - } - } - else - { -- prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); -+ prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index); - } - } - - /* Split inputs into two variables representing the semantic and temp registers, - * and copy the former to the latter, so that writes to input variables work. */ --static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) -+static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) - { - struct hlsl_ir_load *load; - - /* This redundant load is expected to be deleted later by DCE. 
*/ - if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) - return; -- list_add_head(&block->instrs, &load->node.entry); -+ list_add_head(&func->body.instrs, &load->node.entry); - -- prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); - } - --static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, -+static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, - uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct hlsl_type *type = rhs->node.data_type, *vector_type; -@@ -517,18 +517,19 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - struct hlsl_ir_var *output; - struct hlsl_ir_load *load; - -- if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) -+ if (!(output = add_semantic_var(ctx, func, var, vector_type, -+ modifiers, semantic, semantic_index + i, true, loc))) - return; - - if (type->class == HLSL_CLASS_MATRIX) - { - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; -- hlsl_block_add_instr(block, c); -+ hlsl_block_add_instr(&func->body, c); - - if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) - return; -- hlsl_block_add_instr(block, &load->node); -+ hlsl_block_add_instr(&func->body, &load->node); - } - else - { -@@ -536,17 +537,17 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - - if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) - return; -- hlsl_block_add_instr(block, &load->node); -+ hlsl_block_add_instr(&func->body, &load->node); - } - - if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) - return; -- hlsl_block_add_instr(block, store); -+ 
hlsl_block_add_instr(&func->body, store); - } - } - --static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, -- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -+ struct hlsl_ir_load *rhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) - { - struct vkd3d_shader_location *loc = &rhs->node.loc; - struct hlsl_type *type = rhs->node.data_type; -@@ -580,34 +581,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * - - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; -- hlsl_block_add_instr(block, c); -+ hlsl_block_add_instr(&func->body, c); - - if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) - return; -- hlsl_block_add_instr(block, &element_load->node); -+ hlsl_block_add_instr(&func->body, &element_load->node); - -- append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); -+ append_output_copy_recurse(ctx, func, element_load, modifiers, semantic, elem_semantic_index); - } - } - else - { -- append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); -+ append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index); - } - } - - /* Split outputs into two variables representing the temp and semantic - * registers, and copy the former to the latter, so that reads from output - * variables work. */ --static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) -+static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) - { - struct hlsl_ir_load *load; - - /* This redundant load is expected to be deleted later by DCE. 
*/ - if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) - return; -- hlsl_block_add_instr(block, &load->node); -+ hlsl_block_add_instr(&func->body, &load->node); - -- append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); - } - - bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), -@@ -4113,7 +4114,7 @@ static void dump_function(struct rb_entry *entry, void *context) - } - } - --static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, -+static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref, - struct hlsl_ir_node *instr) - { - if (!deref->rel_offset.node) -@@ -4126,6 +4127,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, - return true; - } - -+void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+{ -+ struct hlsl_scope *scope; -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ var->indexable = false; -+ } -+ -+ transform_derefs(ctx, mark_indexable_var, &entry_func->body); -+} -+ - static char get_regset_name(enum hlsl_regset regset) - { - switch (regset) -@@ -4142,11 +4157,11 @@ static char get_regset_name(enum hlsl_regset regset) - vkd3d_unreachable(); - } - --static void allocate_register_reservations(struct hlsl_ctx *ctx) -+static void allocate_register_reservations(struct hlsl_ctx *ctx, struct list *extern_vars) - { - struct hlsl_ir_var *var; - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ LIST_FOR_EACH_ENTRY(var, extern_vars, struct hlsl_ir_var, extern_entry) - { - const struct hlsl_reg_reservation *reservation = &var->reg_reservation; - unsigned int r; 
-@@ -4352,7 +4367,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - } - } - --static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+static void init_var_liveness(struct hlsl_ir_var *var) -+{ -+ if (var->is_uniform || var->is_input_semantic) -+ var->first_write = 1; -+ else if (var->is_output_semantic) -+ var->last_read = UINT_MAX; -+} -+ -+void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { - struct hlsl_scope *scope; - struct hlsl_ir_var *var; -@@ -4366,16 +4389,29 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - } - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if (var->is_uniform || var->is_input_semantic) -- var->first_write = 1; -- else if (var->is_output_semantic) -- var->last_read = UINT_MAX; -- } -+ init_var_liveness(var); -+ -+ LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) -+ init_var_liveness(var); - - compute_liveness_recurse(&entry_func->body, 0, 0); - } - -+static void mark_vars_usage(struct hlsl_ctx *ctx) -+{ -+ struct hlsl_scope *scope; -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->last_read) -+ var->is_read = true; -+ } -+ } -+} -+ - struct register_allocator - { - struct allocation -@@ -5094,9 +5130,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - * index to all (simultaneously live) variables or intermediate values. Agnostic - * as to how many registers are actually available for the current backend, and - * does not handle constants. 
*/ --static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { - struct register_allocator allocator = {0}; -+ struct hlsl_scope *scope; -+ struct hlsl_ir_var *var; -+ -+ /* Reset variable temp register allocations. */ -+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform)) -+ memset(var->regs, 0, sizeof(var->regs)); -+ } -+ } - - /* ps_1_* outputs are special and go in temp register 0. */ - if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -@@ -5105,8 +5153,7 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio - - for (i = 0; i < entry_func->parameters.count; ++i) - { -- const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; -- -+ var = entry_func->parameters.vars[i]; - if (var->is_output_semantic) - { - record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); -@@ -5116,11 +5163,13 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio - } - - allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); -- ctx->temp_count = allocator.reg_count; - vkd3d_free(allocator.allocations); -+ -+ return allocator.reg_count; - } - --static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) -+static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -+ unsigned int *counter, bool output, bool is_patch_constant_func) - { - static const char *const shader_names[] = - { -@@ -5133,27 +5182,28 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - }; - - enum vkd3d_shader_register_type type; 
-+ struct vkd3d_shader_version version; - uint32_t reg; - bool builtin; - - VKD3D_ASSERT(var->semantic.name); - -- if (ctx->profile->major_version < 4) -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ -+ if (version.major < 4) - { -- struct vkd3d_shader_version version; - enum vkd3d_decl_usage usage; - uint32_t usage_idx; - - /* ps_1_* outputs are special and go in temp register 0. */ -- if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ if (version.major == 1 && output && version.type == VKD3D_SHADER_TYPE_PIXEL) - return; - -- version.major = ctx->profile->major_version; -- version.minor = ctx->profile->minor_version; -- version.type = ctx->profile->type; -- builtin = hlsl_sm1_register_from_semantic(&version, -+ builtin = sm1_register_from_semantic_name(&version, - var->semantic.name, var->semantic.index, output, &type, ®); -- if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) -+ if (!builtin && !sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Invalid semantic '%s'.", var->semantic.name); -@@ -5168,19 +5218,21 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - enum vkd3d_shader_sysval_semantic semantic; - bool has_idx; - -- if (!sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output)) -+ if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, -+ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Invalid semantic '%s'.", var->semantic.name); - return; - } -- if ((builtin = hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, &has_idx))) -+ -+ if 
((builtin = sm4_register_from_semantic_name(&version, var->semantic.name, output, &type, &has_idx))) - reg = has_idx ? var->semantic.index : 0; - } - - if (builtin) - { -- TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[ctx->profile->type], -+ TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[version.type], - output ? "output" : "input", var->semantic.name, var->semantic.index, type, reg); - } - else -@@ -5194,17 +5246,18 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - } - } - --static void allocate_semantic_registers(struct hlsl_ctx *ctx) -+static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -+ bool is_patch_constant_func = entry_func == ctx->patch_constant_func; - unsigned int input_counter = 0, output_counter = 0; - struct hlsl_ir_var *var; - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_input_semantic) -- allocate_semantic_register(ctx, var, &input_counter, false); -+ allocate_semantic_register(ctx, var, &input_counter, false, is_patch_constant_func); - if (var->is_output_semantic) -- allocate_semantic_register(ctx, var, &output_counter, true); -+ allocate_semantic_register(ctx, var, &output_counter, true, is_patch_constant_func); - } - } - -@@ -5279,7 +5332,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va - - TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); - buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); -- if (var->last_read) -+ if (var->is_read) - buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size); - } - -@@ -5510,15 +5563,15 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - return NULL; - } - --static void 
allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) -+static void allocate_objects(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, enum hlsl_regset regset) - { - char regset_name = get_regset_name(regset); - uint32_t min_index = 0, id = 0; - struct hlsl_ir_var *var; - -- if (regset == HLSL_REGSET_UAVS) -+ if (regset == HLSL_REGSET_UAVS && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - { -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->semantic.name && (!ascii_strcasecmp(var->semantic.name, "color") - || !ascii_strcasecmp(var->semantic.name, "sv_target"))) -@@ -6228,22 +6281,104 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - } while (progress); - } - --static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, -- struct vsir_program *program, bool output, struct hlsl_ir_var *var) -+static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct shader_signature *signature, bool output, struct hlsl_ir_var *var) - { - enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -+ enum vkd3d_shader_component_type component_type; -+ unsigned int register_index, mask, use_mask; -+ const char *name = var->semantic.name; - enum vkd3d_shader_register_type type; -- struct shader_signature *signature; - struct signature_element *element; -- unsigned int register_index, mask; - -- if ((!output && !var->last_read) || (output && !var->first_write)) -- return; -+ if (hlsl_version_ge(ctx, 4, 0)) -+ { -+ struct vkd3d_string_buffer *string; -+ bool has_idx, ret; - -- if (output) -- signature = &program->output_signature; -+ ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, -+ ctx->semantic_compat_mapping, ctx->domain, var->semantic.name, var->semantic.index, -+ output, signature == &program->patch_constant_signature); 
-+ VKD3D_ASSERT(ret); -+ if (sysval == ~0u) -+ return; -+ -+ if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) -+ { -+ register_index = has_idx ? var->semantic.index : ~0u; -+ } -+ else -+ { -+ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); -+ register_index = var->regs[HLSL_REGSET_NUMERIC].id; -+ } -+ -+ /* NOTE: remember to change this to the actually allocated mask once -+ * we start optimizing interstage signatures. */ -+ mask = (1u << var->data_type->dimx) - 1; -+ use_mask = mask; /* FIXME: retrieve use mask accurately. */ -+ -+ switch (var->data_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ break; -+ -+ case HLSL_TYPE_INT: -+ component_type = VKD3D_SHADER_COMPONENT_INT; -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ component_type = VKD3D_SHADER_COMPONENT_UINT; -+ break; -+ -+ default: -+ if ((string = hlsl_type_to_string(ctx, var->data_type))) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Invalid data type %s for semantic variable %s.", string->buffer, var->name); -+ hlsl_release_string_buffer(ctx, string); -+ component_type = VKD3D_SHADER_COMPONENT_VOID; -+ break; -+ } -+ -+ if (sysval == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) -+ name = "SV_Target"; -+ else if (sysval == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) -+ name ="SV_Depth"; -+ else if (sysval == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) -+ name = "SV_Position"; -+ } - else -- signature = &program->input_signature; -+ { -+ if ((!output && !var->last_read) || (output && !var->first_write)) -+ return; -+ -+ if (!sm1_register_from_semantic_name(&program->shader_version, -+ var->semantic.name, var->semantic.index, output, &type, ®ister_index)) -+ { -+ enum vkd3d_decl_usage usage; -+ unsigned int usage_idx; -+ bool ret; -+ -+ register_index = 
var->regs[HLSL_REGSET_NUMERIC].id; -+ -+ ret = sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx); -+ VKD3D_ASSERT(ret); -+ /* With the exception of vertex POSITION output, none of these are -+ * system values. Pixel POSITION input is not equivalent to -+ * SV_Position; the closer equivalent is VPOS, which is not declared -+ * as a semantic. */ -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX -+ && output && usage == VKD3D_DECL_USAGE_POSITION) -+ sysval = VKD3D_SHADER_SV_POSITION; -+ } -+ -+ mask = (1 << var->data_type->dimx) - 1; -+ use_mask = mask; /* FIXME: retrieve use mask accurately. */ -+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ } - - if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, - signature->element_count + 1, sizeof(*signature->elements))) -@@ -6252,30 +6387,9 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, - return; - } - element = &signature->elements[signature->element_count++]; -- -- if (!hlsl_sm1_register_from_semantic(&program->shader_version, -- var->semantic.name, var->semantic.index, output, &type, ®ister_index)) -- { -- enum vkd3d_decl_usage usage; -- unsigned int usage_idx; -- bool ret; -- -- register_index = var->regs[HLSL_REGSET_NUMERIC].id; -- -- ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); -- VKD3D_ASSERT(ret); -- /* With the exception of vertex POSITION output, none of these are -- * system values. Pixel POSITION input is not equivalent to -- * SV_Position; the closer equivalent is VPOS, which is not declared -- * as a semantic. 
*/ -- if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX -- && output && usage == VKD3D_DECL_USAGE_POSITION) -- sysval = VKD3D_SHADER_SV_POSITION; -- } -- mask = (1 << var->data_type->dimx) - 1; -- - memset(element, 0, sizeof(*element)); -- if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) -+ -+ if (!(element->semantic_name = vkd3d_strdup(name))) - { - --signature->element_count; - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -@@ -6283,26 +6397,35 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, - } - element->semantic_index = var->semantic.index; - element->sysval_semantic = sysval; -- element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ element->component_type = component_type; - element->register_index = register_index; - element->target_location = register_index; - element->register_count = 1; - element->mask = mask; -- element->used_mask = mask; -+ element->used_mask = use_mask; - if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) - element->interpolation_mode = VKD3DSIM_LINEAR; - } - --static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) -+static void generate_vsir_signature(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_function_decl *func) - { - struct hlsl_ir_var *var; - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (var->is_input_semantic) -- sm1_generate_vsir_signature_entry(ctx, program, false, var); -- if (var->is_output_semantic) -- sm1_generate_vsir_signature_entry(ctx, program, true, var); -+ if (func == ctx->patch_constant_func) -+ { -+ generate_vsir_signature_entry(ctx, program, -+ &program->patch_constant_signature, var->is_output_semantic, var); -+ } -+ else -+ { -+ if (var->is_input_semantic) -+ generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var); -+ if 
(var->is_output_semantic) -+ generate_vsir_signature_entry(ctx, program, &program->output_signature, true, var); -+ } - } - } - -@@ -6873,7 +6996,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, - type = VKD3DSPR_TEMP; - register_index = 0; - } -- else if (!hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name, -+ else if (!sm1_register_from_semantic_name(&version, deref->var->semantic.name, - deref->var->semantic.index, true, &type, ®ister_index)) - { - VKD3D_ASSERT(reg.allocated); -@@ -6928,7 +7051,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; -- if (hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name, -+ if (sm1_register_from_semantic_name(&version, deref->var->semantic.name, - deref->var->semantic.index, false, &type, ®ister_index)) - { - writemask = (1 << deref->var->data_type->dimx) - 1; -@@ -7233,9 +7356,6 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo - } - } - --/* OBJECTIVE: Translate all the information from ctx and entry_func to the -- * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() -- * without relying on ctx and entry_func. 
*/ - static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) - { -@@ -7262,7 +7382,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - ctab->code = buffer.data; - ctab->size = buffer.size; - -- sm1_generate_vsir_signature(ctx, program); -+ generate_vsir_signature(ctx, program, entry_func); - - hlsl_block_init(&block); - sm1_generate_vsir_constant_defs(ctx, program, &block); -@@ -7272,6 +7392,29 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - sm1_generate_vsir_block(ctx, &entry_func->body, program); - } - -+/* OBJECTIVE: Translate all the information from ctx and entry_func to the -+ * vsir_program, so it can be used as input to tpf_compile() without relying -+ * on ctx and entry_func. */ -+static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -+ uint64_t config_flags, struct vsir_program *program) -+{ -+ struct vkd3d_shader_version version = {0}; -+ -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ -+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED)) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ generate_vsir_signature(ctx, program, entry_func); -+ if (version.type == VKD3D_SHADER_TYPE_HULL) -+ generate_vsir_signature(ctx, program, ctx->patch_constant_func); -+} -+ - static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, - struct hlsl_block **found_block) - { -@@ -7572,16 +7715,23 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru - return true; - } - --int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -- enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code 
*out) -+static void process_entry_function(struct hlsl_ctx *ctx, -+ const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func) - { - const struct hlsl_profile_info *profile = ctx->profile; -+ struct hlsl_block static_initializers, global_uniforms; - struct hlsl_block *const body = &entry_func->body; - struct recursive_call_ctx recursive_call_ctx; - struct hlsl_ir_var *var; - unsigned int i; - -- list_move_head(&body->instrs, &ctx->static_initializers.instrs); -+ if (!hlsl_clone_block(ctx, &static_initializers, &ctx->static_initializers)) -+ return; -+ list_move_head(&body->instrs, &static_initializers.instrs); -+ -+ if (!hlsl_clone_block(ctx, &global_uniforms, global_uniform_block)) -+ return; -+ list_move_head(&body->instrs, &global_uniforms.instrs); - - memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); - hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); -@@ -7590,7 +7740,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - /* Avoid going into an infinite loop when processing call instructions. - * lower_return() recurses into inferior calls. 
*/ - if (ctx->result) -- return ctx->result; -+ return; - - if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) - lower_ir(ctx, lower_f16tof32, body); -@@ -7602,20 +7752,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_matrix_swizzles, body); - lower_ir(ctx, lower_index_loads, body); - -- LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) -- { -- if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -- prepend_uniform_copy(ctx, body, var); -- } -- - for (i = 0; i < entry_func->parameters.count; ++i) - { - var = entry_func->parameters.vars[i]; - -- if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if (hlsl_type_is_resource(var->data_type)) - { - prepend_uniform_copy(ctx, body, var); - } -+ else if ((var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ { -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL && entry_func == ctx->patch_constant_func) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Patch constant function parameter \"%s\" cannot be uniform.", var->name); -+ else -+ prepend_uniform_copy(ctx, body, var); -+ } - else - { - if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT -@@ -7627,9 +7779,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - } - - if (var->storage_modifiers & HLSL_STORAGE_IN) -- prepend_input_var_copy(ctx, body, var); -+ prepend_input_var_copy(ctx, entry_func, var); - if (var->storage_modifiers & HLSL_STORAGE_OUT) -- append_output_var_copy(ctx, body, var); -+ append_output_var_copy(ctx, entry_func, var); - } - } - if (entry_func->return_var) -@@ -7638,19 +7790,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, - "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); - -- 
append_output_var_copy(ctx, body, entry_func->return_var); -+ append_output_var_copy(ctx, entry_func, entry_func->return_var); - } - -- parse_entry_function_attributes(ctx, entry_func); -- if (ctx->result) -- return ctx->result; -- -- if (profile->type == VKD3D_SHADER_TYPE_HULL) -- validate_hull_shader_attributes(ctx, entry_func); -- else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) -- hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -- "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); -- - if (profile->major_version >= 4) - { - hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); -@@ -7726,29 +7868,69 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - while (hlsl_transform_ir(ctx, dce, body, NULL)); - - compute_liveness(ctx, entry_func); -+ mark_vars_usage(ctx); - -- if (TRACE_ON()) -- rb_for_each_entry(&ctx->functions, dump_function, ctx); -+ calculate_resource_register_counts(ctx); - -- transform_derefs(ctx, mark_indexable_vars, body); -+ allocate_register_reservations(ctx, &ctx->extern_vars); -+ allocate_register_reservations(ctx, &entry_func->extern_vars); -+ allocate_semantic_registers(ctx, entry_func); -+} - -- calculate_resource_register_counts(ctx); -+int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -+ enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) -+{ -+ const struct hlsl_profile_info *profile = ctx->profile; -+ struct hlsl_block global_uniform_block; -+ struct hlsl_ir_var *var; -+ -+ parse_entry_function_attributes(ctx, entry_func); -+ if (ctx->result) -+ return ctx->result; -+ -+ if (profile->type == VKD3D_SHADER_TYPE_HULL) -+ validate_hull_shader_attributes(ctx, entry_func); -+ else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -+ "Entry point \"%s\" is 
missing a [numthreads] attribute.", entry_func->func->name); -+ -+ hlsl_block_init(&global_uniform_block); -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -+ prepend_uniform_copy(ctx, &global_uniform_block, var); -+ } - -- allocate_register_reservations(ctx); -+ process_entry_function(ctx, &global_uniform_block, entry_func); -+ if (ctx->result) -+ return ctx->result; -+ -+ if (profile->type == VKD3D_SHADER_TYPE_HULL) -+ { -+ process_entry_function(ctx, &global_uniform_block, ctx->patch_constant_func); -+ if (ctx->result) -+ return ctx->result; -+ } -+ -+ hlsl_block_cleanup(&global_uniform_block); - -- allocate_temp_registers(ctx, entry_func); - if (profile->major_version < 4) - { -+ mark_indexable_vars(ctx, entry_func); -+ allocate_temp_registers(ctx, entry_func); - allocate_const_registers(ctx, entry_func); - } - else - { - allocate_buffers(ctx); -- allocate_objects(ctx, HLSL_REGSET_TEXTURES); -- allocate_objects(ctx, HLSL_REGSET_UAVS); -+ allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES); -+ allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS); - } -- allocate_semantic_registers(ctx); -- allocate_objects(ctx, HLSL_REGSET_SAMPLERS); -+ allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); -+ -+ if (TRACE_ON()) -+ rb_for_each_entry(&ctx->functions, dump_function, ctx); - - if (ctx->result) - return ctx->result; -@@ -7777,7 +7959,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - } - - case VKD3D_SHADER_TARGET_DXBC_TPF: -- return hlsl_sm4_write(ctx, entry_func, out); -+ { -+ uint32_t config_flags = vkd3d_shader_init_config_flags(); -+ struct vsir_program program; -+ int result; -+ -+ sm4_generate_vsir(ctx, entry_func, config_flags, &program); -+ if (ctx->result) -+ { -+ vsir_program_cleanup(&program); -+ return ctx->result; -+ } -+ -+ result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func); -+ 
vsir_program_cleanup(&program); -+ return result; -+ } - - default: - ERR("Unsupported shader target type %#x.\n", target_type); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 0bcc3d0a1f7..c475bf92279 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -6786,6 +6786,12 @@ sysval_validation_data[] = - VKD3D_SHADER_COMPONENT_FLOAT, 4}, - [VKD3D_SHADER_SV_CULL_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, - VKD3D_SHADER_COMPONENT_FLOAT, 4}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_QUADINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_TRIINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_LINEDET] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, -+ [VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, - }; - - static void vsir_validate_signature_element(struct validation_context *ctx, -@@ -6942,11 +6948,23 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - idx, signature_type_name, element->interpolation_mode); - } - -+static const unsigned int allowed_signature_phases[] = -+{ -+ [SIGNATURE_TYPE_INPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, -+ [SIGNATURE_TYPE_OUTPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, -+ [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, -+}; -+ - static void vsir_validate_signature(struct validation_context *ctx, - const struct shader_signature *signature, enum vsir_signature_type signature_type) - { - unsigned int i; - -+ if (signature->element_count != 0 && !(allowed_signature_phases[signature_type] -+ & (1u << 
ctx->program->shader_version.type))) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Unexpected %s signature.", signature_type_names[signature_type]); -+ - for (i = 0; i < signature->element_count; ++i) - vsir_validate_signature_element(ctx, signature, signature_type, i); - } -@@ -7501,6 +7519,38 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c - "Patch constant signature is only valid for hull and domain shaders."); - } - -+ switch (program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_HULL: -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ case VKD3D_SHADER_TYPE_GEOMETRY: -+ if (program->input_control_point_count == 0) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Invalid zero input control point count."); -+ break; -+ -+ default: -+ if (program->input_control_point_count != 0) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Invalid input control point count %u.", -+ program->input_control_point_count); -+ } -+ -+ switch (program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_HULL: -+ if (program->output_control_point_count == 0) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Invalid zero output control point count."); -+ break; -+ -+ default: -+ if (program->output_control_point_count != 0) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Invalid output control point count %u.", -+ program->output_control_point_count); -+ } -+ - vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT); - vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); - vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index f79e97e92d4..54344f2afc0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -1403,12 
+1403,16 @@ struct sm4_stat - uint32_t fields[VKD3D_STAT_COUNT]; - }; - --struct tpf_writer -+struct tpf_compiler - { -+ /* OBJECTIVE: We want to get rid of this HLSL IR specific field. */ - struct hlsl_ctx *ctx; -- struct vkd3d_bytecode_buffer *buffer; -+ struct vsir_program *program; - struct vkd3d_sm4_lookup_tables lookup; - struct sm4_stat *stat; -+ -+ struct vkd3d_bytecode_buffer *buffer; -+ struct dxbc_writer dxbc; - }; - - static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) -@@ -1916,15 +1920,6 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - } - } - --static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct sm4_stat *stat, -- struct vkd3d_bytecode_buffer *buffer) --{ -- tpf->ctx = ctx; -- tpf->buffer = buffer; -- tpf->stat = stat; -- init_sm4_lookup_tables(&tpf->lookup); --} -- - static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode( - const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) - { -@@ -2986,7 +2981,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - return VKD3D_OK; - } - --static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); -+static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block); - - static bool type_is_integer(const struct hlsl_type *type) - { -@@ -3002,8 +2997,8 @@ static bool type_is_integer(const struct hlsl_type *type) - } - } - --bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, enum vkd3d_shader_register_type *type, bool *has_idx) -+bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, -+ const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) - { - unsigned int i; - -@@ -3023,6 +3018,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - - 
{"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID, false}, - -+ {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_OUTPOINTID, false}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_PRIMID, false}, -+ - /* Put sv_target in this table, instead of letting it fall through to - * default varying allocation, so that the register index matches the - * usage index. */ -@@ -3035,9 +3033,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { -- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) -+ if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) - && output == register_table[i].output -- && ctx->profile->type == register_table[i].shader_type) -+ && version->type == register_table[i].shader_type) - { - if (type) - *type = register_table[i].type; -@@ -3049,8 +3047,57 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - return false; - } - --bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, -- struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output) -+static bool get_tessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, -+ enum vkd3d_tessellator_domain domain, uint32_t index) -+{ -+ switch (domain) -+ { -+ case VKD3D_TESSELLATOR_DOMAIN_LINE: -+ if (index == 0) -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; -+ else if (index == 1) -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; -+ else -+ return false; -+ return true; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; -+ return index < 3; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_QUAD: -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE; -+ return index < 4; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static bool get_insidetessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, -+ enum 
vkd3d_tessellator_domain domain, uint32_t index) -+{ -+ switch (domain) -+ { -+ case VKD3D_TESSELLATOR_DOMAIN_LINE: -+ return false; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIINT; -+ return index == 0; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_QUAD: -+ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADINT; -+ return index < 2; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, -+ const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, -+ const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func) - { - unsigned int i; - -@@ -3075,6 +3122,12 @@ bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, - {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, - {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, - -+ {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_HULL, ~0u}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, -+ -+ {"sv_position", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_POSITION}, -+ - {"position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_PRIMITIVE_ID}, -@@ -3098,16 +3151,42 @@ bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, - {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, - {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, - }; -- bool needs_compat_mapping = ascii_strncasecmp(hlsl_semantic->name, "sv_", 3); -+ bool needs_compat_mapping = ascii_strncasecmp(semantic_name, "sv_", 3); -+ -+ if 
(is_patch_constant_func) -+ { -+ if (output) -+ { -+ if (!ascii_strcasecmp(semantic_name, "sv_tessfactor")) -+ return get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); -+ if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor")) -+ return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); -+ if (!ascii_strcasecmp(semantic_name, "sv_position")) -+ { -+ *sysval_semantic = VKD3D_SHADER_SV_NONE; -+ return true; -+ } -+ } -+ else -+ { -+ if (!ascii_strcasecmp(semantic_name, "sv_primitiveid") -+ || !ascii_strcasecmp(semantic_name, "sv_position")) -+ { -+ *sysval_semantic = ~0u; -+ return true; -+ } -+ return false; -+ } -+ } - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { -- if (!ascii_strcasecmp(hlsl_semantic->name, semantics[i].name) -+ if (!ascii_strcasecmp(semantic_name, semantics[i].name) - && output == semantics[i].output -- && (ctx->semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) -- && ctx->profile->type == semantics[i].shader_type) -+ && (semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) -+ && version->type == semantics[i].shader_type) - { -- *semantic = semantics[i].semantic; -+ *sysval_semantic = semantics[i].semantic; - return true; - } - } -@@ -3115,7 +3194,7 @@ bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, - if (!needs_compat_mapping) - return false; - -- *semantic = VKD3D_SHADER_SV_NONE; -+ *sysval_semantic = VKD3D_SHADER_SV_NONE; - return true; - } - -@@ -3133,110 +3212,46 @@ static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - ctx->result = buffer->status; - } - --static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) -+static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag) - { -+ bool output = tag == TAG_OSGN || tag == TAG_PCSG; - struct vkd3d_bytecode_buffer buffer = {0}; -- struct vkd3d_string_buffer 
*string; -- const struct hlsl_ir_var *var; -- size_t count_position; - unsigned int i; -- bool ret; - -- count_position = put_u32(&buffer, 0); -+ put_u32(&buffer, signature->element_count); - put_u32(&buffer, 8); /* unknown */ - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ for (i = 0; i < signature->element_count; ++i) - { -- unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; -- enum vkd3d_shader_sysval_semantic semantic; -- uint32_t usage_idx, reg_idx; -- bool has_idx; -- -- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) -- continue; -- -- ret = sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); -- VKD3D_ASSERT(ret); -- if (semantic == ~0u) -- continue; -- usage_idx = var->semantic.index; -+ const struct signature_element *element = &signature->elements[i]; -+ enum vkd3d_shader_sysval_semantic sysval; -+ uint32_t used_mask = element->used_mask; - -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, &has_idx)) -- { -- reg_idx = has_idx ? var->semantic.index : ~0u; -- } -- else -- { -- VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); -- reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; -- } -- -- use_mask = width; /* FIXME: accurately report use mask */ - if (output) -- use_mask = 0xf ^ use_mask; -+ used_mask = 0xf ^ used_mask; - -- /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). 
*/ -- if (semantic >= VKD3D_SHADER_SV_TARGET) -- semantic = VKD3D_SHADER_SV_NONE; -+ sysval = element->sysval_semantic; -+ if (sysval >= VKD3D_SHADER_SV_TARGET) -+ sysval = VKD3D_SHADER_SV_NONE; - - put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, usage_idx); -- put_u32(&buffer, semantic); -- switch (var->data_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_HALF: -- put_u32(&buffer, VKD3D_SHADER_COMPONENT_FLOAT); -- break; -- -- case HLSL_TYPE_INT: -- put_u32(&buffer, VKD3D_SHADER_COMPONENT_INT); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_UINT: -- put_u32(&buffer, VKD3D_SHADER_COMPONENT_UINT); -- break; -- -- default: -- if ((string = hlsl_type_to_string(ctx, var->data_type))) -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Invalid data type %s for semantic variable %s.", string->buffer, var->name); -- hlsl_release_string_buffer(ctx, string); -- put_u32(&buffer, VKD3D_SHADER_COMPONENT_VOID); -- } -- put_u32(&buffer, reg_idx); -- put_u32(&buffer, vkd3d_make_u16(width, use_mask)); -+ put_u32(&buffer, element->semantic_index); -+ put_u32(&buffer, sysval); -+ put_u32(&buffer, element->component_type); -+ put_u32(&buffer, element->register_index); -+ put_u32(&buffer, vkd3d_make_u16(element->mask, used_mask)); - } - -- i = 0; -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ for (i = 0; i < signature->element_count; ++i) - { -- enum vkd3d_shader_sysval_semantic semantic; -- const char *name = var->semantic.name; -+ const struct signature_element *element = &signature->elements[i]; - size_t string_offset; - -- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) -- continue; -- -- sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); -- if (semantic == ~0u) -- continue; -- -- if (semantic == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) -- string_offset = put_string(&buffer, "SV_Target"); -- else if (semantic == 
VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) -- string_offset = put_string(&buffer, "SV_Depth"); -- else if (semantic == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) -- string_offset = put_string(&buffer, "SV_Position"); -- else -- string_offset = put_string(&buffer, name); -- set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); -+ string_offset = put_string(&buffer, element->semantic_name); -+ set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); - } - -- set_u32(&buffer, count_position, i); -- -- add_section(ctx, dxbc, output ? TAG_OSGN : TAG_ISGN, &buffer); -+ add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); - } - - static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) -@@ -3863,7 +3878,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - { - uint32_t flags = 0; - -- if (var->last_read) -+ if (var->is_read) - flags |= D3D_SVF_USED; - - put_u32(&buffer, 0); /* name */ -@@ -4091,11 +4106,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s - *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); - } - --static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, -+static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg, - uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) - { -- const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; -+ const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref); - const struct hlsl_ir_var *var = deref->var; -+ struct hlsl_ctx *ctx = tpf->ctx; - - if (var->is_uniform) - { -@@ -4105,7 +4122,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - reg->type = VKD3DSPR_RESOURCE; - reg->dimension = 
VSIR_DIMENSION_VEC4; -- if (hlsl_version_ge(ctx, 5, 1)) -+ if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; - reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ -@@ -4124,7 +4141,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - reg->type = VKD3DSPR_UAV; - reg->dimension = VSIR_DIMENSION_VEC4; -- if (hlsl_version_ge(ctx, 5, 1)) -+ if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; - reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ -@@ -4143,7 +4160,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - reg->type = VKD3DSPR_SAMPLER; - reg->dimension = VSIR_DIMENSION_NONE; -- if (hlsl_version_ge(ctx, 5, 1)) -+ if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; - reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ -@@ -4165,7 +4182,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); - reg->type = VKD3DSPR_CONSTBUFFER; - reg->dimension = VSIR_DIMENSION_VEC4; -- if (hlsl_version_ge(ctx, 5, 1)) -+ if (vkd3d_shader_ver_ge(version, 5, 1)) - { - reg->idx[0].offset = var->buffer->reg.id; - reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ -@@ -4185,7 +4202,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - bool has_idx; - -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, ®->type, &has_idx)) -+ if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - -@@ -4195,7 +4212,10 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - reg->idx_count = 1; - } - 
-- reg->dimension = VSIR_DIMENSION_VEC4; -+ if (shader_sm4_is_scalar_register(reg)) -+ reg->dimension = VSIR_DIMENSION_SCALAR; -+ else -+ reg->dimension = VSIR_DIMENSION_VEC4; - *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); - } - else -@@ -4214,7 +4234,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - bool has_idx; - -- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, ®->type, &has_idx)) -+ if (sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx)) - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - -@@ -4224,7 +4244,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - reg->idx_count = 1; - } - -- if (reg->type == VKD3DSPR_DEPTHOUT) -+ if (shader_sm4_is_scalar_register(reg)) - reg->dimension = VSIR_DIMENSION_SCALAR; - else - reg->dimension = VSIR_DIMENSION_VEC4; -@@ -4250,13 +4270,13 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - } - } - --static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, -+static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, - const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) - { - unsigned int hlsl_swizzle; - uint32_t writemask; - -- sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref, sm4_instr); -+ sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr); - if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) - { - hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -@@ -4294,7 +4314,7 @@ static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, - } - } - --static void sm4_src_from_node(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, -+static void sm4_src_from_node(const struct 
tpf_compiler *tpf, struct vkd3d_shader_src_param *src, - const struct hlsl_ir_node *instr, uint32_t map_writemask) - { - unsigned int hlsl_swizzle; -@@ -4330,7 +4350,7 @@ static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_ - return 0; - } - --static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, -+static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, - enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) - { - const struct vkd3d_sm4_register_type_info *register_type_info; -@@ -4390,7 +4410,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v - return token; - } - --static void sm4_write_register_index(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, -+static void sm4_write_register_index(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, - unsigned int j) - { - unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); -@@ -4420,7 +4440,7 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct - } - } - --static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) -+static void sm4_write_dst_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_dst_param *dst) - { - struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - uint32_t token = 0; -@@ -4433,7 +4453,7 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk - sm4_write_register_index(tpf, &dst->reg, j); - } - --static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vkd3d_shader_src_param *src) -+static void sm4_write_src_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_src_param *src) - { - struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - uint32_t token = 0, mod_token = 0; -@@ -4494,9 +4514,9 @@ static void 
sm4_write_src_register(const struct tpf_writer *tpf, const struct vk - } - } - --static void sm4_update_stat_counters(const struct tpf_writer *tpf, const struct sm4_instruction *instr) -+static void sm4_update_stat_counters(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) - { -- enum vkd3d_shader_type shader_type = tpf->ctx->profile->type; -+ enum vkd3d_shader_type shader_type = tpf->program->shader_version.type; - enum vkd3d_sm4_stat_field stat_field; - uint32_t opcode; - -@@ -4539,7 +4559,7 @@ static void sm4_update_stat_counters(const struct tpf_writer *tpf, const struct - } - } - --static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) -+static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) - { - uint32_t token = instr->opcode | instr->extra_bits; - struct vkd3d_bytecode_buffer *buffer = tpf->buffer; -@@ -4606,7 +4626,7 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - return true; - } - --static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) -+static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) - { - size_t size = (cbuffer->used_size + 3) / 4; - -@@ -4641,7 +4661,7 @@ static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const st - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) -+static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct extern_resource *resource) - { - unsigned int i; - struct sm4_instruction instr = -@@ -4682,9 +4702,10 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex - } - } - --static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, -+static void 
write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource, - bool uav) - { -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; - struct hlsl_type *component_type; - struct sm4_instruction instr; -@@ -4714,14 +4735,14 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; - -- if (hlsl_version_lt(tpf->ctx, 4, 1) && multisampled && !component_type->sample_count) -+ if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count) - { - hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Multisampled texture object declaration needs sample count for profile %s.", -- tpf->ctx->profile->name); -+ "Multisampled texture object declaration needs sample count for profile %u.%u.", -+ version->major, version->minor); - } - -- if (hlsl_version_ge(tpf->ctx, 5, 1)) -+ if (vkd3d_shader_ver_ge(version, 5, 1)) - { - VKD3D_ASSERT(!i); - instr.dsts[0].reg.idx[0].offset = resource->id; -@@ -4767,9 +4788,10 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - } - } - --static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) -+static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, -+ const struct hlsl_ir_var *var, bool is_patch_constant_func) - { -- const struct hlsl_profile_info *profile = tpf->ctx->profile; -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - const bool output = var->is_output_semantic; - enum vkd3d_shader_sysval_semantic semantic; - bool has_idx; -@@ -4780,7 +4802,7 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl - .dst_count = 1, - }; - -- if 
(hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, &has_idx)) -+ if (sm4_register_from_semantic_name(version, var->semantic.name, output, &instr.dsts[0].reg.type, &has_idx)) - { - if (has_idx) - { -@@ -4801,10 +4823,11 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl - instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; - } - -- if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) -+ if (shader_sm4_is_scalar_register(&instr.dsts[0].reg)) - instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; - -- sysval_semantic_from_hlsl(&semantic, tpf->ctx, &var->semantic, output); -+ sm4_sysval_semantic_from_semantic_name(&semantic, version, tpf->ctx->semantic_compat_mapping, -+ tpf->ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); - if (semantic == ~0u) - semantic = VKD3D_SHADER_SV_NONE; - -@@ -4813,25 +4836,26 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl - switch (semantic) - { - case VKD3D_SHADER_SV_NONE: -- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; - break; - - case VKD3D_SHADER_SV_INSTANCE_ID: -+ case VKD3D_SHADER_SV_IS_FRONT_FACE: - case VKD3D_SHADER_SV_PRIMITIVE_ID: -- case VKD3D_SHADER_SV_VERTEX_ID: - case VKD3D_SHADER_SV_SAMPLE_INDEX: -- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ case VKD3D_SHADER_SV_VERTEX_ID: -+ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; - break; - - default: -- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) - ? 
VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; - break; - } - -- if (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL) - { - enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; - -@@ -4870,32 +4894,32 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl - } - else - { -- if (semantic == VKD3D_SHADER_SV_NONE || profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; - else - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; - } - -- switch (semantic) -+ if (instr.opcode == VKD3D_SM4_OP_DCL_OUTPUT) - { -- case VKD3D_SHADER_SV_COVERAGE: -- case VKD3D_SHADER_SV_DEPTH: -- case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: -- case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: -- case VKD3D_SHADER_SV_TARGET: -- case VKD3D_SHADER_SV_NONE: -- break; -- -- default: -- instr.idx_count = 1; -- instr.idx[0] = semantic; -- break; -+ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET -+ || instr.dsts[0].reg.type != VKD3DSPR_OUTPUT); -+ } -+ else if (instr.opcode == VKD3D_SM4_OP_DCL_INPUT || instr.opcode == VKD3D_SM4_OP_DCL_INPUT_PS) -+ { -+ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); -+ } -+ else -+ { -+ VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); -+ instr.idx_count = 1; -+ instr.idx[0] = vkd3d_siv_from_sysval_indexed(semantic, var->semantic.index); - } - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) -+static void write_sm4_dcl_temps(const struct tpf_compiler *tpf, uint32_t temp_count) - { - struct sm4_instruction instr = - { -@@ -4908,7 +4932,7 @@ static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_coun - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t idx, -+static void 
write_sm4_dcl_indexable_temp(const struct tpf_compiler *tpf, uint32_t idx, - uint32_t size, uint32_t comp_count) - { - struct sm4_instruction instr = -@@ -4922,7 +4946,7 @@ static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) -+static void write_sm4_dcl_thread_group(const struct tpf_compiler *tpf, const uint32_t thread_count[3]) - { - struct sm4_instruction instr = - { -@@ -4937,7 +4961,7 @@ static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint3 - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_global_flags(const struct tpf_writer *tpf, uint32_t flags) -+static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) - { - struct sm4_instruction instr = - { -@@ -4948,7 +4972,94 @@ static void write_sm4_dcl_global_flags(const struct tpf_writer *tpf, uint32_t fl - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ret(const struct tpf_writer *tpf) -+static void tpf_write_hs_decls(const struct tpf_compiler *tpf) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_HS_DECLS, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, -+ .extra_bits = count << 
VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_write_dcl_output_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, -+ .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_write_dcl_tessellator_domain(const struct tpf_compiler *tpf, enum vkd3d_tessellator_domain domain) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, -+ .extra_bits = domain << VKD3D_SM5_TESSELLATOR_SHIFT, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_write_dcl_tessellator_partitioning(const struct tpf_compiler *tpf, -+ enum vkd3d_shader_tessellator_partitioning partitioning) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, -+ .extra_bits = partitioning << VKD3D_SM5_TESSELLATOR_SHIFT, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler *tpf, -+ enum vkd3d_shader_tessellator_output_primitive output_primitive) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, -+ .extra_bits = output_primitive << VKD3D_SM5_TESSELLATOR_SHIFT, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void write_sm4_ret(const struct tpf_compiler *tpf) - { - struct sm4_instruction instr = - { -@@ -4958,7 +5069,7 @@ static void write_sm4_ret(const struct tpf_writer *tpf) - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_unary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier 
src_mod) - { - struct sm4_instruction instr; -@@ -4976,7 +5087,7 @@ static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opco - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_unary_op_with_two_destinations(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src) - { - struct sm4_instruction instr; -@@ -4997,7 +5108,7 @@ static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tp - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_binary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { - struct sm4_instruction instr; -@@ -5016,7 +5127,7 @@ static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opc - } - - /* dp# instructions don't map the swizzle. 
*/ --static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_binary_op_dot(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { - struct sm4_instruction instr; -@@ -5034,7 +5145,7 @@ static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4 - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, -+static void write_sm4_binary_op_with_two_destinations(const struct tpf_compiler *tpf, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, - const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { -@@ -5057,7 +5168,7 @@ static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *t - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_ternary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, - const struct hlsl_ir_node *src3) - { -@@ -5077,7 +5188,7 @@ static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_op - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, -+static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, - enum hlsl_sampler_dim dim) -@@ -5086,6 +5197,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node - bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE - && 
(resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); - bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; - struct sm4_instruction instr; - -@@ -5140,7 +5252,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node - reg->dimension = VSIR_DIMENSION_SCALAR; - reg->u.immconst_u32[0] = index->value.u[0].u; - } -- else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) -+ else if (version->major == 4 && version->minor == 0) - { - hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); - } -@@ -5155,7 +5267,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *coords = load->coords.node; -@@ -5235,7 +5347,7 @@ static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_ - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_ir_node *dst = &load->node; -@@ -5257,7 +5369,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_resinfo(const struct tpf_writer *tpf, 
const struct hlsl_ir_resource_load *load) -+static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_ir_node *dst = &load->node; -@@ -5292,7 +5404,7 @@ static bool type_is_float(const struct hlsl_type *type) - return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; - } - --static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, -+static void write_sm4_cast_from_bool(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr, - const struct hlsl_ir_node *arg, uint32_t mask) - { - struct sm4_instruction instr; -@@ -5312,7 +5424,7 @@ static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) -+static void write_sm4_cast(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) - { - static const union - { -@@ -5421,7 +5533,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex - } - } - --static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, -+static void write_sm4_store_uav_typed(const struct tpf_compiler *tpf, const struct hlsl_deref *dst, - const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) - { - struct sm4_instruction instr; -@@ -5429,7 +5541,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; - -- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); -+ sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -@@ -5439,7 
+5551,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) -+static void write_sm4_rasterizer_sample_count(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst) - { - struct sm4_instruction instr; - -@@ -5458,8 +5570,9 @@ static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, cons - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) -+static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) - { -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_ir_node *arg2 = expr->operands[1].node; - const struct hlsl_ir_node *arg3 = expr->operands[2].node; -@@ -5474,7 +5587,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - switch (expr->op) - { - case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: -- if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL && vkd3d_shader_ver_ge(version, 4, 1)) - write_sm4_rasterizer_sample_count(tpf, &expr->node); - else - hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -@@ -5595,7 +5708,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - case HLSL_TYPE_FLOAT: - /* SM5 comes with a RCP opcode */ -- if (tpf->ctx->profile->major_version >= 5) -+ if (vkd3d_shader_ver_ge(version, 5, 0)) - { - write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); - } -@@ -5973,7 +6086,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - hlsl_release_string_buffer(tpf->ctx, dst_type_string); - } - --static 
void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) -+static void write_sm4_if(const struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) - { - struct sm4_instruction instr = - { -@@ -6002,7 +6115,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if * - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) -+static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) - { - struct sm4_instruction instr = {0}; - -@@ -6041,16 +6154,17 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju - /* Does this variable's data come directly from the API user, rather than being - * temporary or from a previous shader stage? - * I.e. is it a uniform or VS input? */ --static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) -+static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) - { - if (var->is_uniform) - return true; - -- return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; -+ return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; - } - --static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) -+static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load) - { -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - const struct hlsl_type *type = load->node.data_type; - struct sm4_instruction instr; - -@@ -6060,7 +6174,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo - instr.dst_count = 1; - - VKD3D_ASSERT(hlsl_is_numeric_type(type)); -- if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) -+ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) - { - struct 
hlsl_constant_value value; - -@@ -6088,7 +6202,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) -+static void write_sm4_loop(const struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) - { - struct sm4_instruction instr = - { -@@ -6103,10 +6217,11 @@ static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_lo - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, -+static void write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_deref *sampler, - const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset) - { -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - struct vkd3d_shader_src_param *src; - struct sm4_instruction instr; - -@@ -6123,7 +6238,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ - { - if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) - { -- if (tpf->ctx->profile->major_version < 5) -+ if (!vkd3d_shader_ver_ge(version, 5, 0)) - { - hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, - "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); -@@ -6144,7 +6259,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_ir_node *texel_offset = load->texel_offset.node; - const struct hlsl_ir_node *sample_index = 
load->sample_index.node; -@@ -6213,7 +6328,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h - } - } - --static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) -+static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) - { - struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); - -@@ -6232,7 +6347,7 @@ static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct - write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); - } - --static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) -+static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) - { - const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm4_instruction instr; -@@ -6241,7 +6356,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - -- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); -+ sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); - instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); - instr.dst_count = 1; - -@@ -6251,7 +6366,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_switch *s) -+static void write_sm4_switch(const struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) - { - const struct hlsl_ir_node *selector = s->selector.node; - struct hlsl_ir_switch_case *c; -@@ -6291,7 +6406,7 @@ static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_ - write_sm4_instruction(tpf, &instr); - } - --static void 
write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) -+static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle) - { - unsigned int hlsl_swizzle; - struct sm4_instruction instr; -@@ -6312,7 +6427,7 @@ static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) -+static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block) - { - const struct hlsl_ir_node *instr; - -@@ -6386,18 +6501,65 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc - } - } - --static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func, -- struct sm4_stat *stat, struct dxbc_writer *dxbc) -+static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) - { -- const struct hlsl_profile_info *profile = ctx->profile; -+ struct hlsl_ctx *ctx = tpf->ctx; -+ const struct hlsl_scope *scope; -+ const struct hlsl_ir_var *var; -+ uint32_t temp_count; -+ -+ compute_liveness(ctx, func); -+ mark_indexable_vars(ctx, func); -+ temp_count = allocate_temp_registers(ctx, func); -+ if (ctx->result) -+ return; -+ -+ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if ((var->is_input_semantic && var->last_read) -+ || (var->is_output_semantic && var->first_write)) -+ tpf_write_dcl_semantic(tpf, var, func == ctx->patch_constant_func); -+ } -+ -+ if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) -+ write_sm4_dcl_thread_group(tpf, ctx->thread_count); -+ -+ if (temp_count) -+ write_sm4_dcl_temps(tpf, temp_count); -+ -+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->is_uniform || 
var->is_input_semantic || var->is_output_semantic) -+ continue; -+ if (!var->regs[HLSL_REGSET_NUMERIC].allocated) -+ continue; -+ -+ if (var->indexable) -+ { -+ unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; -+ unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; -+ -+ write_sm4_dcl_indexable_temp(tpf, id, size, 4); -+ } -+ } -+ } -+ -+ write_sm4_block(tpf, &func->body); -+ -+ write_sm4_ret(tpf); -+} -+ -+static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func) -+{ -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - struct vkd3d_bytecode_buffer buffer = {0}; - struct extern_resource *extern_resources; - unsigned int extern_resources_count, i; - const struct hlsl_buffer *cbuffer; -- const struct hlsl_scope *scope; -- const struct hlsl_ir_var *var; -+ struct hlsl_ctx *ctx = tpf->ctx; - size_t token_count_position; -- struct tpf_writer tpf; - - static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = - { -@@ -6412,17 +6574,28 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d - VKD3D_SM4_LIB, - }; - -- tpf_writer_init(&tpf, ctx, stat, &buffer); -+ tpf->buffer = &buffer; - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - -- put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); -+ put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); - token_count_position = put_u32(&buffer, 0); - -+ if (version->type == VKD3D_SHADER_TYPE_HULL) -+ { -+ tpf_write_hs_decls(tpf); -+ -+ tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */ -+ tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count); -+ tpf_write_dcl_tessellator_domain(tpf, ctx->domain); -+ tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); -+ 
tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); -+ } -+ - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -- write_sm4_dcl_constant_buffer(&tpf, cbuffer); -+ write_sm4_dcl_constant_buffer(tpf, cbuffer); - } - - for (i = 0; i < extern_resources_count; ++i) -@@ -6430,62 +6603,40 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d - const struct extern_resource *resource = &extern_resources[i]; - - if (resource->regset == HLSL_REGSET_SAMPLERS) -- write_sm4_dcl_samplers(&tpf, resource); -+ write_sm4_dcl_samplers(tpf, resource); - else if (resource->regset == HLSL_REGSET_TEXTURES) -- write_sm4_dcl_textures(&tpf, resource, false); -+ write_sm4_dcl_textures(tpf, resource, false); - else if (resource->regset == HLSL_REGSET_UAVS) -- write_sm4_dcl_textures(&tpf, resource, true); -+ write_sm4_dcl_textures(tpf, resource, true); - } - -- if (entry_func->early_depth_test && profile->major_version >= 5) -- write_sm4_dcl_global_flags(&tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); -- -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) -- write_sm4_dcl_semantic(&tpf, var); -- } -+ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) -+ write_sm4_dcl_global_flags(tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); - -- if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) -- write_sm4_dcl_thread_group(&tpf, ctx->thread_count); -+ if (version->type == VKD3D_SHADER_TYPE_HULL) -+ tpf_write_hs_control_point_phase(tpf); - -- if (ctx->temp_count) -- write_sm4_dcl_temps(&tpf, ctx->temp_count); -+ tpf_write_shader_function(tpf, entry_func); - -- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ if (version->type == VKD3D_SHADER_TYPE_HULL) - { -- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -- { -- if 
(var->is_uniform || var->is_input_semantic || var->is_output_semantic) -- continue; -- if (!var->regs[HLSL_REGSET_NUMERIC].allocated) -- continue; -- -- if (var->indexable) -- { -- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; -- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; -- -- write_sm4_dcl_indexable_temp(&tpf, id, size, 4); -- } -- } -+ tpf_write_hs_fork_phase(tpf); -+ tpf_write_shader_function(tpf, ctx->patch_constant_func); - } - -- write_sm4_block(&tpf, &entry_func->body); -- -- write_sm4_ret(&tpf); -- - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - -- add_section(ctx, dxbc, TAG_SHDR, &buffer); -+ add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer); -+ tpf->buffer = NULL; - - sm4_free_extern_resources(extern_resources, extern_resources_count); - } - --static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) -+static void tpf_write_sfi0(struct tpf_compiler *tpf) - { - struct extern_resource *extern_resources; - unsigned int extern_resources_count; -+ struct hlsl_ctx *ctx = tpf->ctx; - uint64_t *flags; - - flags = vkd3d_calloc(1, sizeof(*flags)); -@@ -6502,14 +6653,16 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. 
*/ - - if (*flags) -- dxbc_writer_add_section(dxbc, TAG_SFI0, flags, sizeof(*flags)); -+ dxbc_writer_add_section(&tpf->dxbc, TAG_SFI0, flags, sizeof(*flags)); - else - vkd3d_free(flags); - } - --static void write_sm4_stat(struct hlsl_ctx *ctx, const struct sm4_stat *stat, struct dxbc_writer *dxbc) -+static void tpf_write_stat(struct tpf_compiler *tpf) - { - struct vkd3d_bytecode_buffer buffer = {0}; -+ const struct sm4_stat *stat = tpf->stat; -+ struct hlsl_ctx *ctx = tpf->ctx; - - put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); - put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); -@@ -6553,28 +6706,40 @@ static void write_sm4_stat(struct hlsl_ctx *ctx, const struct sm4_stat *stat, st - put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); - } - -- add_section(ctx, dxbc, TAG_STAT, &buffer); -+ add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer); - } - --int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -+/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving -+ * data from the other parameters instead, so they can be removed from the -+ * arguments and this function can be independent of HLSL structs. 
*/ -+int tpf_compile(struct vsir_program *program, uint64_t config_flags, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -+ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -+ struct tpf_compiler tpf = {0}; - struct sm4_stat stat = {0}; -- struct dxbc_writer dxbc; - size_t i; - int ret; - -- dxbc_writer_init(&dxbc); -- -- write_sm4_signature(ctx, &dxbc, false); -- write_sm4_signature(ctx, &dxbc, true); -- write_sm4_rdef(ctx, &dxbc); -- write_sm4_shdr(ctx, entry_func, &stat, &dxbc); -- write_sm4_sfi0(ctx, &dxbc); -- write_sm4_stat(ctx, &stat, &dxbc); -+ tpf.ctx = ctx; -+ tpf.program = program; -+ tpf.buffer = NULL; -+ tpf.stat = &stat; -+ init_sm4_lookup_tables(&tpf.lookup); -+ dxbc_writer_init(&tpf.dxbc); -+ -+ tpf_write_signature(&tpf, &program->input_signature, TAG_ISGN); -+ tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL) -+ tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); -+ write_sm4_rdef(ctx, &tpf.dxbc); -+ tpf_write_shdr(&tpf, entry_func); -+ tpf_write_sfi0(&tpf); -+ tpf_write_stat(&tpf); - - if (!(ret = ctx->result)) -- ret = dxbc_writer_write(&dxbc, out); -- for (i = 0; i < dxbc.section_count; ++i) -- vkd3d_shader_free_shader_code(&dxbc.sections[i].data); -+ ret = dxbc_writer_write(&tpf.dxbc, out); -+ for (i = 0; i < tpf.dxbc.section_count; ++i) -+ vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data); - return ret; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 54b87373ed1..d63bfd96121 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1602,6 +1602,16 @@ void vkd3d_shader_trace_text_(const char *text, size_t size, const char *functio - #define vkd3d_shader_trace_text(text, size) \ - vkd3d_shader_trace_text_(text, size, __FUNCTION__) - -+bool 
sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, -+ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); -+bool sm1_usage_from_semantic_name(const char *semantic_name, -+ uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); -+bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, -+ const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx); -+bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, -+ const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, -+ const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func); -+ - int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, - struct vkd3d_shader_message_context *message_context, struct vsir_program *program); - int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, -@@ -1914,7 +1924,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, - #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) - #define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) - --#define DXBC_MAX_SECTION_COUNT 6 -+#define DXBC_MAX_SECTION_COUNT 7 - - struct dxbc_writer - { --- -2.45.2 - diff --git a/patches/vkd3d-latest/0007-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch b/patches/vkd3d-latest/0007-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch deleted file mode 100644 index 660793c8..00000000 --- a/patches/vkd3d-latest/0007-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch +++ /dev/null @@ -1,1584 +0,0 @@ -From 1060207c8e59871433ade5578fd0a215ebebc6e3 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 18 Oct 2024 07:31:22 +1100 -Subject: [PATCH] Updated vkd3d to 
03ad04c89004c7f800c5b1a0ea7ba28622916328. - ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 21 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 3 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 220 ++++++---- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 3 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 68 ++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 4 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 185 ++++++++- - libs/vkd3d/libs/vkd3d-shader/msl.c | 3 + - libs/vkd3d/libs/vkd3d-shader/spirv.c | 3 + - libs/vkd3d/libs/vkd3d-shader/tpf.c | 3 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 6 +- - libs/vkd3d/libs/vkd3d/command.c | 41 +- - libs/vkd3d/libs/vkd3d/state.c | 376 +++++++++--------- - 13 files changed, 607 insertions(+), 329 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 8f7e5cb666b..ae8e864c179 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -561,6 +561,21 @@ static struct signature_element *find_signature_element_by_register_index( - return NULL; - } - -+/* Add missing bits to a mask to make it contiguous. 
*/ -+static unsigned int make_mask_contiguous(unsigned int mask) -+{ -+ static const unsigned int table[] = -+ { -+ 0x0, 0x1, 0x2, 0x3, -+ 0x4, 0x7, 0x6, 0x7, -+ 0x8, 0xf, 0xe, 0xf, -+ 0xc, 0xf, 0xe, 0xf, -+ }; -+ -+ VKD3D_ASSERT(mask < ARRAY_SIZE(table)); -+ return table[mask]; -+} -+ - static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, - const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, - unsigned int register_index, bool is_dcl, unsigned int mask) -@@ -576,7 +591,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp - - if ((element = find_signature_element(signature, name, index))) - { -- element->mask |= mask; -+ element->mask = make_mask_contiguous(element->mask | mask); - if (!is_dcl) - element->used_mask |= mask; - return true; -@@ -596,7 +611,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp - element->register_index = register_index; - element->target_location = register_index; - element->register_count = 1; -- element->mask = mask; -+ element->mask = make_mask_contiguous(mask); - element->used_mask = is_dcl ? 0 : mask; - if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) - element->interpolation_mode = VKD3DSIM_LINEAR; -@@ -1305,7 +1320,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st - - /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vsir_program_init(program, compile_info, &version, -- code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED)) -+ code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, false)) - return VKD3D_ERROR_OUT_OF_MEMORY; - - vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 59494d2500d..f9f44f34bcf 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -10311,7 +10311,8 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro - - /* Estimate instruction count to avoid reallocation in most shaders. */ - count = max(token_count, 400) - 400; -- if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS)) -+ if (!vsir_program_init(program, compile_info, &version, -+ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, true)) - return VKD3D_ERROR_OUT_OF_MEMORY; - vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); - sm6->ptr = &sm6->start[1]; -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index e98dfcf4f32..f3f7a2c765c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -25,6 +25,17 @@ static inline size_t put_u32_unaligned(struct vkd3d_bytecode_buffer *buffer, uin - return bytecode_put_bytes_unaligned(buffer, &value, sizeof(value)); - } - -+struct fx_4_binary_type -+{ -+ uint32_t name; -+ uint32_t class; -+ uint32_t element_count; -+ uint32_t unpacked_size; -+ uint32_t stride; -+ uint32_t packed_size; -+ uint32_t typeinfo; -+}; -+ - struct string_entry - { - struct rb_entry entry; -@@ -2842,6 +2853,7 @@ struct fx_parser - const uint8_t *ptr, *start, *end; - struct vkd3d_shader_message_context *message_context; - struct vkd3d_string_buffer buffer; -+ unsigned int indent; - struct - { - const uint8_t *ptr; -@@ -2949,6 +2961,131 @@ static const char *fx_4_get_string(struct fx_parser *parser, uint32_t offset) - return (const char 
*)(parser->unstructured.ptr + offset); - } - -+static void parse_fx_start_indent(struct fx_parser *parser) -+{ -+ ++parser->indent; -+} -+ -+static void parse_fx_end_indent(struct fx_parser *parser) -+{ -+ --parser->indent; -+} -+ -+static void parse_fx_print_indent(struct fx_parser *parser) -+{ -+ vkd3d_string_buffer_printf(&parser->buffer, "%*s", 4 * parser->indent, ""); -+} -+ -+static void parse_fx_4_numeric_value(struct fx_parser *parser, uint32_t offset, -+ const struct fx_4_binary_type *type) -+{ -+ unsigned int base_type, comp_count; -+ size_t i; -+ -+ base_type = (type->typeinfo >> FX_4_NUMERIC_BASE_TYPE_SHIFT) & 0xf; -+ -+ comp_count = type->packed_size / sizeof(uint32_t); -+ for (i = 0; i < comp_count; ++i) -+ { -+ union hlsl_constant_value_component value; -+ -+ fx_parser_read_unstructured(parser, &value, offset, sizeof(uint32_t)); -+ -+ if (base_type == FX_4_NUMERIC_TYPE_FLOAT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%f", value.f); -+ else if (base_type == FX_4_NUMERIC_TYPE_INT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i); -+ else if (base_type == FX_4_NUMERIC_TYPE_UINT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); -+ else if (base_type == FX_4_NUMERIC_TYPE_BOOL) -+ vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? 
"true" : "false" ); -+ else -+ vkd3d_string_buffer_printf(&parser->buffer, "%#x", value.u); -+ -+ if (i < comp_count - 1) -+ vkd3d_string_buffer_printf(&parser->buffer, ", "); -+ -+ offset += sizeof(uint32_t); -+ } -+} -+ -+static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t offset) -+{ -+ const char *str = fx_4_get_string(parser, offset); -+ vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str); -+} -+ -+static void fx_parse_fx_4_annotations(struct fx_parser *parser) -+{ -+ struct fx_4_annotation -+ { -+ uint32_t name; -+ uint32_t type; -+ } var; -+ struct fx_4_binary_type type; -+ const char *name, *type_name; -+ uint32_t count, i, value; -+ -+ count = fx_parser_read_u32(parser); -+ -+ if (!count) -+ return; -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "\n"); -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "<\n"); -+ parse_fx_start_indent(parser); -+ -+ for (i = 0; i < count; ++i) -+ { -+ fx_parser_read_u32s(parser, &var, sizeof(var)); -+ fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); -+ -+ name = fx_4_get_string(parser, var.name); -+ type_name = fx_4_get_string(parser, type.name); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name); -+ if (type.element_count) -+ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); -+ vkd3d_string_buffer_printf(&parser->buffer, " = "); -+ if (type.element_count) -+ vkd3d_string_buffer_printf(&parser->buffer, "{ "); -+ -+ if (type.class == FX_4_TYPE_CLASS_NUMERIC) -+ { -+ value = fx_parser_read_u32(parser); -+ parse_fx_4_numeric_value(parser, value, &type); -+ } -+ else if (type.class == FX_4_TYPE_CLASS_OBJECT && type.typeinfo == FX_4_OBJECT_TYPE_STRING) -+ { -+ uint32_t element_count = max(type.element_count, 1); -+ -+ for (uint32_t j = 0; j < element_count; ++j) -+ { -+ value = fx_parser_read_u32(parser); -+ fx_4_parse_string_initializer(parser, value); -+ if (j < 
element_count - 1) -+ vkd3d_string_buffer_printf(&parser->buffer, ", "); -+ } -+ } -+ else -+ { -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -+ "Only numeric and string types are supported in annotations.\n"); -+ } -+ -+ if (type.element_count) -+ vkd3d_string_buffer_printf(&parser->buffer, " }"); -+ vkd3d_string_buffer_printf(&parser->buffer, ";\n"); -+ } -+ parse_fx_end_indent(parser); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, ">"); -+} -+ -+ - static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t count) - { - struct fx_4_numeric_variable -@@ -2960,17 +3097,8 @@ static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t c - uint32_t value; - uint32_t flags; - } var; -- struct fx_4_type -- { -- uint32_t name; -- uint32_t class; -- uint32_t element_count; -- uint32_t unpacked_size; -- uint32_t stride; -- uint32_t packed_size; -- uint32_t typeinfo; -- } type; - const char *name, *semantic, *type_name; -+ struct fx_4_binary_type type; - uint32_t i; - - for (i = 0; i < count; ++i) -@@ -2984,54 +3112,21 @@ static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t c - vkd3d_string_buffer_printf(&parser->buffer, " %s %s", type_name, name); - if (type.element_count) - vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); -+ - if (var.semantic) - { - semantic = fx_4_get_string(parser, var.semantic); - vkd3d_string_buffer_printf(&parser->buffer, " : %s", semantic); - } -+ fx_parse_fx_4_annotations(parser); -+ - if (var.value) - { -- unsigned int base_type, comp_count; -- size_t j; -- -- if (type.class == FX_4_TYPE_CLASS_NUMERIC) -- base_type = (type.typeinfo >> FX_4_NUMERIC_BASE_TYPE_SHIFT) & 0xf; -- else -- base_type = 0; -- - vkd3d_string_buffer_printf(&parser->buffer, " = { "); -- -- comp_count = type.unpacked_size / sizeof(uint32_t); -- for (j = 0; j < comp_count; ++j) -- { -- union hlsl_constant_value_component value; -- -- 
fx_parser_read_unstructured(parser, &value, var.value + j * sizeof(uint32_t), sizeof(uint32_t)); -- -- if (base_type == FX_4_NUMERIC_TYPE_FLOAT) -- vkd3d_string_buffer_printf(&parser->buffer, "%f", value.f); -- else if (base_type == FX_4_NUMERIC_TYPE_INT) -- vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i); -- else if (base_type == FX_4_NUMERIC_TYPE_UINT) -- vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); -- else if (base_type == FX_4_NUMERIC_TYPE_BOOL) -- vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? "true" : "false" ); -- else -- vkd3d_string_buffer_printf(&parser->buffer, "%#x", value.u); -- -- if (j < comp_count - 1) -- vkd3d_string_buffer_printf(&parser->buffer, ", "); -- } -- -+ parse_fx_4_numeric_value(parser, var.value, &type); - vkd3d_string_buffer_printf(&parser->buffer, " }"); - } - vkd3d_string_buffer_printf(&parser->buffer, "; // Offset: %u, size %u.\n", var.offset, type.unpacked_size); -- -- if (fx_parser_read_u32(parser)) -- { -- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing annotations is not implemented.\n"); -- return; -- } - } - } - -@@ -3057,27 +3152,17 @@ static void fx_parse_buffers(struct fx_parser *parser) - - name = fx_4_get_string(parser, buffer.name); - -- vkd3d_string_buffer_printf(&parser->buffer, "cbuffer %s\n", name); -- vkd3d_string_buffer_printf(&parser->buffer, "{\n"); -- -- if (fx_parser_read_u32(parser)) -- { -- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing annotations is not implemented.\n"); -- return; -- } -+ vkd3d_string_buffer_printf(&parser->buffer, "cbuffer %s", name); -+ fx_parse_fx_4_annotations(parser); - -+ vkd3d_string_buffer_printf(&parser->buffer, "\n{\n"); -+ parse_fx_start_indent(parser); - fx_parse_fx_4_numeric_variables(parser, buffer.count); -- -+ parse_fx_end_indent(parser); - vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); - } - } - --static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t 
offset) --{ -- const char *str = fx_4_get_string(parser, offset); -- vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str); --} -- - static void fx_4_parse_objects(struct fx_parser *parser) - { - struct fx_4_object_variable -@@ -3087,17 +3172,8 @@ static void fx_4_parse_objects(struct fx_parser *parser) - uint32_t semantic; - uint32_t bind_point; - } var; -- struct fx_4_type -- { -- uint32_t name; -- uint32_t class; -- uint32_t element_count; -- uint32_t unpacked_size; -- uint32_t stride; -- uint32_t packed_size; -- uint32_t typeinfo; -- } type; - uint32_t i, j, value, element_count; -+ struct fx_4_binary_type type; - const char *name, *type_name; - - if (parser->failed) -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 1e17de54ede..46515818d07 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -2213,6 +2213,9 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, - if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) - return ret; - -+ VKD3D_ASSERT(program->normalised_io); -+ VKD3D_ASSERT(program->normalised_hull_cp_io); -+ - vkd3d_glsl_generator_init(&generator, program, compile_info, - descriptor_info, combined_sampler_info, message_context); - ret = vkd3d_glsl_generator_generate(&generator, out); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index ee13e193d49..213cec79c3d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -40,6 +40,7 @@ struct parse_initializer - unsigned int args_count; - struct hlsl_block *instrs; - bool braces; -+ struct vkd3d_shader_location loc; - }; - - struct parse_parameter -@@ -2506,6 +2507,32 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - } - } - -+static void initialize_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *dst, -+ const struct parse_initializer 
*initializer, bool is_default_values_initializer) -+{ -+ unsigned int store_index = 0; -+ -+ /* If any of the elements has an error type, then initializer_size() is not -+ * meaningful. */ -+ for (unsigned int i = 0; i < initializer->args_count; ++i) -+ { -+ if (initializer->args[i]->data_type->class == HLSL_CLASS_ERROR) -+ return; -+ } -+ -+ if (initializer_size(initializer) != hlsl_type_component_count(dst->data_type)) -+ { -+ hlsl_error(ctx, &initializer->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected %u components in initializer, but got %u.", -+ hlsl_type_component_count(dst->data_type), initializer_size(initializer)); -+ return; -+ } -+ -+ for (unsigned int i = 0; i < initializer->args_count; ++i) -+ initialize_var_components(ctx, initializer->instrs, dst, &store_index, -+ initializer->args[i], is_default_values_initializer); -+} -+ - static bool type_has_object_components(const struct hlsl_type *type) - { - if (type->class == HLSL_CLASS_ARRAY) -@@ -2832,8 +2859,6 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - if (v->initializer.args_count) - { - bool is_default_values_initializer; -- unsigned int store_index = 0; -- unsigned int size, k; - - is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) - || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -@@ -2871,20 +2896,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - v->initializer.args[0] = node_from_block(v->initializer.instrs); - } - -- size = initializer_size(&v->initializer); -- if (component_count != size) -- { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Expected %u components in initializer, but got %u.", component_count, size); -- free_parse_variable_def(v); -- continue; -- } -- -- for (k = 0; k < v->initializer.args_count; ++k) -- { -- initialize_var_components(ctx, v->initializer.instrs, var, -- &store_index, v->initializer.args[k], 
is_default_values_initializer); -- } -+ initialize_var(ctx, var, &v->initializer, is_default_values_initializer); - - if (is_default_values_initializer) - { -@@ -5402,13 +5414,11 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type - { - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; -- unsigned int i, idx = 0; - - if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) - return NULL; - -- for (i = 0; i < params->args_count; ++i) -- initialize_var_components(ctx, params->instrs, var, &idx, params->args[i], false); -+ initialize_var(ctx, var, params, false); - - if (!(load = hlsl_new_var_load(ctx, var, loc))) - return NULL; -@@ -5425,6 +5435,14 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_type *cond_type = cond->data_type; - struct hlsl_type *common_type; - -+ if (cond->data_type->class == HLSL_CLASS_ERROR -+ || first->data_type->class == HLSL_CLASS_ERROR -+ || second->data_type->class == HLSL_CLASS_ERROR) -+ { -+ block->value = ctx->error_instr; -+ return true; -+ } -+ - if (cond_type->class > HLSL_CLASS_LAST_NUMERIC) - { - struct vkd3d_string_buffer *string; -@@ -8321,6 +8339,7 @@ complex_initializer: - $$.args[0] = node_from_block($1); - $$.instrs = $1; - $$.braces = false; -+ $$.loc = @$; - } - | '{' complex_initializer_list '}' - { -@@ -8352,6 +8371,7 @@ complex_initializer_list: - $$.args[$$.args_count++] = $3.args[i]; - hlsl_block_add_block($$.instrs, $3.instrs); - free_parse_initializer(&$3); -+ $$.loc = @$; - } - - initializer_expr: -@@ -8369,6 +8389,7 @@ initializer_expr_list: - $$.args[0] = node_from_block($1); - $$.instrs = $1; - $$.braces = false; -+ $$.loc = @$; - } - | initializer_expr_list ',' initializer_expr - { -@@ -8687,6 +8708,7 @@ func_arguments: - if (!($$.instrs = make_empty_block(ctx))) - YYABORT; - $$.braces = false; -+ $$.loc = @$; - } - | initializer_expr_list - -@@ -8950,14 +8972,6 @@ postfix_expr: - free_parse_initializer(&$4); - YYABORT; - } 
-- if ($2->dimx * $2->dimy != initializer_size(&$4)) -- { -- hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Expected %u components in constructor, but got %u.", -- $2->dimx * $2->dimy, initializer_size(&$4)); -- free_parse_initializer(&$4); -- YYABORT; -- } - - if (!($$ = add_constructor(ctx, $2, &$4, &@2))) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 4cf9d5eb84a..88bec8610cb 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -7366,7 +7366,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; -- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED)) -+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; -@@ -7404,7 +7404,7 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; - -- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED)) -+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index c475bf92279..b47f12d2188 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -74,7 +74,8 @@ static int convert_parameter_info(const struct vkd3d_shader_compile_info *compil - } - - bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, -- const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type) -+ const struct vkd3d_shader_version 
*version, unsigned int reserve, enum vsir_control_flow_type cf_type, -+ bool normalised_io) - { - memset(program, 0, sizeof(*program)); - -@@ -97,6 +98,8 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c - - program->shader_version = *version; - program->cf_type = cf_type; -+ program->normalised_io = normalised_io; -+ program->normalised_hull_cp_io = normalised_io; - return shader_instruction_array_init(&program->instructions, reserve); - } - -@@ -1132,6 +1135,14 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - enum vkd3d_result ret; - unsigned int i, j; - -+ VKD3D_ASSERT(!program->normalised_hull_cp_io); -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) -+ { -+ program->normalised_hull_cp_io = true; -+ return VKD3D_OK; -+ } -+ - if (!(normaliser.outpointid_param = vsir_program_create_outpointid_param(program))) - { - ERR("Failed to allocate src param.\n"); -@@ -1175,6 +1186,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - break; - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - program->instructions = normaliser.instructions; -+ program->normalised_hull_cp_io = true; - return VKD3D_OK; - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -@@ -1183,6 +1195,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - ret = control_point_normaliser_emit_hs_input(&normaliser, &program->input_signature, - input_control_point_count, i, &location); - program->instructions = normaliser.instructions; -+ program->normalised_hull_cp_io = true; - return ret; - default: - break; -@@ -1190,6 +1203,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - } - - program->instructions = normaliser.instructions; -+ program->normalised_hull_cp_io = true; - return VKD3D_OK; - } - -@@ -1903,6 +1917,8 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - struct 
vkd3d_shader_instruction *ins; - unsigned int i; - -+ VKD3D_ASSERT(!program->normalised_io); -+ - normaliser.phase = VKD3DSIH_INVALID; - normaliser.shader_type = program->shader_version.type; - normaliser.major = program->shader_version.major; -@@ -1959,6 +1975,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - - program->instructions = normaliser.instructions; - program->use_vocp = normaliser.use_vocp; -+ program->normalised_io = true; - return VKD3D_OK; - } - -@@ -6189,6 +6206,143 @@ static void vsir_validate_register_without_indices(struct validation_context *ct - reg->idx_count, reg->type); - } - -+static void vsir_validate_io_register(struct validation_context *ctx, -+ const struct vkd3d_shader_register *reg) -+{ -+ const struct shader_signature *signature; -+ bool has_control_point = false; -+ -+ switch (reg->type) -+ { -+ case VKD3DSPR_INPUT: -+ signature = &ctx->program->input_signature; -+ -+ switch (ctx->program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_GEOMETRY: -+ case VKD3D_SHADER_TYPE_HULL: -+ case VKD3D_SHADER_TYPE_DOMAIN: -+ has_control_point = true; -+ break; -+ -+ default: -+ break; -+ } -+ break; -+ -+ case VKD3DSPR_OUTPUT: -+ switch (ctx->program->shader_version.type) -+ { -+ case VKD3D_SHADER_TYPE_HULL: -+ if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE) -+ { -+ signature = &ctx->program->output_signature; -+ has_control_point = ctx->program->normalised_hull_cp_io; -+ } -+ else if (ctx->program->normalised_io) -+ { -+ signature = &ctx->program->output_signature; -+ has_control_point = true; -+ } -+ else -+ { -+ signature = &ctx->program->patch_constant_signature; -+ } -+ break; -+ -+ default: -+ signature = &ctx->program->output_signature; -+ break; -+ } -+ break; -+ -+ case VKD3DSPR_INCONTROLPOINT: -+ signature = &ctx->program->input_signature; -+ has_control_point = true; -+ break; -+ -+ case VKD3DSPR_OUTCONTROLPOINT: -+ signature = &ctx->program->output_signature; -+ has_control_point = true; 
-+ break; -+ -+ case VKD3DSPR_PATCHCONST: -+ signature = &ctx->program->patch_constant_signature; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (!ctx->program->normalised_io) -+ { -+ /* Indices are [register] or [control point, register]. Both are -+ * allowed to have a relative address. */ -+ unsigned int expected_idx_count = 1 + !!has_control_point; -+ -+ if (reg->idx_count != expected_idx_count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a register of type %#x.", -+ reg->idx_count, reg->type); -+ return; -+ } -+ } -+ else -+ { -+ struct signature_element *element; -+ unsigned int expected_idx_count; -+ unsigned int signature_idx; -+ bool is_array = false; -+ -+ /* If the signature element is not an array, indices are -+ * [signature] or [control point, signature]. If the signature -+ * element is an array, indices are [array, signature] or -+ * [control point, array, signature]. In any case `signature' is -+ * not allowed to have a relative address, while the others are. 
-+ */ -+ if (reg->idx_count < 1) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a register of type %#x.", -+ reg->idx_count, reg->type); -+ return; -+ } -+ -+ if (reg->idx[reg->idx_count - 1].rel_addr) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Non-NULL relative address for the signature index of a register of type %#x.", -+ reg->type); -+ return; -+ } -+ -+ signature_idx = reg->idx[reg->idx_count - 1].offset; -+ -+ if (signature_idx >= signature->element_count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Signature index %u exceeds the signature size %u in a register of type %#x.", -+ signature_idx, signature->element_count, reg->type); -+ return; -+ } -+ -+ element = &signature->elements[signature_idx]; -+ if (element->register_count > 1 || vsir_sysval_semantic_is_tess_factor(element->sysval_semantic)) -+ is_array = true; -+ -+ expected_idx_count = 1 + !!has_control_point + !!is_array; -+ -+ if (reg->idx_count != expected_idx_count) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u for a register of type %#x.", -+ reg->idx_count, reg->type); -+ return; -+ } -+ } -+} -+ - static void vsir_validate_temp_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) - { -@@ -6502,10 +6656,18 @@ static void vsir_validate_register(struct validation_context *ctx, - vsir_validate_temp_register(ctx, reg); - break; - -+ case VKD3DSPR_INPUT: -+ vsir_validate_io_register(ctx, reg); -+ break; -+ - case VKD3DSPR_RASTOUT: - vsir_validate_rastout_register(ctx, reg); - break; - -+ case VKD3DSPR_OUTPUT: -+ vsir_validate_io_register(ctx, reg); -+ break; -+ - case VKD3DSPR_DEPTHOUT: - vsir_validate_register_without_indices(ctx, reg); - break; -@@ -6542,6 +6704,18 @@ static void vsir_validate_register(struct validation_context *ctx, - vsir_validate_uav_register(ctx, reg); - break; - -+ case 
VKD3DSPR_INCONTROLPOINT: -+ vsir_validate_io_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_OUTCONTROLPOINT: -+ vsir_validate_io_register(ctx, reg); -+ break; -+ -+ case VKD3DSPR_PATCHCONST: -+ vsir_validate_io_register(ctx, reg); -+ break; -+ - case VKD3DSPR_DEPTHOUTGE: - vsir_validate_register_without_indices(ctx, reg); - break; -@@ -6810,6 +6984,11 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask); - -+ if (!vkd3d_bitmask_is_contiguous(element->mask)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Non-contiguous mask %#x.", -+ idx, signature_type_name, element->mask); -+ - /* Here we'd likely want to validate that the usage mask is a subset of the - * signature mask. Unfortunately the D3DBC parser sometimes violates this. - * For example I've seen a shader like this: -@@ -7649,11 +7828,9 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t - vsir_transform(&ctx, vsir_program_remap_output_signature); - - if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) -- { - vsir_transform(&ctx, vsir_program_flatten_hull_shader_phases); -- vsir_transform(&ctx, instruction_array_normalise_hull_shader_control_point_io); -- } - -+ vsir_transform(&ctx, instruction_array_normalise_hull_shader_control_point_io); - vsir_transform(&ctx, vsir_program_normalise_io_registers); - vsir_transform(&ctx, vsir_program_normalise_flat_constants); - vsir_transform(&ctx, vsir_program_remove_dead_code); -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index 36750de1fd8..5baefbc1f44 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -869,6 +869,9 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, - if ((ret = 
vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) - return ret; - -+ VKD3D_ASSERT(program->normalised_io); -+ VKD3D_ASSERT(program->normalised_hull_cp_io); -+ - if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) - return ret; - msl_generator_generate(&generator); -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 1efd717e970..6a28e2cd68e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -10650,6 +10650,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - compile_info, compiler->message_context)) < 0) - return result; - -+ VKD3D_ASSERT(program->normalised_io); -+ VKD3D_ASSERT(program->normalised_hull_cp_io); -+ - max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); - if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) - return VKD3D_ERROR_OUT_OF_MEMORY; -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 54344f2afc0..848e78a34d3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -2792,7 +2792,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro - version.minor = VKD3D_SM4_VERSION_MINOR(version_token); - - /* Estimate instruction count to avoid reallocation in most shaders. 
*/ -- if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20, VSIR_CF_STRUCTURED)) -+ if (!vsir_program_init(program, compile_info, -+ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, false)) - return false; - vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); - sm4->ptr = sm4->start; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index d63bfd96121..9df538a0da0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -255,6 +255,7 @@ enum vkd3d_shader_error - - VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED = 11000, - VKD3D_SHADER_ERROR_FX_INVALID_VERSION = 11001, -+ VKD3D_SHADER_ERROR_FX_INVALID_DATA = 11002, - }; - - enum vkd3d_shader_opcode -@@ -1414,6 +1415,8 @@ struct vsir_program - bool use_vocp; - bool has_point_size; - enum vsir_control_flow_type cf_type; -+ bool normalised_io; -+ bool normalised_hull_cp_io; - - const char **block_names; - size_t block_name_count; -@@ -1426,7 +1429,8 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( - const struct vsir_program *program, enum vkd3d_shader_parameter_name name); - bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, -- const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type); -+ const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, -+ bool normalised_io); - enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); - enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, -diff --git 
a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index eab0436bebd..5495809fcb9 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -2795,39 +2795,30 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des - /* We use separate bindings for buffer and texture SRVs/UAVs. - * See d3d12_root_signature_init(). For unbounded ranges the - * descriptors exist in two consecutive sets, otherwise they occur -- * in pairs in one set. */ -- if (range->descriptor_count == UINT_MAX) -- { -- if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER -- && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) -- { -- vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; -- vk_descriptor_write->dstBinding = 0; -- } -- } -- else -- { -- if (!use_array) -- vk_descriptor_write->dstBinding = vk_binding + 2 * index; -- if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER -- && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) -- ++vk_descriptor_write->dstBinding; -- } -- -+ * as consecutive ranges within a set. */ - if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER - || vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) - { - vk_descriptor_write->pTexelBufferView = &u.view->v.u.vk_buffer_view; -+ break; -+ } -+ -+ if (range->descriptor_count == UINT_MAX) -+ { -+ vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; -+ vk_descriptor_write->dstBinding = 0; - } - else - { -- vk_image_info->sampler = VK_NULL_HANDLE; -- vk_image_info->imageView = u.view->v.u.vk_image_view; -- vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV -- ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; -- -- vk_descriptor_write->pImageInfo = vk_image_info; -+ vk_descriptor_write->dstBinding += use_array ? 
1 : range->descriptor_count; - } -+ -+ vk_image_info->sampler = VK_NULL_HANDLE; -+ vk_image_info->imageView = u.view->v.u.vk_image_view; -+ vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV -+ ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; -+ -+ vk_descriptor_write->pImageInfo = vk_image_info; - break; - - case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index fb377177403..8e5ec70a577 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -219,6 +219,30 @@ static VkShaderStageFlags stage_flags_from_visibility(D3D12_SHADER_VISIBILITY vi - } - } - -+static VkShaderStageFlags stage_flags_from_vkd3d_shader_visibility(enum vkd3d_shader_visibility visibility) -+{ -+ switch (visibility) -+ { -+ case VKD3D_SHADER_VISIBILITY_ALL: -+ return VK_SHADER_STAGE_ALL; -+ case VKD3D_SHADER_VISIBILITY_VERTEX: -+ return VK_SHADER_STAGE_VERTEX_BIT; -+ case VKD3D_SHADER_VISIBILITY_HULL: -+ return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; -+ case VKD3D_SHADER_VISIBILITY_DOMAIN: -+ return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; -+ case VKD3D_SHADER_VISIBILITY_GEOMETRY: -+ return VK_SHADER_STAGE_GEOMETRY_BIT; -+ case VKD3D_SHADER_VISIBILITY_PIXEL: -+ return VK_SHADER_STAGE_FRAGMENT_BIT; -+ case VKD3D_SHADER_VISIBILITY_COMPUTE: -+ return VK_SHADER_STAGE_COMPUTE_BIT; -+ default: -+ FIXME("Unhandled visibility %#x.\n", visibility); -+ return VKD3D_SHADER_VISIBILITY_ALL; -+ } -+} -+ - static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHADER_VISIBILITY visibility) - { - switch (visibility) -@@ -260,23 +284,6 @@ static VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d - } - } - --static VkDescriptorType vk_descriptor_type_from_d3d12_root_parameter(D3D12_ROOT_PARAMETER_TYPE type) --{ -- switch (type) -- { -- /* SRV and UAV root parameters are buffer views. 
*/ -- case D3D12_ROOT_PARAMETER_TYPE_SRV: -- return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -- case D3D12_ROOT_PARAMETER_TYPE_UAV: -- return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -- case D3D12_ROOT_PARAMETER_TYPE_CBV: -- return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -- default: -- FIXME("Unhandled descriptor root parameter type %#x.\n", type); -- return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; -- } --} -- - static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( - D3D12_DESCRIPTOR_RANGE_TYPE type) - { -@@ -313,20 +320,6 @@ static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_root_p - } - } - --static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutBinding *binding_desc, -- enum vkd3d_shader_descriptor_type descriptor_type, D3D12_SHADER_VISIBILITY shader_visibility, -- bool is_buffer, uint32_t vk_binding, unsigned int descriptor_count) --{ -- binding_desc->binding = vk_binding; -- binding_desc->descriptorType -- = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, is_buffer); -- binding_desc->descriptorCount = descriptor_count; -- binding_desc->stageFlags = stage_flags_from_visibility(shader_visibility); -- binding_desc->pImmutableSamplers = NULL; -- -- return true; --} -- - struct d3d12_root_signature_info - { - size_t binding_count; -@@ -719,18 +712,66 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat - return S_OK; - } - -+struct vk_binding_array -+{ -+ VkDescriptorSetLayoutBinding *bindings; -+ size_t capacity, count; -+ -+ unsigned int table_index; -+ unsigned int unbounded_offset; -+ VkDescriptorSetLayoutCreateFlags flags; -+}; -+ -+static void vk_binding_array_cleanup(struct vk_binding_array *array) -+{ -+ vkd3d_free(array->bindings); -+ array->bindings = NULL; -+} -+ -+static bool vk_binding_array_add_binding(struct vk_binding_array *array, -+ VkDescriptorType descriptor_type, unsigned int descriptor_count, -+ VkShaderStageFlags stage_flags, 
const VkSampler *immutable_sampler, unsigned int *binding_idx) -+{ -+ unsigned int binding_count = array->count; -+ VkDescriptorSetLayoutBinding *binding; -+ -+ if (!vkd3d_array_reserve((void **)&array->bindings, &array->capacity, -+ array->count + 1, sizeof(*array->bindings))) -+ { -+ ERR("Failed to reallocate the Vulkan binding array.\n"); -+ return false; -+ } -+ -+ *binding_idx = binding_count; -+ binding = &array->bindings[binding_count]; -+ binding->binding = binding_count; -+ binding->descriptorType = descriptor_type; -+ binding->descriptorCount = descriptor_count; -+ binding->stageFlags = stage_flags; -+ binding->pImmutableSamplers = immutable_sampler; -+ ++array->count; -+ -+ return true; -+} -+ - struct vkd3d_descriptor_set_context - { -- VkDescriptorSetLayoutBinding *current_binding; -- VkDescriptorSetLayoutBinding *first_binding; -+ struct vk_binding_array vk_bindings[VKD3D_MAX_DESCRIPTOR_SETS]; - unsigned int table_index; - unsigned int unbounded_offset; - unsigned int descriptor_index; - unsigned int uav_counter_index; - unsigned int push_constant_index; -- uint32_t descriptor_binding; - }; - -+static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context) -+{ -+ size_t i; -+ -+ for (i = 0; i < ARRAY_SIZE(context->vk_bindings); ++i) -+ vk_binding_array_cleanup(&context->vk_bindings[i]); -+} -+ - static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, unsigned int set_count) - { - uint32_t max_count = min(VKD3D_MAX_DESCRIPTOR_SETS, device->vk_info.device_limits.maxBoundDescriptorSets); -@@ -745,56 +786,56 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns - return true; - } - --static HRESULT vkd3d_create_descriptor_set_layout(struct d3d12_device *device, -- VkDescriptorSetLayoutCreateFlags flags, unsigned int binding_count, bool unbounded, -- const VkDescriptorSetLayoutBinding *bindings, VkDescriptorSetLayout *set_layout); -- --static HRESULT 
d3d12_root_signature_append_descriptor_set_layout(struct d3d12_root_signature *root_signature, -- struct vkd3d_descriptor_set_context *context, VkDescriptorSetLayoutCreateFlags flags) -+static struct vk_binding_array *d3d12_root_signature_current_vk_binding_array( -+ struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context) - { -- struct d3d12_descriptor_set_layout *layout; -- unsigned int index; -- HRESULT hr; -- -- if (!context->descriptor_binding) -- return S_OK; -+ if (root_signature->vk_set_count >= ARRAY_SIZE(context->vk_bindings)) -+ return NULL; - -- index = root_signature->vk_set_count; -- layout = &root_signature->descriptor_set_layouts[index]; -+ return &context->vk_bindings[root_signature->vk_set_count]; -+} - -- if (!vkd3d_validate_descriptor_set_count(root_signature->device, index + 1)) -- return E_INVALIDARG; -+static void d3d12_root_signature_append_vk_binding_array(struct d3d12_root_signature *root_signature, -+ VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) -+{ -+ struct vk_binding_array *array; - -- if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, flags, context->descriptor_binding, -- context->unbounded_offset != UINT_MAX, context->first_binding, &layout->vk_layout))) -- return hr; -- layout->table_index = context->table_index; -- layout->unbounded_offset = context->unbounded_offset; -- ++root_signature->vk_set_count; -+ if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) || !array->count) -+ return; - -- context->current_binding = context->first_binding; -- context->descriptor_binding = 0; -+ array->table_index = context->table_index; -+ array->unbounded_offset = context->unbounded_offset; -+ array->flags = flags; - -- return S_OK; -+ ++root_signature->vk_set_count; - } - - static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, -- enum vkd3d_shader_descriptor_type 
descriptor_type, unsigned int register_space, unsigned int register_idx, -- bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, -- unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context) -+ enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, -+ unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, -+ unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context, -+ const VkSampler *immutable_sampler, unsigned int *binding_idx) - { - struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets - ? &root_signature->descriptor_offsets[context->descriptor_index] : NULL; -- struct vkd3d_shader_resource_binding *mapping -- = &root_signature->descriptor_mapping[context->descriptor_index++]; -+ struct vkd3d_shader_resource_binding *mapping; -+ struct vk_binding_array *array; -+ unsigned int idx; -+ -+ if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) -+ || !(vk_binding_array_add_binding(&context->vk_bindings[root_signature->vk_set_count], -+ vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor), descriptor_count, -+ stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx))) -+ return E_OUTOFMEMORY; - -+ mapping = &root_signature->descriptor_mapping[context->descriptor_index++]; - mapping->type = descriptor_type; - mapping->register_space = register_space; - mapping->register_index = register_idx; - mapping->shader_visibility = shader_visibility; - mapping->flags = buffer_descriptor ? 
VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; - mapping->binding.set = root_signature->vk_set_count; -- mapping->binding.binding = context->descriptor_binding++; -+ mapping->binding.binding = idx; - mapping->binding.count = descriptor_count; - if (offset) - { -@@ -803,37 +844,11 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur - } - - if (context->unbounded_offset != UINT_MAX) -- return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); -+ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - -- return S_OK; --} -+ if (binding_idx) -+ *binding_idx = idx; - --static HRESULT d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, -- enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx, -- unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors, -- enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context, -- uint32_t *first_binding) --{ -- unsigned int i; -- HRESULT hr; -- -- is_buffer_descriptor |= descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; -- duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV -- || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -- && duplicate_descriptors; -- -- *first_binding = context->descriptor_binding; -- for (i = 0; i < binding_count; ++i) -- { -- if (duplicate_descriptors -- && FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, -- register_space, base_register_idx + i, true, shader_visibility, 1, context))) -- return hr; -- -- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, -- base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context))) -- return hr; -- } - return S_OK; - } - -@@ -895,38 +910,41 @@ static unsigned int 
vk_binding_count_from_descriptor_range(const struct d3d12_ro - return min(count, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); - } - --static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_root_signature *root_signature, -+static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature, - const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, -+ unsigned int vk_binding_array_count, unsigned int bindings_per_range, - struct vkd3d_descriptor_set_context *context) - { - enum vkd3d_shader_visibility shader_visibility = vkd3d_shader_visibility_from_d3d12(visibility); -- bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; -+ bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; - enum vkd3d_shader_descriptor_type descriptor_type = range->type; -+ unsigned int i, register_space = range->register_space; - HRESULT hr; - - if (range->descriptor_count == UINT_MAX) - context->unbounded_offset = range->offset; - -- if (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -+ for (i = 0; i < bindings_per_range; ++i) - { -- if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, -- descriptor_type, visibility, true, context->descriptor_binding, range->vk_binding_count)) -- return E_NOTIMPL; -- ++context->current_binding; -- -- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -- range->base_register_idx, true, shader_visibility, range->vk_binding_count, context))) -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, -+ register_space, range->base_register_idx + i, is_buffer, shader_visibility, -+ vk_binding_array_count, context, NULL, NULL))) - return hr; - } - -- if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, -- descriptor_type, visibility, is_buffer, 
context->descriptor_binding, range->vk_binding_count)) -- return E_NOTIMPL; -- ++context->current_binding; -+ if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -+ { -+ context->unbounded_offset = UINT_MAX; -+ return S_OK; -+ } - -- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -- range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context))) -- return hr; -+ for (i = 0; i < bindings_per_range; ++i) -+ { -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, -+ register_space, range->base_register_idx + i, false, shader_visibility, -+ vk_binding_array_count, context, NULL, NULL))) -+ return hr; -+ } - - context->unbounded_offset = UINT_MAX; - -@@ -1116,11 +1134,10 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info, - struct vkd3d_descriptor_set_context *context) - { -+ unsigned int i, j, range_count, bindings_per_range, vk_binding_array_count; - const struct d3d12_device *device = root_signature->device; - bool use_vk_heaps = root_signature->device->use_vk_heaps; - struct d3d12_root_descriptor_table *table; -- unsigned int i, j, k, range_count; -- uint32_t vk_binding; - HRESULT hr; - - root_signature->descriptor_table_mask = 0; -@@ -1177,7 +1194,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - for (j = 0; j < range_count; ++j) - { - struct d3d12_root_descriptor_table_range *range; -- VkDescriptorSetLayoutBinding *cur_binding; - - range = &table->ranges[j]; - -@@ -1223,53 +1239,23 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - base_range = range; - } - -- range->binding = context->descriptor_binding; - range->vk_binding_count = vk_binding_count_from_descriptor_range(range, - info, 
&device->vk_info.descriptor_limits); -- -- if (FAILED(hr = d3d12_root_signature_init_descriptor_array_binding(root_signature, -- range, p->ShaderVisibility, context))) -- return hr; -- -- continue; -+ vk_binding_array_count = range->vk_binding_count; -+ bindings_per_range = 1; - } -- -- cur_binding = context->current_binding; -- -- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, -- range->type, range->register_space, range->base_register_idx, range->descriptor_count, false, true, -- shader_visibility, context, &vk_binding))) -- return hr; -- -- /* Unroll descriptor range. */ -- for (k = 0; k < range->descriptor_count; ++k) -+ else - { -- uint32_t vk_current_binding = vk_binding + k; -- -- if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV -- || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -- { -- vk_current_binding = vk_binding + 2 * k; -- -- /* Assign binding for image view. */ -- if (!vk_binding_from_d3d12_descriptor_range(cur_binding, -- range->type, p->ShaderVisibility, false, vk_current_binding + 1, 1)) -- return E_NOTIMPL; -- -- ++cur_binding; -- } -- -- if (!vk_binding_from_d3d12_descriptor_range(cur_binding, -- range->type, p->ShaderVisibility, true, vk_current_binding, 1)) -- return E_NOTIMPL; -- -- ++cur_binding; -+ range->vk_binding_count = range->descriptor_count; -+ vk_binding_array_count = 1; -+ bindings_per_range = range->descriptor_count; - } - -- table->ranges[j].vk_binding_count = table->ranges[j].descriptor_count; -- table->ranges[j].binding = vk_binding; -+ range->binding = context->vk_bindings[root_signature->vk_set_count].count; - -- context->current_binding = cur_binding; -+ if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range, -+ p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context))) -+ return hr; - } - ++context->push_constant_index; - } -@@ -1280,8 +1266,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - static HRESULT 
d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) - { -- VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; -- unsigned int i; -+ unsigned int binding, i; - HRESULT hr; - - root_signature->push_descriptor_mask = 0; -@@ -1296,23 +1281,16 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign - - root_signature->push_descriptor_mask |= 1u << i; - -- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, - vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), -- p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1, true, false, -- vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context, &cur_binding->binding))) -+ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, true, -+ vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL, &binding))) - return hr; -- cur_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(p->ParameterType); -- cur_binding->descriptorCount = 1; -- cur_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility); -- cur_binding->pImmutableSamplers = NULL; - - root_signature->parameters[i].parameter_type = p->ParameterType; -- root_signature->parameters[i].u.descriptor.binding = cur_binding->binding; -- -- ++cur_binding; -+ root_signature->parameters[i].u.descriptor.binding = binding; - } - -- context->current_binding = cur_binding; - return S_OK; - } - -@@ -1320,7 +1298,6 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa - struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc, - struct vkd3d_descriptor_set_context *context) - { -- VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; - unsigned int i; - HRESULT hr; - -@@ -1332,21 
+1309,15 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa - if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) - return hr; - -- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, -- VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, 1, false, false, -- vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context, &cur_binding->binding))) -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, -+ VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, false, -+ vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), 1, context, -+ &root_signature->static_samplers[i], NULL))) - return hr; -- cur_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; -- cur_binding->descriptorCount = 1; -- cur_binding->stageFlags = stage_flags_from_visibility(s->ShaderVisibility); -- cur_binding->pImmutableSamplers = &root_signature->static_samplers[i]; -- -- ++cur_binding; - } - -- context->current_binding = cur_binding; - if (device->use_vk_heaps) -- return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); -+ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - - return S_OK; - } -@@ -1479,6 +1450,34 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, - return S_OK; - } - -+static HRESULT d3d12_root_signature_create_descriptor_set_layouts(struct d3d12_root_signature *root_signature, -+ struct vkd3d_descriptor_set_context *context) -+{ -+ unsigned int i; -+ HRESULT hr; -+ -+ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); -+ -+ if (!vkd3d_validate_descriptor_set_count(root_signature->device, root_signature->vk_set_count)) -+ return E_INVALIDARG; -+ -+ for (i = 0; i < root_signature->vk_set_count; ++i) -+ { -+ struct d3d12_descriptor_set_layout *layout = &root_signature->descriptor_set_layouts[i]; -+ struct 
vk_binding_array *array = &context->vk_bindings[i]; -+ -+ VKD3D_ASSERT(array->count); -+ -+ if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, array->flags, array->count, -+ array->unbounded_offset != UINT_MAX, array->bindings, &layout->vk_layout))) -+ return hr; -+ layout->unbounded_offset = array->unbounded_offset; -+ layout->table_index = array->table_index; -+ } -+ -+ return S_OK; -+} -+ - static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature, - VkDescriptorSetLayout *vk_set_layouts) - { -@@ -1510,7 +1509,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - VkDescriptorSetLayout vk_layouts[VKD3D_MAX_DESCRIPTOR_SETS]; - const struct vkd3d_vulkan_info *vk_info = &device->vk_info; - struct vkd3d_descriptor_set_context context; -- VkDescriptorSetLayoutBinding *binding_desc; - struct d3d12_root_signature_info info; - bool use_vk_heaps; - unsigned int i; -@@ -1518,7 +1516,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - - memset(&context, 0, sizeof(context)); - context.unbounded_offset = UINT_MAX; -- binding_desc = NULL; - - root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl; - root_signature->refcount = 1; -@@ -1580,20 +1577,14 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - sizeof(*root_signature->static_samplers)))) - goto fail; - -- if (!(binding_desc = vkd3d_calloc(info.binding_count, sizeof(*binding_desc)))) -- goto fail; -- context.first_binding = binding_desc; -- context.current_binding = binding_desc; -- - if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context))) - goto fail; - - /* We use KHR_push_descriptor for root descriptor parameters. 
*/ - if (vk_info->KHR_push_descriptor) - { -- if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, -- &context, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR))) -- goto fail; -+ d3d12_root_signature_append_vk_binding_array(root_signature, -+ VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, &context); - } - - root_signature->main_set = root_signature->vk_set_count; -@@ -1609,11 +1600,10 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - if (use_vk_heaps) - d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context); - -- if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0))) -+ if (FAILED(hr = d3d12_root_signature_create_descriptor_set_layouts(root_signature, &context))) - goto fail; - -- vkd3d_free(binding_desc); -- binding_desc = NULL; -+ descriptor_set_context_cleanup(&context); - - i = d3d12_root_signature_copy_descriptor_set_layouts(root_signature, vk_layouts); - if (FAILED(hr = vkd3d_create_pipeline_layout(device, i, -@@ -1629,7 +1619,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - return S_OK; - - fail: -- vkd3d_free(binding_desc); -+ descriptor_set_context_cleanup(&context); - d3d12_root_signature_cleanup(root_signature, device); - return hr; - } --- -2.45.2 -