diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-25232f2b2b35bcf1c265bc380c31cd1d32e.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-25232f2b2b35bcf1c265bc380c31cd1d32e.patch index 146bf532..64b9c578 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-25232f2b2b35bcf1c265bc380c31cd1d32e.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-25232f2b2b35bcf1c265bc380c31cd1d32e.patch @@ -1,4 +1,4 @@ -From 21b9bad56830f461f19e19dc3c81030500610728 Mon Sep 17 00:00:00 2001 +From 7fb609a62cc524d0d886ae20412492af36e71821 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 3 Sep 2024 07:18:49 +1000 Subject: [PATCH] Updated vkd3d to 25232f2b2b35bcf1c265bc380c31cd1d32e4f4a6. diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-cd74461d6dabae4e702de61a90533d811aa.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-cd74461d6dabae4e702de61a90533d811aa.patch index ff90313d..e1770dd8 100644 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-cd74461d6dabae4e702de61a90533d811aa.patch +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-cd74461d6dabae4e702de61a90533d811aa.patch @@ -1,4 +1,4 @@ -From ca6c91b4e3a8a1585c09b55ec35a7871e3a62213 Mon Sep 17 00:00:00 2001 +From 1652829e1c0845b53db7cc789c6ea1043beb4f55 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 9 Oct 2024 20:40:25 +1100 Subject: [PATCH] Updated vkd3d to cd74461d6dabae4e702de61a90533d811aa0a3fb. diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-9cb4207c92ec3ee05fce15580c89f2e5146.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-9cb4207c92ec3ee05fce15580c89f2e5146.patch index edc20380..dc4c9ab6 100644 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-9cb4207c92ec3ee05fce15580c89f2e5146.patch +++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-9cb4207c92ec3ee05fce15580c89f2e5146.patch @@ -1,4 +1,4 @@ -From b7ab480481541737db8e9b5514bc0e52309211c9 Mon Sep 17 00:00:00 2001 +From 4b1bc5d293eca9de7f1f00f3afac32e329364515 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 10 Oct 2024 07:16:15 +1100 Subject: [PATCH] Updated vkd3d to 9cb4207c92ec3ee05fce15580c89f2e5146354db. diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-cd249a47b86545fe0b3a4b477f854965e85.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-cd249a47b86545fe0b3a4b477f854965e85.patch index 717e8475..d24c8625 100644 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-cd249a47b86545fe0b3a4b477f854965e85.patch +++ b/patches/vkd3d-latest/0004-Updated-vkd3d-to-cd249a47b86545fe0b3a4b477f854965e85.patch @@ -1,4 +1,4 @@ -From 67023ff9d755c33d6d2a21fdaf3647a44871c830 Mon Sep 17 00:00:00 2001 +From 52de9a875b2a75a0d32486b990e685fed075bcab Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 11 Oct 2024 07:30:59 +1100 Subject: [PATCH] Updated vkd3d to cd249a47b86545fe0b3a4b477f854965e858b744. diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-9dd42d15ddca66458042b5e4b7775fa054b.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-9dd42d15ddca66458042b5e4b7775fa054b.patch index 352373a2..15cd3ee5 100644 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-9dd42d15ddca66458042b5e4b7775fa054b.patch +++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-9dd42d15ddca66458042b5e4b7775fa054b.patch @@ -1,4 +1,4 @@ -From ddc89bca32897d5d4ad81a9a566ba987f49c91de Mon Sep 17 00:00:00 2001 +From b3447b3e9de88f8796756d62f595ad31e04bf7da Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 15 Oct 2024 07:31:45 +1100 Subject: [PATCH] Updated vkd3d to 9dd42d15ddca66458042b5e4b7775fa054b4b0a2. 
diff --git a/patches/vkd3d-latest/0006-Updated-vkd3d-to-c89e547e3ef767da28be46bc37ac2ba71ea.patch b/patches/vkd3d-latest/0006-Updated-vkd3d-to-c89e547e3ef767da28be46bc37ac2ba71ea.patch new file mode 100644 index 00000000..42c1f4b5 --- /dev/null +++ b/patches/vkd3d-latest/0006-Updated-vkd3d-to-c89e547e3ef767da28be46bc37ac2ba71ea.patch @@ -0,0 +1,2764 @@ +From 6bcdf76e80406cdbc80e1fb9596b65db4f5fa1a4 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Thu, 17 Oct 2024 06:55:39 +1100 +Subject: [PATCH] Updated vkd3d to c89e547e3ef767da28be46bc37ac2ba71eabdbbe. + +--- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 10 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 2 + + libs/vkd3d/libs/vkd3d-shader/glsl.c | 1 + + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 1 + + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 35 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 109 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 483 ++++++++---- + libs/vkd3d/libs/vkd3d-shader/ir.c | 50 ++ + libs/vkd3d/libs/vkd3d-shader/tpf.c | 691 +++++++++++------- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 12 +- + 10 files changed, 951 insertions(+), 443 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 44b1714b56b..8f7e5cb666b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1395,7 +1395,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c + return VKD3D_OK; + } + +-bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, ++bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, + unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) + { + unsigned int i; +@@ -1463,7 +1463,7 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, + return false; + } + +-bool hlsl_sm1_usage_from_semantic(const char *semantic_name, ++bool sm1_usage_from_semantic_name(const char *semantic_name, + uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx) + { + static const struct +@@ -1780,7 +1780,7 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff + size_offset = put_u32(buffer, 0); + ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); + +- ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); ++ ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. */ + creator_offset = put_u32(buffer, 0); + put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(buffer, uniform_count); +@@ -2289,7 +2289,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + uint32_t token, usage_idx; + bool ret; + +- if (hlsl_sm1_register_from_semantic(version, element->semantic_name, ++ if (sm1_register_from_semantic_name(version, element->semantic_name, + element->semantic_index, output, ®.type, ®.reg)) + { + usage = 0; +@@ -2297,7 +2297,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + } + else + { +- ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); ++ ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx); + VKD3D_ASSERT(ret); + reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + reg.reg = element->register_index; +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 5db9d6da063..59494d2500d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -5872,6 +5872,8 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr + return; + } + e = &signature->elements[row_index]; ++ if (!e->sysval_semantic) ++ column_index += vsir_write_mask_get_component_idx(e->mask); + + if (column_index >= VKD3D_VEC4_SIZE) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index c8efdae3386..1e17de54ede 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -1293,6 +1293,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, + break; + case VKD3DSIH_DCL_INPUT: + case VKD3DSIH_DCL_INPUT_PS: ++ case VKD3DSIH_DCL_INPUT_PS_SGV: + case VKD3DSIH_DCL_INPUT_PS_SIV: + case VKD3DSIH_DCL_INPUT_SGV: + case VKD3DSIH_DCL_OUTPUT: +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index b44c0296f69..6ad0117fd5c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -2545,6 +2545,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, + decl->return_type = return_type; + decl->parameters = *parameters; + decl->loc = *loc; ++ list_init(&decl->extern_vars); + + if (!hlsl_types_are_equal(return_type, ctx->builtin_types.Void)) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 4082b14fe04..efe3aec024b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -488,6 +488,8 @@ struct hlsl_ir_var + * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 + * means function entry. */ + unsigned int first_write, last_read; ++ /* Whether the variable is read in any entry function. */ ++ bool is_read; + /* Offset where the variable's value is stored within its buffer in numeric register components. + * This in case the variable is uniform. */ + unsigned int buffer_offset; +@@ -611,6 +613,12 @@ struct hlsl_ir_function_decl + * executed. Needed to deal with return statements in non-uniform control + * flow, since some backends can't handle them. */ + struct hlsl_ir_var *early_return_var; ++ ++ /* List of all the extern semantic variables; linked by the ++ * hlsl_ir_var.extern_entry fields. This exists as a convenience because ++ * it is often necessary to iterate all extern variables and these can be ++ * declared in as function parameters, or as the function return value. */ ++ struct list extern_vars; + }; + + struct hlsl_ir_call +@@ -1019,10 +1027,11 @@ struct hlsl_ctx + struct hlsl_scope *dummy_scope; + /* List of all the scopes in the program; linked by the hlsl_scope.entry fields. */ + struct list scopes; +- /* List of all the extern variables; linked by the hlsl_ir_var.extern_entry fields. +- * This exists as a convenience because it is often necessary to iterate all extern variables +- * and these can be declared in global scope, as function parameters, or as the function +- * return value. */ ++ ++ /* List of all the extern variables, excluding semantic variables; linked ++ * by the hlsl_ir_var.extern_entry fields. 
This exists as a convenience ++ * because it is often necessary to iterate all extern variables declared ++ * in the global scope or as function parameters. */ + struct list extern_vars; + + /* List containing both the built-in HLSL buffers ($Globals and $Params) and the ones declared +@@ -1080,9 +1089,6 @@ struct hlsl_ctx + } constant_defs; + /* 'c' registers where the constants expected by SM2 sincos are stored. */ + struct hlsl_reg d3dsincosconst1, d3dsincosconst2; +- /* Number of temp. registers required for the shader to run, i.e. the largest temp register +- * index that will be used in the output bytecode (+1). */ +- uint32_t temp_count; + + /* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in + * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ +@@ -1421,6 +1427,9 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, + + void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body); + void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); ++uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); ++void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); ++void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); + int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); +@@ -1615,21 +1624,15 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, + + D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); + D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); +-bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, +- unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); +-bool hlsl_sm1_usage_from_semantic(const char *semantic_name, +- uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); + + void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); + int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + +-bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, +- struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output); +-bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, enum vkd3d_shader_register_type *type, bool *has_idx); +-int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); ++int tpf_compile(struct vsir_program *program, uint64_t config_flags, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, ++ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + + struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 2230cd5b919..ee13e193d49 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y 
++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -1208,6 +1208,32 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + return true; + } + ++static bool add_record_access_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const char *name, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *record = node_from_block(block); ++ const struct hlsl_type *type = record->data_type; ++ const struct hlsl_struct_field *field, *base; ++ ++ if ((field = get_struct_field(type->e.record.fields, type->e.record.field_count, name))) ++ { ++ unsigned int field_idx = field - type->e.record.fields; ++ ++ return add_record_access(ctx, block, record, field_idx, loc); ++ } ++ else if ((base = get_struct_field(type->e.record.fields, type->e.record.field_count, "$super"))) ++ { ++ unsigned int base_idx = base - type->e.record.fields; ++ ++ if (!add_record_access(ctx, block, record, base_idx, loc)) ++ return false; ++ return add_record_access_recurse(ctx, block, name, loc); ++ } ++ ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", name); ++ return false; ++} ++ + static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, struct list *list) + { + struct parse_variable_def *v, *v_next; +@@ -4260,6 +4286,35 @@ static bool intrinsic_min(struct hlsl_ctx *ctx, + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, params->args[0], params->args[1], loc); + } + ++static bool intrinsic_modf(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_function_decl *func; ++ struct hlsl_type *type; ++ char *body; ++ ++ static const char template[] = ++ "%s modf(%s x, out %s ip)\n" ++ "{\n" ++ " ip = trunc(x);\n" ++ " return x - ip;\n" ++ "}"; ++ ++ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) ++ return false; ++ type = params->args[0]->data_type; ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, ++ type->name, type->name, type->name))) ++ return false; ++ func = hlsl_compile_internal_function(ctx, "modf", body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return !!add_user_call(ctx, func, params, false, loc); ++} ++ + static bool intrinsic_mul(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -5147,6 +5202,7 @@ intrinsic_functions[] = + {"mad", 3, true, intrinsic_mad}, + {"max", 2, true, intrinsic_max}, + {"min", 2, true, intrinsic_min}, ++ {"modf", 2, true, intrinsic_modf}, + {"mul", 2, true, intrinsic_mul}, + {"normalize", 1, true, intrinsic_normalize}, + {"pow", 2, true, intrinsic_pow}, +@@ -6560,6 +6616,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + + %type switch_case + ++%type base_optional + %type field_type + %type named_struct_spec + %type unnamed_struct_spec +@@ -6774,11 +6831,28 @@ struct_spec: + | unnamed_struct_spec + + named_struct_spec: +- KW_STRUCT any_identifier '{' fields_list '}' ++ KW_STRUCT any_identifier base_optional '{' fields_list '}' + { + bool ret; + +- $$ = hlsl_new_struct_type(ctx, $2, $4.fields, $4.count); ++ if ($3) ++ { ++ char *name; ++ ++ if (!(name = hlsl_strdup(ctx, "$super"))) ++ YYABORT; ++ if (!hlsl_array_reserve(ctx, (void **)&$5.fields, &$5.capacity, 1 + $5.count, sizeof(*$5.fields))) ++ YYABORT; ++ memmove(&$5.fields[1], $5.fields, $5.count * sizeof(*$5.fields)); ++ ++$5.count; ++ ++ memset(&$5.fields[0], 0, sizeof($5.fields[0])); ++ 
$5.fields[0].type = $3; ++ $5.fields[0].loc = @3; ++ $5.fields[0].name = name; ++ } ++ ++ $$ = hlsl_new_struct_type(ctx, $2, $5.fields, $5.count); + + if (hlsl_get_var(ctx->cur_scope, $2)) + { +@@ -6805,6 +6879,23 @@ any_identifier: + | TYPE_IDENTIFIER + | NEW_IDENTIFIER + ++/* TODO: Multiple inheritance support for interfaces. */ ++base_optional: ++ %empty ++ { ++ $$ = NULL; ++ } ++ | ':' TYPE_IDENTIFIER ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); ++ if ($$->class != HLSL_CLASS_STRUCT) ++ { ++ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Base type \"%s\" is not a struct.", $2); ++ YYABORT; ++ } ++ vkd3d_free($2); ++ } ++ + fields_list: + %empty + { +@@ -8795,19 +8886,7 @@ postfix_expr: + + if (node->data_type->class == HLSL_CLASS_STRUCT) + { +- struct hlsl_type *type = node->data_type; +- const struct hlsl_struct_field *field; +- unsigned int field_idx = 0; +- +- if (!(field = get_struct_field(type->e.record.fields, type->e.record.field_count, $3))) +- { +- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", $3); +- vkd3d_free($3); +- YYABORT; +- } +- +- field_idx = field - type->e.record.fields; +- if (!add_record_access(ctx, $1, node, field_idx, &@2)) ++ if (!add_record_access_recurse(ctx, $1, $3, &@2)) + { + vkd3d_free($3); + YYABORT; +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index c5dd5e71e02..4cf9d5eb84a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -276,8 +276,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls + == base_type_get_semantic_equivalent(type2->e.numeric.type); + } + +-static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, +- struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, ++static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, + uint32_t index, bool output, const struct vkd3d_shader_location *loc) + { + struct hlsl_semantic new_semantic; +@@ -287,7 +287,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? 
"output" : "input", semantic->name, index))) + return NULL; + +- LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ LIST_FOR_EACH_ENTRY(ext_var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!ascii_strcasecmp(ext_var->name, new_name)) + { +@@ -339,12 +339,12 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + ext_var->is_input_semantic = 1; + ext_var->is_param = var->is_param; + list_add_before(&var->scope_entry, &ext_var->scope_entry); +- list_add_tail(&ctx->extern_vars, &ext_var->extern_entry); ++ list_add_tail(&func->extern_vars, &ext_var->extern_entry); + + return ext_var; + } + +-static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, ++static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, + uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; +@@ -375,7 +375,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + struct hlsl_ir_var *input; + struct hlsl_ir_load *load; + +- if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, ++ if (!(input = add_semantic_var(ctx, func, var, vector_type_src, modifiers, semantic, + semantic_index + i, false, loc))) + return; + +@@ -408,8 +408,8 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + } + } + +-static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, +- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct vkd3d_shader_location *loc = &lhs->node.loc; + struct hlsl_type *type = lhs->node.data_type; +@@ -466,30 +466,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * + return; + list_add_after(&c->entry, &element_load->node.entry); + +- prepend_input_copy_recurse(ctx, block, element_load, element_modifiers, semantic, elem_semantic_index); ++ prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, semantic, elem_semantic_index); + } + } + else + { +- prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); ++ prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index); + } + } + + /* Split inputs into two variables representing the semantic and temp registers, + * and copy the former to the latter, so that writes to input variables work. */ +-static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) ++static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) + { + struct hlsl_ir_load *load; + + /* This redundant load is expected to be deleted later by DCE. 
*/ + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) + return; +- list_add_head(&block->instrs, &load->node.entry); ++ list_add_head(&func->body.instrs, &load->node.entry); + +- prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); + } + +-static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, ++static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, + uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct hlsl_type *type = rhs->node.data_type, *vector_type; +@@ -517,18 +517,19 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + struct hlsl_ir_var *output; + struct hlsl_ir_load *load; + +- if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) ++ if (!(output = add_semantic_var(ctx, func, var, vector_type, ++ modifiers, semantic, semantic_index + i, true, loc))) + return; + + if (type->class == HLSL_CLASS_MATRIX) + { + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; +- hlsl_block_add_instr(block, c); ++ hlsl_block_add_instr(&func->body, c); + + if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) + return; +- hlsl_block_add_instr(block, &load->node); ++ hlsl_block_add_instr(&func->body, &load->node); + } + else + { +@@ -536,17 +537,17 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + + if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) + return; +- hlsl_block_add_instr(block, &load->node); ++ hlsl_block_add_instr(&func->body, &load->node); + } + + if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) + return; +- hlsl_block_add_instr(block, store); ++ hlsl_block_add_instr(&func->body, store); + } + } + +-static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, +- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ struct hlsl_ir_load *rhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct vkd3d_shader_location *loc = &rhs->node.loc; + struct hlsl_type *type = rhs->node.data_type; +@@ -580,34 +581,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * + + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; +- hlsl_block_add_instr(block, c); ++ hlsl_block_add_instr(&func->body, c); + + if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) + return; +- hlsl_block_add_instr(block, &element_load->node); ++ hlsl_block_add_instr(&func->body, &element_load->node); + +- append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); ++ append_output_copy_recurse(ctx, func, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { +- append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); ++ append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index); + } + } + + /* Split outputs into two variables representing the temp and semantic + * registers, and copy the former to the latter, so that reads from output + * variables work. 
*/ +-static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) ++static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) + { + struct hlsl_ir_load *load; + + /* This redundant load is expected to be deleted later by DCE. */ + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) + return; +- hlsl_block_add_instr(block, &load->node); ++ hlsl_block_add_instr(&func->body, &load->node); + +- append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); + } + + bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), +@@ -4113,7 +4114,7 @@ static void dump_function(struct rb_entry *entry, void *context) + } + } + +-static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, ++static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref, + struct hlsl_ir_node *instr) + { + if (!deref->rel_offset.node) +@@ -4126,6 +4127,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, + return true; + } + ++void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) ++{ ++ struct hlsl_scope *scope; ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ var->indexable = false; ++ } ++ ++ transform_derefs(ctx, mark_indexable_var, &entry_func->body); ++} ++ + static char get_regset_name(enum hlsl_regset regset) + { + switch (regset) +@@ -4142,11 +4157,11 @@ static char get_regset_name(enum hlsl_regset regset) + vkd3d_unreachable(); + } + +-static void allocate_register_reservations(struct hlsl_ctx *ctx) ++static void allocate_register_reservations(struct hlsl_ctx *ctx, struct list *extern_vars) + { + struct hlsl_ir_var *var; + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ LIST_FOR_EACH_ENTRY(var, extern_vars, struct hlsl_ir_var, extern_entry) + { + const struct hlsl_reg_reservation *reservation = &var->reg_reservation; + unsigned int r; +@@ -4352,7 +4367,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + } + } + +-static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) ++static void init_var_liveness(struct hlsl_ir_var *var) ++{ ++ if (var->is_uniform || var->is_input_semantic) ++ var->first_write = 1; ++ else if (var->is_output_semantic) ++ var->last_read = UINT_MAX; ++} ++ ++void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { + struct hlsl_scope *scope; + struct hlsl_ir_var *var; +@@ -4366,16 +4389,29 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if (var->is_uniform || var->is_input_semantic) +- var->first_write = 1; +- else if (var->is_output_semantic) +- var->last_read = UINT_MAX; +- } ++ init_var_liveness(var); ++ ++ LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) ++ init_var_liveness(var); + + compute_liveness_recurse(&entry_func->body, 0, 0); + } + ++static void mark_vars_usage(struct hlsl_ctx *ctx) ++{ ++ struct hlsl_scope *scope; ++ struct hlsl_ir_var 
*var; ++ ++ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (var->last_read) ++ var->is_read = true; ++ } ++ } ++} ++ + struct register_allocator + { + struct allocation +@@ -5094,9 +5130,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + * index to all (simultaneously live) variables or intermediate values. Agnostic + * as to how many registers are actually available for the current backend, and + * does not handle constants. */ +-static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) ++uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { + struct register_allocator allocator = {0}; ++ struct hlsl_scope *scope; ++ struct hlsl_ir_var *var; ++ ++ /* Reset variable temp register allocations. */ ++ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform)) ++ memset(var->regs, 0, sizeof(var->regs)); ++ } ++ } + + /* ps_1_* outputs are special and go in temp register 0. */ + if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) +@@ -5105,8 +5153,7 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio + + for (i = 0; i < entry_func->parameters.count; ++i) + { +- const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; +- ++ var = entry_func->parameters.vars[i]; + if (var->is_output_semantic) + { + record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); +@@ -5116,11 +5163,13 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio + } + + allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); +- ctx->temp_count = allocator.reg_count; + vkd3d_free(allocator.allocations); ++ ++ return allocator.reg_count; + } + +-static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) ++static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, ++ unsigned int *counter, bool output, bool is_patch_constant_func) + { + static const char *const shader_names[] = + { +@@ -5133,27 +5182,28 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + }; + + enum vkd3d_shader_register_type type; ++ struct vkd3d_shader_version version; + uint32_t reg; + bool builtin; + + VKD3D_ASSERT(var->semantic.name); + +- if (ctx->profile->major_version < 4) ++ version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ ++ if (version.major < 4) + { +- struct vkd3d_shader_version version; + enum vkd3d_decl_usage usage; + uint32_t usage_idx; + + /* ps_1_* outputs are special and go in temp register 0. 
*/ +- if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ if (version.major == 1 && output && version.type == VKD3D_SHADER_TYPE_PIXEL) + return; + +- version.major = ctx->profile->major_version; +- version.minor = ctx->profile->minor_version; +- version.type = ctx->profile->type; +- builtin = hlsl_sm1_register_from_semantic(&version, ++ builtin = sm1_register_from_semantic_name(&version, + var->semantic.name, var->semantic.index, output, &type, ®); +- if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) ++ if (!builtin && !sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic '%s'.", var->semantic.name); +@@ -5168,19 +5218,21 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + enum vkd3d_shader_sysval_semantic semantic; + bool has_idx; + +- if (!sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output)) ++ if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, ++ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic '%s'.", var->semantic.name); + return; + } +- if ((builtin = hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, &has_idx))) ++ ++ if ((builtin = sm4_register_from_semantic_name(&version, var->semantic.name, output, &type, &has_idx))) + reg = has_idx ? var->semantic.index : 0; + } + + if (builtin) + { +- TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[ctx->profile->type], ++ TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[version.type], + output ? 
"output" : "input", var->semantic.name, var->semantic.index, type, reg); + } + else +@@ -5194,17 +5246,18 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + } + } + +-static void allocate_semantic_registers(struct hlsl_ctx *ctx) ++static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { ++ bool is_patch_constant_func = entry_func == ctx->patch_constant_func; + unsigned int input_counter = 0, output_counter = 0; + struct hlsl_ir_var *var; + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_input_semantic) +- allocate_semantic_register(ctx, var, &input_counter, false); ++ allocate_semantic_register(ctx, var, &input_counter, false, is_patch_constant_func); + if (var->is_output_semantic) +- allocate_semantic_register(ctx, var, &output_counter, true); ++ allocate_semantic_register(ctx, var, &output_counter, true, is_patch_constant_func); + } + } + +@@ -5279,7 +5332,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va + + TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); + buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); +- if (var->last_read) ++ if (var->is_read) + buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size); + } + +@@ -5510,15 +5563,15 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum + return NULL; + } + +-static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) ++static void allocate_objects(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, enum hlsl_regset regset) + { + char regset_name = get_regset_name(regset); + uint32_t min_index = 0, id = 0; + struct hlsl_ir_var *var; + +- if (regset == HLSL_REGSET_UAVS) ++ if (regset == HLSL_REGSET_UAVS && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + { +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->semantic.name && (!ascii_strcasecmp(var->semantic.name, "color") + || !ascii_strcasecmp(var->semantic.name, "sv_target"))) +@@ -6228,22 +6281,104 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + } while (progress); + } + +-static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, +- struct vsir_program *program, bool output, struct hlsl_ir_var *var) ++static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct shader_signature *signature, bool output, struct hlsl_ir_var *var) + { + enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; ++ enum vkd3d_shader_component_type component_type; ++ unsigned int register_index, mask, use_mask; ++ const char *name = var->semantic.name; + enum vkd3d_shader_register_type type; +- struct shader_signature *signature; + struct signature_element *element; +- unsigned int register_index, mask; + +- if ((!output && !var->last_read) || (output && !var->first_write)) +- return; ++ if (hlsl_version_ge(ctx, 4, 0)) ++ { ++ struct vkd3d_string_buffer *string; ++ bool has_idx, ret; + +- if (output) +- signature = &program->output_signature; ++ ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ++ ctx->semantic_compat_mapping, ctx->domain, var->semantic.name, var->semantic.index, ++ output, 
signature == &program->patch_constant_signature); ++ VKD3D_ASSERT(ret); ++ if (sysval == ~0u) ++ return; ++ ++ if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) ++ { ++ register_index = has_idx ? var->semantic.index : ~0u; ++ } ++ else ++ { ++ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); ++ register_index = var->regs[HLSL_REGSET_NUMERIC].id; ++ } ++ ++ /* NOTE: remember to change this to the actually allocated mask once ++ * we start optimizing interstage signatures. */ ++ mask = (1u << var->data_type->dimx) - 1; ++ use_mask = mask; /* FIXME: retrieve use mask accurately. */ ++ ++ switch (var->data_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ break; ++ ++ case HLSL_TYPE_INT: ++ component_type = VKD3D_SHADER_COMPONENT_INT; ++ break; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_UINT: ++ component_type = VKD3D_SHADER_COMPONENT_UINT; ++ break; ++ ++ default: ++ if ((string = hlsl_type_to_string(ctx, var->data_type))) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Invalid data type %s for semantic variable %s.", string->buffer, var->name); ++ hlsl_release_string_buffer(ctx, string); ++ component_type = VKD3D_SHADER_COMPONENT_VOID; ++ break; ++ } ++ ++ if (sysval == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) ++ name = "SV_Target"; ++ else if (sysval == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) ++ name ="SV_Depth"; ++ else if (sysval == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) ++ name = "SV_Position"; ++ } + else +- signature = &program->input_signature; ++ { ++ if ((!output && !var->last_read) || (output && !var->first_write)) ++ return; ++ ++ if (!sm1_register_from_semantic_name(&program->shader_version, ++ var->semantic.name, var->semantic.index, output, &type, ®ister_index)) ++ { ++ enum vkd3d_decl_usage usage; ++ unsigned int usage_idx; ++ bool ret; ++ ++ register_index = var->regs[HLSL_REGSET_NUMERIC].id; ++ ++ ret = sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx); ++ VKD3D_ASSERT(ret); ++ /* With the exception of vertex POSITION output, none of these are ++ * system values. Pixel POSITION input is not equivalent to ++ * SV_Position; the closer equivalent is VPOS, which is not declared ++ * as a semantic. */ ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX ++ && output && usage == VKD3D_DECL_USAGE_POSITION) ++ sysval = VKD3D_SHADER_SV_POSITION; ++ } ++ ++ mask = (1 << var->data_type->dimx) - 1; ++ use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ ++ component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ } + + if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, + signature->element_count + 1, sizeof(*signature->elements))) +@@ -6252,30 +6387,9 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, + return; + } + element = &signature->elements[signature->element_count++]; +- +- if (!hlsl_sm1_register_from_semantic(&program->shader_version, +- var->semantic.name, var->semantic.index, output, &type, ®ister_index)) +- { +- enum vkd3d_decl_usage usage; +- unsigned int usage_idx; +- bool ret; +- +- register_index = var->regs[HLSL_REGSET_NUMERIC].id; +- +- ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); +- VKD3D_ASSERT(ret); +- /* With the exception of vertex POSITION output, none of these are +- * system values. Pixel POSITION input is not equivalent to +- * SV_Position; the closer equivalent is VPOS, which is not declared +- * as a semantic. */ +- if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX +- && output && usage == VKD3D_DECL_USAGE_POSITION) +- sysval = VKD3D_SHADER_SV_POSITION; +- } +- mask = (1 << var->data_type->dimx) - 1; +- + memset(element, 0, sizeof(*element)); +- if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) ++ ++ if (!(element->semantic_name = vkd3d_strdup(name))) + { + --signature->element_count; + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; +@@ -6283,26 +6397,35 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, + } + element->semantic_index = var->semantic.index; + element->sysval_semantic = sysval; +- element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ element->component_type = component_type; + element->register_index = register_index; + element->target_location = register_index; + element->register_count = 1; + element->mask = mask; +- element->used_mask = mask; ++ element->used_mask = use_mask; + if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) + element->interpolation_mode = VKD3DSIM_LINEAR; + } + +-static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) ++static void generate_vsir_signature(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_function_decl *func) + { + struct hlsl_ir_var *var; + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (var->is_input_semantic) +- sm1_generate_vsir_signature_entry(ctx, program, false, var); +- if (var->is_output_semantic) +- sm1_generate_vsir_signature_entry(ctx, program, true, var); ++ if (func == ctx->patch_constant_func) ++ { ++ generate_vsir_signature_entry(ctx, program, ++ &program->patch_constant_signature, var->is_output_semantic, var); ++ } ++ else ++ { ++ if (var->is_input_semantic) ++ generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var); ++ if (var->is_output_semantic) ++ generate_vsir_signature_entry(ctx, program, &program->output_signature, true, var); ++ } + } + } + +@@ -6873,7 +6996,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, + type = VKD3DSPR_TEMP; + register_index = 0; + } +- else if (!hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name, ++ else if (!sm1_register_from_semantic_name(&version, deref->var->semantic.name, + deref->var->semantic.index, true, &type, ®ister_index)) + { + VKD3D_ASSERT(reg.allocated); +@@ -6928,7 
+7051,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; +- if (hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name, ++ if (sm1_register_from_semantic_name(&version, deref->var->semantic.name, + deref->var->semantic.index, false, &type, ®ister_index)) + { + writemask = (1 << deref->var->data_type->dimx) - 1; +@@ -7233,9 +7356,6 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo + } + } + +-/* OBJECTIVE: Translate all the information from ctx and entry_func to the +- * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() +- * without relying on ctx and entry_func. */ + static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) + { +@@ -7262,7 +7382,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + ctab->code = buffer.data; + ctab->size = buffer.size; + +- sm1_generate_vsir_signature(ctx, program); ++ generate_vsir_signature(ctx, program, entry_func); + + hlsl_block_init(&block); + sm1_generate_vsir_constant_defs(ctx, program, &block); +@@ -7272,6 +7392,29 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + sm1_generate_vsir_block(ctx, &entry_func->body, program); + } + ++/* OBJECTIVE: Translate all the information from ctx and entry_func to the ++ * vsir_program, so it can be used as input to tpf_compile() without relying ++ * on ctx and entry_func. */ ++static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, ++ uint64_t config_flags, struct vsir_program *program) ++{ ++ struct vkd3d_shader_version version = {0}; ++ ++ version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ ++ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED)) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ ++ generate_vsir_signature(ctx, program, entry_func); ++ if (version.type == VKD3D_SHADER_TYPE_HULL) ++ generate_vsir_signature(ctx, program, ctx->patch_constant_func); ++} ++ + static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, + struct hlsl_block **found_block) + { +@@ -7572,16 +7715,23 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru + return true; + } + +-int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, +- enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) ++static void process_entry_function(struct hlsl_ctx *ctx, ++ const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func) + { + const struct hlsl_profile_info *profile = ctx->profile; ++ struct hlsl_block static_initializers, global_uniforms; + struct hlsl_block *const body = &entry_func->body; + struct recursive_call_ctx recursive_call_ctx; + struct hlsl_ir_var *var; + unsigned int i; + +- list_move_head(&body->instrs, &ctx->static_initializers.instrs); ++ if (!hlsl_clone_block(ctx, &static_initializers, &ctx->static_initializers)) ++ return; ++ list_move_head(&body->instrs, &static_initializers.instrs); ++ ++ if (!hlsl_clone_block(ctx, &global_uniforms, global_uniform_block)) ++ return; ++ 
list_move_head(&body->instrs, &global_uniforms.instrs); + + memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); + hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); +@@ -7590,7 +7740,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + /* Avoid going into an infinite loop when processing call instructions. + * lower_return() recurses into inferior calls. */ + if (ctx->result) +- return ctx->result; ++ return; + + if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) + lower_ir(ctx, lower_f16tof32, body); +@@ -7602,20 +7752,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + lower_ir(ctx, lower_matrix_swizzles, body); + lower_ir(ctx, lower_index_loads, body); + +- LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) +- { +- if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) +- prepend_uniform_copy(ctx, body, var); +- } +- + for (i = 0; i < entry_func->parameters.count; ++i) + { + var = entry_func->parameters.vars[i]; + +- if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ if (hlsl_type_is_resource(var->data_type)) + { + prepend_uniform_copy(ctx, body, var); + } ++ else if ((var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ { ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL && entry_func == ctx->patch_constant_func) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Patch constant function parameter \"%s\" cannot be uniform.", var->name); ++ else ++ prepend_uniform_copy(ctx, body, var); ++ } + else + { + if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT +@@ -7627,9 +7779,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + } + + if (var->storage_modifiers & HLSL_STORAGE_IN) +- prepend_input_var_copy(ctx, body, var); ++ prepend_input_var_copy(ctx, entry_func, var); + if (var->storage_modifiers & HLSL_STORAGE_OUT) +- append_output_var_copy(ctx, body, var); ++ append_output_var_copy(ctx, entry_func, var); + } + } + if (entry_func->return_var) +@@ -7638,19 +7790,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); + +- append_output_var_copy(ctx, body, entry_func->return_var); ++ append_output_var_copy(ctx, entry_func, entry_func->return_var); + } + +- parse_entry_function_attributes(ctx, entry_func); +- if (ctx->result) +- return ctx->result; +- +- if (profile->type == VKD3D_SHADER_TYPE_HULL) +- validate_hull_shader_attributes(ctx, entry_func); +- else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) +- hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, +- "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); +- + if (profile->major_version >= 4) + { + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); +@@ -7726,29 +7868,69 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + while (hlsl_transform_ir(ctx, dce, body, NULL)); + + compute_liveness(ctx, entry_func); ++ mark_vars_usage(ctx); + +- if (TRACE_ON()) +- rb_for_each_entry(&ctx->functions, dump_function, ctx); ++ calculate_resource_register_counts(ctx); + +- transform_derefs(ctx, mark_indexable_vars, body); ++ allocate_register_reservations(ctx, 
&ctx->extern_vars); ++ allocate_register_reservations(ctx, &entry_func->extern_vars); ++ allocate_semantic_registers(ctx, entry_func); ++} + +- calculate_resource_register_counts(ctx); ++int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, ++ enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) ++{ ++ const struct hlsl_profile_info *profile = ctx->profile; ++ struct hlsl_block global_uniform_block; ++ struct hlsl_ir_var *var; ++ ++ parse_entry_function_attributes(ctx, entry_func); ++ if (ctx->result) ++ return ctx->result; ++ ++ if (profile->type == VKD3D_SHADER_TYPE_HULL) ++ validate_hull_shader_attributes(ctx, entry_func); ++ else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) ++ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, ++ "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); ++ ++ hlsl_block_init(&global_uniform_block); ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) ++ prepend_uniform_copy(ctx, &global_uniform_block, var); ++ } + +- allocate_register_reservations(ctx); ++ process_entry_function(ctx, &global_uniform_block, entry_func); ++ if (ctx->result) ++ return ctx->result; ++ ++ if (profile->type == VKD3D_SHADER_TYPE_HULL) ++ { ++ process_entry_function(ctx, &global_uniform_block, ctx->patch_constant_func); ++ if (ctx->result) ++ return ctx->result; ++ } ++ ++ hlsl_block_cleanup(&global_uniform_block); + +- allocate_temp_registers(ctx, entry_func); + if (profile->major_version < 4) + { ++ mark_indexable_vars(ctx, entry_func); ++ allocate_temp_registers(ctx, entry_func); + allocate_const_registers(ctx, entry_func); + } + else + { + allocate_buffers(ctx); +- allocate_objects(ctx, HLSL_REGSET_TEXTURES); +- allocate_objects(ctx, HLSL_REGSET_UAVS); ++ allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES); ++ allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS); + } +- allocate_semantic_registers(ctx); +- allocate_objects(ctx, HLSL_REGSET_SAMPLERS); ++ allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); ++ ++ if (TRACE_ON()) ++ rb_for_each_entry(&ctx->functions, dump_function, ctx); + + if (ctx->result) + return ctx->result; +@@ -7777,7 +7959,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + } + + case VKD3D_SHADER_TARGET_DXBC_TPF: +- return hlsl_sm4_write(ctx, entry_func, out); ++ { ++ uint32_t config_flags = vkd3d_shader_init_config_flags(); ++ struct vsir_program program; ++ int result; ++ ++ sm4_generate_vsir(ctx, entry_func, config_flags, &program); ++ if (ctx->result) ++ { ++ vsir_program_cleanup(&program); ++ return ctx->result; ++ } ++ ++ result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func); ++ vsir_program_cleanup(&program); ++ return result; ++ } + + default: + ERR("Unsupported shader target type %#x.\n", target_type); +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 0bcc3d0a1f7..c475bf92279 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -6786,6 +6786,12 @@ sysval_validation_data[] = + VKD3D_SHADER_COMPONENT_FLOAT, 4}, + [VKD3D_SHADER_SV_CULL_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, + VKD3D_SHADER_COMPONENT_FLOAT, 4}, ++ [VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, 
++ [VKD3D_SHADER_SV_TESS_FACTOR_QUADINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, ++ [VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, ++ [VKD3D_SHADER_SV_TESS_FACTOR_TRIINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, ++ [VKD3D_SHADER_SV_TESS_FACTOR_LINEDET] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, ++ [VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + }; + + static void vsir_validate_signature_element(struct validation_context *ctx, +@@ -6942,11 +6948,23 @@ static void vsir_validate_signature_element(struct validation_context *ctx, + idx, signature_type_name, element->interpolation_mode); + } + ++static const unsigned int allowed_signature_phases[] = ++{ ++ [SIGNATURE_TYPE_INPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, ++ [SIGNATURE_TYPE_OUTPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, ++ [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, ++}; ++ + static void vsir_validate_signature(struct validation_context *ctx, + const struct shader_signature *signature, enum vsir_signature_type signature_type) + { + unsigned int i; + ++ if (signature->element_count != 0 && !(allowed_signature_phases[signature_type] ++ & (1u << ctx->program->shader_version.type))) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Unexpected %s signature.", signature_type_names[signature_type]); ++ + for (i = 0; i < signature->element_count; ++i) + vsir_validate_signature_element(ctx, signature, signature_type, i); + } +@@ -7501,6 +7519,38 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c + "Patch constant signature is only valid for hull and domain shaders."); + } + ++ switch (program->shader_version.type) ++ { ++ case VKD3D_SHADER_TYPE_HULL: ++ case VKD3D_SHADER_TYPE_DOMAIN: ++ case VKD3D_SHADER_TYPE_GEOMETRY: ++ if (program->input_control_point_count == 0) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Invalid zero input control point count."); ++ break; ++ ++ default: ++ if (program->input_control_point_count != 0) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Invalid input control point count %u.", ++ program->input_control_point_count); ++ } ++ ++ switch (program->shader_version.type) ++ { ++ case VKD3D_SHADER_TYPE_HULL: ++ if (program->output_control_point_count == 0) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Invalid zero output control point count."); ++ break; ++ ++ default: ++ if (program->output_control_point_count != 0) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Invalid output control point count %u.", ++ program->output_control_point_count); ++ } ++ + vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT); + vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); + vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index f79e97e92d4..54344f2afc0 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -1403,12 +1403,16 @@ struct sm4_stat + uint32_t fields[VKD3D_STAT_COUNT]; + }; + +-struct tpf_writer ++struct tpf_compiler + { ++ /* OBJECTIVE: We want to get rid of this HLSL IR specific field. 
*/ + struct hlsl_ctx *ctx; +- struct vkd3d_bytecode_buffer *buffer; ++ struct vsir_program *program; + struct vkd3d_sm4_lookup_tables lookup; + struct sm4_stat *stat; ++ ++ struct vkd3d_bytecode_buffer *buffer; ++ struct dxbc_writer dxbc; + }; + + static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1916,15 +1920,6 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + } + } + +-static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct sm4_stat *stat, +- struct vkd3d_bytecode_buffer *buffer) +-{ +- tpf->ctx = ctx; +- tpf->buffer = buffer; +- tpf->stat = stat; +- init_sm4_lookup_tables(&tpf->lookup); +-} +- + static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) + { +@@ -2986,7 +2981,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con + return VKD3D_OK; + } + +-static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); ++static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block); + + static bool type_is_integer(const struct hlsl_type *type) + { +@@ -3002,8 +2997,8 @@ static bool type_is_integer(const struct hlsl_type *type) + } + } + +-bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, enum vkd3d_shader_register_type *type, bool *has_idx) ++bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, ++ const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) + { + unsigned int i; + +@@ -3023,6 +3018,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID, false}, + ++ {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_OUTPOINTID, false}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_PRIMID, false}, ++ + /* Put sv_target in this table, instead of letting it fall through to + * default varying allocation, so that the register index matches the + * usage index. 
*/ +@@ -3035,9 +3033,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { +- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) ++ if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) + && output == register_table[i].output +- && ctx->profile->type == register_table[i].shader_type) ++ && version->type == register_table[i].shader_type) + { + if (type) + *type = register_table[i].type; +@@ -3049,8 +3047,57 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + return false; + } + +-bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, +- struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output) ++static bool get_tessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, ++ enum vkd3d_tessellator_domain domain, uint32_t index) ++{ ++ switch (domain) ++ { ++ case VKD3D_TESSELLATOR_DOMAIN_LINE: ++ if (index == 0) ++ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; ++ else if (index == 1) ++ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; ++ else ++ return false; ++ return true; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: ++ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; ++ return index < 3; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_QUAD: ++ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE; ++ return index < 4; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static bool get_insidetessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, ++ enum vkd3d_tessellator_domain domain, uint32_t index) ++{ ++ switch (domain) ++ { ++ case VKD3D_TESSELLATOR_DOMAIN_LINE: ++ return false; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: ++ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIINT; ++ return index == 0; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_QUAD: ++ *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADINT; ++ return index < 2; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, ++ const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, ++ const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func) + { + unsigned int i; + +@@ -3075,6 +3122,12 @@ bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, + {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, + ++ {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_HULL, ~0u}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, ++ ++ {"sv_position", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_POSITION}, ++ + {"position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_PRIMITIVE_ID}, +@@ -3098,16 +3151,42 @@ bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, + {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, + {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, + }; +- bool needs_compat_mapping = ascii_strncasecmp(hlsl_semantic->name, "sv_", 3); ++ bool needs_compat_mapping = 
ascii_strncasecmp(semantic_name, "sv_", 3); ++ ++ if (is_patch_constant_func) ++ { ++ if (output) ++ { ++ if (!ascii_strcasecmp(semantic_name, "sv_tessfactor")) ++ return get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); ++ if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor")) ++ return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); ++ if (!ascii_strcasecmp(semantic_name, "sv_position")) ++ { ++ *sysval_semantic = VKD3D_SHADER_SV_NONE; ++ return true; ++ } ++ } ++ else ++ { ++ if (!ascii_strcasecmp(semantic_name, "sv_primitiveid") ++ || !ascii_strcasecmp(semantic_name, "sv_position")) ++ { ++ *sysval_semantic = ~0u; ++ return true; ++ } ++ return false; ++ } ++ } + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { +- if (!ascii_strcasecmp(hlsl_semantic->name, semantics[i].name) ++ if (!ascii_strcasecmp(semantic_name, semantics[i].name) + && output == semantics[i].output +- && (ctx->semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) +- && ctx->profile->type == semantics[i].shader_type) ++ && (semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) ++ && version->type == semantics[i].shader_type) + { +- *semantic = semantics[i].semantic; ++ *sysval_semantic = semantics[i].semantic; + return true; + } + } +@@ -3115,7 +3194,7 @@ bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, + if (!needs_compat_mapping) + return false; + +- *semantic = VKD3D_SHADER_SV_NONE; ++ *sysval_semantic = VKD3D_SHADER_SV_NONE; + return true; + } + +@@ -3133,110 +3212,46 @@ static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + ctx->result = buffer->status; + } + +-static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) ++static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag) + { ++ bool output = tag == TAG_OSGN || tag == TAG_PCSG; + struct vkd3d_bytecode_buffer buffer = {0}; +- struct vkd3d_string_buffer *string; +- const struct hlsl_ir_var *var; +- size_t count_position; + unsigned int i; +- bool ret; + +- count_position = put_u32(&buffer, 0); ++ put_u32(&buffer, signature->element_count); + put_u32(&buffer, 8); /* unknown */ + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ for (i = 0; i < signature->element_count; ++i) + { +- unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; +- enum vkd3d_shader_sysval_semantic semantic; +- uint32_t usage_idx, reg_idx; +- bool has_idx; +- +- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) +- continue; +- +- ret = sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); +- VKD3D_ASSERT(ret); +- if (semantic == ~0u) +- continue; +- usage_idx = var->semantic.index; ++ const struct signature_element *element = &signature->elements[i]; ++ enum vkd3d_shader_sysval_semantic sysval; ++ uint32_t used_mask = element->used_mask; + +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, &has_idx)) +- { +- reg_idx = has_idx ? var->semantic.index : ~0u; +- } +- else +- { +- VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); +- reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; +- } +- +- use_mask = width; /* FIXME: accurately report use mask */ + if (output) +- use_mask = 0xf ^ use_mask; ++ used_mask = 0xf ^ used_mask; + +- /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). 
*/ +- if (semantic >= VKD3D_SHADER_SV_TARGET) +- semantic = VKD3D_SHADER_SV_NONE; ++ sysval = element->sysval_semantic; ++ if (sysval >= VKD3D_SHADER_SV_TARGET) ++ sysval = VKD3D_SHADER_SV_NONE; + + put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, usage_idx); +- put_u32(&buffer, semantic); +- switch (var->data_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- put_u32(&buffer, VKD3D_SHADER_COMPONENT_FLOAT); +- break; +- +- case HLSL_TYPE_INT: +- put_u32(&buffer, VKD3D_SHADER_COMPONENT_INT); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_UINT: +- put_u32(&buffer, VKD3D_SHADER_COMPONENT_UINT); +- break; +- +- default: +- if ((string = hlsl_type_to_string(ctx, var->data_type))) +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Invalid data type %s for semantic variable %s.", string->buffer, var->name); +- hlsl_release_string_buffer(ctx, string); +- put_u32(&buffer, VKD3D_SHADER_COMPONENT_VOID); +- } +- put_u32(&buffer, reg_idx); +- put_u32(&buffer, vkd3d_make_u16(width, use_mask)); ++ put_u32(&buffer, element->semantic_index); ++ put_u32(&buffer, sysval); ++ put_u32(&buffer, element->component_type); ++ put_u32(&buffer, element->register_index); ++ put_u32(&buffer, vkd3d_make_u16(element->mask, used_mask)); + } + +- i = 0; +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ for (i = 0; i < signature->element_count; ++i) + { +- enum vkd3d_shader_sysval_semantic semantic; +- const char *name = var->semantic.name; ++ const struct signature_element *element = &signature->elements[i]; + size_t string_offset; + +- if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) +- continue; +- +- sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); +- if (semantic == ~0u) +- continue; +- +- if (semantic == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) +- string_offset = put_string(&buffer, "SV_Target"); +- else if (semantic == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) +- string_offset = put_string(&buffer, "SV_Depth"); +- else if (semantic == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) +- string_offset = put_string(&buffer, "SV_Position"); +- else +- string_offset = put_string(&buffer, name); +- set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); ++ string_offset = put_string(&buffer, element->semantic_name); ++ set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); + } + +- set_u32(&buffer, count_position, i); +- +- add_section(ctx, dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); ++ add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); + } + + static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +@@ -3863,7 +3878,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + { + uint32_t flags = 0; + +- if (var->last_read) ++ if (var->is_read) + flags |= D3D_SVF_USED; + + put_u32(&buffer, 0); /* name */ +@@ -4091,11 +4106,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s + *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); + } + +-static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, ++static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg, + uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) + { +- const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; ++ const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref); + const struct hlsl_ir_var *var = deref->var; ++ struct hlsl_ctx *ctx = tpf->ctx; + + if (var->is_uniform) + { +@@ -4105,7 +4122,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + reg->type = VKD3DSPR_RESOURCE; + reg->dimension = VSIR_DIMENSION_VEC4; +- if (hlsl_version_ge(ctx, 5, 1)) ++ if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ +@@ -4124,7 +4141,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + reg->type = VKD3DSPR_UAV; + reg->dimension = VSIR_DIMENSION_VEC4; +- if (hlsl_version_ge(ctx, 5, 1)) ++ if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ +@@ -4143,7 +4160,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + reg->type = VKD3DSPR_SAMPLER; + reg->dimension = VSIR_DIMENSION_NONE; +- if (hlsl_version_ge(ctx, 5, 1)) ++ if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ +@@ -4165,7 +4182,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3DSPR_CONSTBUFFER; + reg->dimension = VSIR_DIMENSION_VEC4; +- if (hlsl_version_ge(ctx, 5, 1)) ++ if (vkd3d_shader_ver_ge(version, 5, 1)) + { + reg->idx[0].offset = var->buffer->reg.id; + reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ +@@ -4185,7 +4202,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + bool has_idx; + +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, ®->type, &has_idx)) ++ if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + +@@ -4195,7 +4212,10 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + reg->idx_count = 1; + } + +- reg->dimension = VSIR_DIMENSION_VEC4; ++ if (shader_sm4_is_scalar_register(reg)) ++ reg->dimension = VSIR_DIMENSION_SCALAR; ++ else ++ reg->dimension = 
VSIR_DIMENSION_VEC4; + *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); + } + else +@@ -4214,7 +4234,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + bool has_idx; + +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, ®->type, &has_idx)) ++ if (sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx)) + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + +@@ -4224,7 +4244,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + reg->idx_count = 1; + } + +- if (reg->type == VKD3DSPR_DEPTHOUT) ++ if (shader_sm4_is_scalar_register(reg)) + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; +@@ -4250,13 +4270,13 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + } + } + +-static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, ++static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, + const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) + { + unsigned int hlsl_swizzle; + uint32_t writemask; + +- sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref, sm4_instr); ++ sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr); + if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) + { + hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +@@ -4294,7 +4314,7 @@ static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, + } + } + +-static void sm4_src_from_node(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, ++static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, + const struct hlsl_ir_node *instr, uint32_t map_writemask) + { + unsigned int hlsl_swizzle; +@@ -4330,7 +4350,7 @@ static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_ + return 0; + } + +-static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, ++static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, + enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) + { + const struct vkd3d_sm4_register_type_info *register_type_info; +@@ -4390,7 +4410,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v + return token; + } + +-static void sm4_write_register_index(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, ++static void sm4_write_register_index(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, + unsigned int j) + { + unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); +@@ -4420,7 +4440,7 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct + } + } + +-static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) ++static void sm4_write_dst_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_dst_param *dst) + { + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t token = 0; +@@ -4433,7 +4453,7 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk + sm4_write_register_index(tpf, &dst->reg, j); + } + +-static void sm4_write_src_register(const struct tpf_writer *tpf, const 
struct vkd3d_shader_src_param *src) ++static void sm4_write_src_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_src_param *src) + { + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t token = 0, mod_token = 0; +@@ -4494,9 +4514,9 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk + } + } + +-static void sm4_update_stat_counters(const struct tpf_writer *tpf, const struct sm4_instruction *instr) ++static void sm4_update_stat_counters(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) + { +- enum vkd3d_shader_type shader_type = tpf->ctx->profile->type; ++ enum vkd3d_shader_type shader_type = tpf->program->shader_version.type; + enum vkd3d_sm4_stat_field stat_field; + uint32_t opcode; + +@@ -4539,7 +4559,7 @@ static void sm4_update_stat_counters(const struct tpf_writer *tpf, const struct + } + } + +-static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) ++static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) + { + uint32_t token = instr->opcode | instr->extra_bits; + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; +@@ -4606,7 +4626,7 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, + return true; + } + +-static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) ++static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) + { + size_t size = (cbuffer->used_size + 3) / 4; + +@@ -4641,7 +4661,7 @@ static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const st + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) ++static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct extern_resource *resource) + { + unsigned int i; + struct sm4_instruction instr = +@@ -4682,9 +4702,10 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex + } + } + +-static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, ++static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource, + bool uav) + { ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + enum hlsl_regset regset = uav ? 
HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + struct hlsl_type *component_type; + struct sm4_instruction instr; +@@ -4714,14 +4735,14 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + +- if (hlsl_version_lt(tpf->ctx, 4, 1) && multisampled && !component_type->sample_count) ++ if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count) + { + hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Multisampled texture object declaration needs sample count for profile %s.", +- tpf->ctx->profile->name); ++ "Multisampled texture object declaration needs sample count for profile %u.%u.", ++ version->major, version->minor); + } + +- if (hlsl_version_ge(tpf->ctx, 5, 1)) ++ if (vkd3d_shader_ver_ge(version, 5, 1)) + { + VKD3D_ASSERT(!i); + instr.dsts[0].reg.idx[0].offset = resource->id; +@@ -4767,9 +4788,10 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + } + } + +-static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) ++static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, ++ const struct hlsl_ir_var *var, bool is_patch_constant_func) + { +- const struct hlsl_profile_info *profile = tpf->ctx->profile; ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + const bool output = var->is_output_semantic; + enum vkd3d_shader_sysval_semantic semantic; + bool has_idx; +@@ -4780,7 +4802,7 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl + .dst_count = 1, + }; + +- if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, &has_idx)) ++ if (sm4_register_from_semantic_name(version, var->semantic.name, output, &instr.dsts[0].reg.type, &has_idx)) + { + if (has_idx) + { +@@ -4801,10 +4823,11 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl + instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; + } + +- if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) ++ if (shader_sm4_is_scalar_register(&instr.dsts[0].reg)) + instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; + +- sysval_semantic_from_hlsl(&semantic, tpf->ctx, &var->semantic, output); ++ sm4_sysval_semantic_from_semantic_name(&semantic, version, tpf->ctx->semantic_compat_mapping, ++ tpf->ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); + if (semantic == ~0u) + semantic = VKD3D_SHADER_SV_NONE; + +@@ -4813,25 +4836,26 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl + switch (semantic) + { + case VKD3D_SHADER_SV_NONE: +- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; + break; + + case VKD3D_SHADER_SV_INSTANCE_ID: ++ case VKD3D_SHADER_SV_IS_FRONT_FACE: + case VKD3D_SHADER_SV_PRIMITIVE_ID: +- case VKD3D_SHADER_SV_VERTEX_ID: + case VKD3D_SHADER_SV_SAMPLE_INDEX: +- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ case VKD3D_SHADER_SV_VERTEX_ID: ++ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) + ? 
VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; + break; + + default: +- instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; + break; + } + +- if (profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL) + { + enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; + +@@ -4870,32 +4894,32 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl + } + else + { +- if (semantic == VKD3D_SHADER_SV_NONE || profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; + else + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; + } + +- switch (semantic) ++ if (instr.opcode == VKD3D_SM4_OP_DCL_OUTPUT) + { +- case VKD3D_SHADER_SV_COVERAGE: +- case VKD3D_SHADER_SV_DEPTH: +- case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: +- case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: +- case VKD3D_SHADER_SV_TARGET: +- case VKD3D_SHADER_SV_NONE: +- break; +- +- default: +- instr.idx_count = 1; +- instr.idx[0] = semantic; +- break; ++ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET ++ || instr.dsts[0].reg.type != VKD3DSPR_OUTPUT); ++ } ++ else if (instr.opcode == VKD3D_SM4_OP_DCL_INPUT || instr.opcode == VKD3D_SM4_OP_DCL_INPUT_PS) ++ { ++ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); ++ } ++ else ++ { ++ VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); ++ instr.idx_count = 1; ++ instr.idx[0] = vkd3d_siv_from_sysval_indexed(semantic, var->semantic.index); + } + + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) ++static void write_sm4_dcl_temps(const struct tpf_compiler *tpf, uint32_t temp_count) + { + struct sm4_instruction instr = + { +@@ -4908,7 +4932,7 @@ static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_coun + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t idx, ++static void write_sm4_dcl_indexable_temp(const struct tpf_compiler *tpf, uint32_t idx, + uint32_t size, uint32_t comp_count) + { + struct sm4_instruction instr = +@@ -4922,7 +4946,7 @@ static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) ++static void write_sm4_dcl_thread_group(const struct tpf_compiler *tpf, const uint32_t thread_count[3]) + { + struct sm4_instruction instr = + { +@@ -4937,7 +4961,7 @@ static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint3 + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_global_flags(const struct tpf_writer *tpf, uint32_t flags) ++static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) + { + struct sm4_instruction instr = + { +@@ -4948,7 +4972,94 @@ static void write_sm4_dcl_global_flags(const struct tpf_writer *tpf, uint32_t fl + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ret(const struct tpf_writer *tpf) ++static void tpf_write_hs_decls(const struct tpf_compiler *tpf) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_HS_DECLS, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void 
tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, ++ .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_output_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, ++ .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_tessellator_domain(const struct tpf_compiler *tpf, enum vkd3d_tessellator_domain domain) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, ++ .extra_bits = domain << VKD3D_SM5_TESSELLATOR_SHIFT, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_tessellator_partitioning(const struct tpf_compiler *tpf, ++ enum vkd3d_shader_tessellator_partitioning partitioning) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, ++ .extra_bits = partitioning << VKD3D_SM5_TESSELLATOR_SHIFT, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler *tpf, ++ enum vkd3d_shader_tessellator_output_primitive output_primitive) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, ++ .extra_bits = output_primitive << VKD3D_SM5_TESSELLATOR_SHIFT, ++ }; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void write_sm4_ret(const struct tpf_compiler *tpf) + { + struct sm4_instruction instr = + { +@@ -4958,7 +5069,7 @@ static void write_sm4_ret(const struct tpf_writer *tpf) + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_unary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod) + { + struct sm4_instruction instr; +@@ -4976,7 +5087,7 @@ static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opco + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_unary_op_with_two_destinations(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src) + { + struct sm4_instruction instr; +@@ -4997,7 +5108,7 @@ static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tp + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_binary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct 
hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { + struct sm4_instruction instr; +@@ -5016,7 +5127,7 @@ static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opc + } + + /* dp# instructions don't map the swizzle. */ +-static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_binary_op_dot(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { + struct sm4_instruction instr; +@@ -5034,7 +5145,7 @@ static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4 + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, ++static void write_sm4_binary_op_with_two_destinations(const struct tpf_compiler *tpf, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, + const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { +@@ -5057,7 +5168,7 @@ static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *t + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_ternary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, + const struct hlsl_ir_node *src3) + { +@@ -5077,7 +5188,7 @@ static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_op + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, ++static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, + const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, + enum hlsl_sampler_dim dim) +@@ -5086,6 +5197,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node + bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; + struct sm4_instruction instr; + +@@ -5140,7 +5252,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node + reg->dimension = VSIR_DIMENSION_SCALAR; + reg->u.immconst_u32[0] = index->value.u[0].u; + } +- else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) ++ else if (version->major == 4 && version->minor == 0) + { + hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + } +@@ -5155,7 +5267,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node 
*coords = load->coords.node; +@@ -5235,7 +5347,7 @@ static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_ + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *dst = &load->node; +@@ -5257,7 +5369,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *dst = &load->node; +@@ -5292,7 +5404,7 @@ static bool type_is_float(const struct hlsl_type *type) + return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; + } + +-static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, ++static void write_sm4_cast_from_bool(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr, + const struct hlsl_ir_node *arg, uint32_t mask) + { + struct sm4_instruction instr; +@@ -5312,7 +5424,7 @@ static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) ++static void write_sm4_cast(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) + { + static const union + { +@@ -5421,7 +5533,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex + } + } + +-static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, ++static void write_sm4_store_uav_typed(const struct tpf_compiler *tpf, const struct hlsl_deref *dst, + const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) + { + struct sm4_instruction instr; +@@ -5429,7 +5541,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; + +- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); ++ sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); + instr.dst_count = 1; + + sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +@@ -5439,7 +5551,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) ++static void write_sm4_rasterizer_sample_count(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst) + { + struct sm4_instruction instr; + +@@ -5458,8 +5570,9 @@ static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, cons + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) ++static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) + { ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + const struct hlsl_ir_node *arg1 
= expr->operands[0].node; + const struct hlsl_ir_node *arg2 = expr->operands[1].node; + const struct hlsl_ir_node *arg3 = expr->operands[2].node; +@@ -5474,7 +5587,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + switch (expr->op) + { + case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: +- if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL && vkd3d_shader_ver_ge(version, 4, 1)) + write_sm4_rasterizer_sample_count(tpf, &expr->node); + else + hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, +@@ -5595,7 +5708,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + { + case HLSL_TYPE_FLOAT: + /* SM5 comes with a RCP opcode */ +- if (tpf->ctx->profile->major_version >= 5) ++ if (vkd3d_shader_ver_ge(version, 5, 0)) + { + write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); + } +@@ -5973,7 +6086,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + hlsl_release_string_buffer(tpf->ctx, dst_type_string); + } + +-static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) ++static void write_sm4_if(const struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) + { + struct sm4_instruction instr = + { +@@ -6002,7 +6115,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if * + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) ++static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) + { + struct sm4_instruction instr = {0}; + +@@ -6041,16 +6154,17 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju + /* Does this variable's data come directly from the API user, rather than being + * temporary or from a previous shader stage? + * I.e. is it a uniform or VS input? 
*/ +-static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) ++static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) + { + if (var->is_uniform) + return true; + +- return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; ++ return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; + } + +-static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) ++static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load) + { ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + const struct hlsl_type *type = load->node.data_type; + struct sm4_instruction instr; + +@@ -6060,7 +6174,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo + instr.dst_count = 1; + + VKD3D_ASSERT(hlsl_is_numeric_type(type)); +- if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) ++ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) + { + struct hlsl_constant_value value; + +@@ -6088,7 +6202,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) ++static void write_sm4_loop(const struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) + { + struct sm4_instruction instr = + { +@@ -6103,10 +6217,11 @@ static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_lo + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, ++static void write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_deref *sampler, + const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset) + { ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + struct vkd3d_shader_src_param *src; + struct sm4_instruction instr; + +@@ -6123,7 +6238,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { +- if (tpf->ctx->profile->major_version < 5) ++ if (!vkd3d_shader_ver_ge(version, 5, 0)) + { + hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); +@@ -6144,7 +6259,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) ++static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *sample_index = load->sample_index.node; +@@ -6213,7 +6328,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h + } + } + +-static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) ++static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) + { + struct hlsl_type *resource_type = 
hlsl_deref_get_type(tpf->ctx, &store->resource); + +@@ -6232,7 +6347,7 @@ static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct + write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); + } + +-static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) ++static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) + { + const struct hlsl_ir_node *rhs = store->rhs.node; + struct sm4_instruction instr; +@@ -6241,7 +6356,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_MOV; + +- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); ++ sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); + instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); + instr.dst_count = 1; + +@@ -6251,7 +6366,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_switch *s) ++static void write_sm4_switch(const struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) + { + const struct hlsl_ir_node *selector = s->selector.node; + struct hlsl_ir_switch_case *c; +@@ -6291,7 +6406,7 @@ static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_ + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) ++static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle) + { + unsigned int hlsl_swizzle; + struct sm4_instruction instr; +@@ -6312,7 +6427,7 @@ static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) ++static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block) + { + const struct hlsl_ir_node *instr; + +@@ -6386,18 +6501,65 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc + } + } + +-static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func, +- struct sm4_stat *stat, struct dxbc_writer *dxbc) ++static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) + { +- const struct hlsl_profile_info *profile = ctx->profile; ++ struct hlsl_ctx *ctx = tpf->ctx; ++ const struct hlsl_scope *scope; ++ const struct hlsl_ir_var *var; ++ uint32_t temp_count; ++ ++ compute_liveness(ctx, func); ++ mark_indexable_vars(ctx, func); ++ temp_count = allocate_temp_registers(ctx, func); ++ if (ctx->result) ++ return; ++ ++ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if ((var->is_input_semantic && var->last_read) ++ || (var->is_output_semantic && var->first_write)) ++ tpf_write_dcl_semantic(tpf, var, func == ctx->patch_constant_func); ++ } ++ ++ if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) ++ write_sm4_dcl_thread_group(tpf, ctx->thread_count); ++ ++ if (temp_count) ++ write_sm4_dcl_temps(tpf, temp_count); ++ ++ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ 
if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) ++ continue; ++ if (!var->regs[HLSL_REGSET_NUMERIC].allocated) ++ continue; ++ ++ if (var->indexable) ++ { ++ unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; ++ unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; ++ ++ write_sm4_dcl_indexable_temp(tpf, id, size, 4); ++ } ++ } ++ } ++ ++ write_sm4_block(tpf, &func->body); ++ ++ write_sm4_ret(tpf); ++} ++ ++static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func) ++{ ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; + struct vkd3d_bytecode_buffer buffer = {0}; + struct extern_resource *extern_resources; + unsigned int extern_resources_count, i; + const struct hlsl_buffer *cbuffer; +- const struct hlsl_scope *scope; +- const struct hlsl_ir_var *var; ++ struct hlsl_ctx *ctx = tpf->ctx; + size_t token_count_position; +- struct tpf_writer tpf; + + static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = + { +@@ -6412,17 +6574,28 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d + VKD3D_SM4_LIB, + }; + +- tpf_writer_init(&tpf, ctx, stat, &buffer); ++ tpf->buffer = &buffer; + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + +- put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); ++ put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); + token_count_position = put_u32(&buffer, 0); + ++ if (version->type == VKD3D_SHADER_TYPE_HULL) ++ { ++ tpf_write_hs_decls(tpf); ++ ++ tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */ ++ tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count); ++ tpf_write_dcl_tessellator_domain(tpf, ctx->domain); ++ tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); ++ tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); ++ } ++ + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) +- write_sm4_dcl_constant_buffer(&tpf, cbuffer); ++ write_sm4_dcl_constant_buffer(tpf, cbuffer); + } + + for (i = 0; i < extern_resources_count; ++i) +@@ -6430,62 +6603,40 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d + const struct extern_resource *resource = &extern_resources[i]; + + if (resource->regset == HLSL_REGSET_SAMPLERS) +- write_sm4_dcl_samplers(&tpf, resource); ++ write_sm4_dcl_samplers(tpf, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) +- write_sm4_dcl_textures(&tpf, resource, false); ++ write_sm4_dcl_textures(tpf, resource, false); + else if (resource->regset == HLSL_REGSET_UAVS) +- write_sm4_dcl_textures(&tpf, resource, true); ++ write_sm4_dcl_textures(tpf, resource, true); + } + +- if (entry_func->early_depth_test && profile->major_version >= 5) +- write_sm4_dcl_global_flags(&tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) +- write_sm4_dcl_semantic(&tpf, var); +- } ++ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) ++ write_sm4_dcl_global_flags(tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); + +- if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) +- write_sm4_dcl_thread_group(&tpf, 
ctx->thread_count); ++ if (version->type == VKD3D_SHADER_TYPE_HULL) ++ tpf_write_hs_control_point_phase(tpf); + +- if (ctx->temp_count) +- write_sm4_dcl_temps(&tpf, ctx->temp_count); ++ tpf_write_shader_function(tpf, entry_func); + +- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ if (version->type == VKD3D_SHADER_TYPE_HULL) + { +- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) +- { +- if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) +- continue; +- if (!var->regs[HLSL_REGSET_NUMERIC].allocated) +- continue; +- +- if (var->indexable) +- { +- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; +- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; +- +- write_sm4_dcl_indexable_temp(&tpf, id, size, 4); +- } +- } ++ tpf_write_hs_fork_phase(tpf); ++ tpf_write_shader_function(tpf, ctx->patch_constant_func); + } + +- write_sm4_block(&tpf, &entry_func->body); +- +- write_sm4_ret(&tpf); +- + set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); + +- add_section(ctx, dxbc, TAG_SHDR, &buffer); ++ add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer); ++ tpf->buffer = NULL; + + sm4_free_extern_resources(extern_resources, extern_resources_count); + } + +-static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) ++static void tpf_write_sfi0(struct tpf_compiler *tpf) + { + struct extern_resource *extern_resources; + unsigned int extern_resources_count; ++ struct hlsl_ctx *ctx = tpf->ctx; + uint64_t *flags; + + flags = vkd3d_calloc(1, sizeof(*flags)); +@@ -6502,14 +6653,16 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */ + + if (*flags) +- dxbc_writer_add_section(dxbc, TAG_SFI0, flags, sizeof(*flags)); ++ dxbc_writer_add_section(&tpf->dxbc, TAG_SFI0, flags, sizeof(*flags)); + else + vkd3d_free(flags); + } + +-static void write_sm4_stat(struct hlsl_ctx *ctx, const struct sm4_stat *stat, struct dxbc_writer *dxbc) ++static void tpf_write_stat(struct tpf_compiler *tpf) + { + struct vkd3d_bytecode_buffer buffer = {0}; ++ const struct sm4_stat *stat = tpf->stat; ++ struct hlsl_ctx *ctx = tpf->ctx; + + put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); +@@ -6553,28 +6706,40 @@ static void write_sm4_stat(struct hlsl_ctx *ctx, const struct sm4_stat *stat, st + put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); + } + +- add_section(ctx, dxbc, TAG_STAT, &buffer); ++ add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer); + } + +-int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) ++/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving ++ * data from the other parameters instead, so they can be removed from the ++ * arguments and this function can be independent of HLSL structs. 
*/ ++int tpf_compile(struct vsir_program *program, uint64_t config_flags, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, ++ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { ++ struct tpf_compiler tpf = {0}; + struct sm4_stat stat = {0}; +- struct dxbc_writer dxbc; + size_t i; + int ret; + +- dxbc_writer_init(&dxbc); +- +- write_sm4_signature(ctx, &dxbc, false); +- write_sm4_signature(ctx, &dxbc, true); +- write_sm4_rdef(ctx, &dxbc); +- write_sm4_shdr(ctx, entry_func, &stat, &dxbc); +- write_sm4_sfi0(ctx, &dxbc); +- write_sm4_stat(ctx, &stat, &dxbc); ++ tpf.ctx = ctx; ++ tpf.program = program; ++ tpf.buffer = NULL; ++ tpf.stat = &stat; ++ init_sm4_lookup_tables(&tpf.lookup); ++ dxbc_writer_init(&tpf.dxbc); ++ ++ tpf_write_signature(&tpf, &program->input_signature, TAG_ISGN); ++ tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL) ++ tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); ++ write_sm4_rdef(ctx, &tpf.dxbc); ++ tpf_write_shdr(&tpf, entry_func); ++ tpf_write_sfi0(&tpf); ++ tpf_write_stat(&tpf); + + if (!(ret = ctx->result)) +- ret = dxbc_writer_write(&dxbc, out); +- for (i = 0; i < dxbc.section_count; ++i) +- vkd3d_shader_free_shader_code(&dxbc.sections[i].data); ++ ret = dxbc_writer_write(&tpf.dxbc, out); ++ for (i = 0; i < tpf.dxbc.section_count; ++i) ++ vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data); + return ret; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 54b87373ed1..d63bfd96121 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -1602,6 +1602,16 @@ void vkd3d_shader_trace_text_(const char *text, size_t size, const char *functio + #define vkd3d_shader_trace_text(text, size) \ + vkd3d_shader_trace_text_(text, size, __FUNCTION__) + ++bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, ++ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); ++bool sm1_usage_from_semantic_name(const char *semantic_name, ++ uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); ++bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, ++ const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx); ++bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, ++ const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, ++ const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func); ++ + int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, + struct vkd3d_shader_message_context *message_context, struct vsir_program *program); + int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, +@@ -1914,7 +1924,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, + #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) + #define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) + +-#define DXBC_MAX_SECTION_COUNT 6 ++#define DXBC_MAX_SECTION_COUNT 7 + + struct dxbc_writer + { +-- +2.45.2 + diff --git a/patches/vkd3d-latest/0007-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch 
b/patches/vkd3d-latest/0007-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch new file mode 100644 index 00000000..660793c8 --- /dev/null +++ b/patches/vkd3d-latest/0007-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch @@ -0,0 +1,1584 @@ +From 1060207c8e59871433ade5578fd0a215ebebc6e3 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Fri, 18 Oct 2024 07:31:22 +1100 +Subject: [PATCH] Updated vkd3d to 03ad04c89004c7f800c5b1a0ea7ba28622916328. + +--- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 21 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 3 +- + libs/vkd3d/libs/vkd3d-shader/fx.c | 220 ++++++---- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 3 + + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 68 ++-- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 4 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 185 ++++++++- + libs/vkd3d/libs/vkd3d-shader/msl.c | 3 + + libs/vkd3d/libs/vkd3d-shader/spirv.c | 3 + + libs/vkd3d/libs/vkd3d-shader/tpf.c | 3 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 6 +- + libs/vkd3d/libs/vkd3d/command.c | 41 +- + libs/vkd3d/libs/vkd3d/state.c | 376 +++++++++--------- + 13 files changed, 607 insertions(+), 329 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 8f7e5cb666b..ae8e864c179 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -561,6 +561,21 @@ static struct signature_element *find_signature_element_by_register_index( + return NULL; + } + ++/* Add missing bits to a mask to make it contiguous. */ ++static unsigned int make_mask_contiguous(unsigned int mask) ++{ ++ static const unsigned int table[] = ++ { ++ 0x0, 0x1, 0x2, 0x3, ++ 0x4, 0x7, 0x6, 0x7, ++ 0x8, 0xf, 0xe, 0xf, ++ 0xc, 0xf, 0xe, 0xf, ++ }; ++ ++ VKD3D_ASSERT(mask < ARRAY_SIZE(table)); ++ return table[mask]; ++} ++ + static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, + const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, + unsigned int register_index, bool is_dcl, unsigned int mask) +@@ -576,7 +591,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp + + if ((element = find_signature_element(signature, name, index))) + { +- element->mask |= mask; ++ element->mask = make_mask_contiguous(element->mask | mask); + if (!is_dcl) + element->used_mask |= mask; + return true; +@@ -596,7 +611,7 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp + element->register_index = register_index; + element->target_location = register_index; + element->register_count = 1; +- element->mask = mask; ++ element->mask = make_mask_contiguous(mask); + element->used_mask = is_dcl ? 0 : mask; + if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) + element->interpolation_mode = VKD3DSIM_LINEAR; +@@ -1305,7 +1320,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st + + /* Estimate instruction count to avoid reallocation in most shaders. */ + if (!vsir_program_init(program, compile_info, &version, +- code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED)) ++ code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, false)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 59494d2500d..f9f44f34bcf 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -10311,7 +10311,8 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro + + /* Estimate instruction count to avoid reallocation in most shaders. */ + count = max(token_count, 400) - 400; +- if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS)) ++ if (!vsir_program_init(program, compile_info, &version, ++ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, true)) + return VKD3D_ERROR_OUT_OF_MEMORY; + vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); + sm6->ptr = &sm6->start[1]; +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index e98dfcf4f32..f3f7a2c765c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -25,6 +25,17 @@ static inline size_t put_u32_unaligned(struct vkd3d_bytecode_buffer *buffer, uin + return bytecode_put_bytes_unaligned(buffer, &value, sizeof(value)); + } + ++struct fx_4_binary_type ++{ ++ uint32_t name; ++ uint32_t class; ++ uint32_t element_count; ++ uint32_t unpacked_size; ++ uint32_t stride; ++ uint32_t packed_size; ++ uint32_t typeinfo; ++}; ++ + struct string_entry + { + struct rb_entry entry; +@@ -2842,6 +2853,7 @@ struct fx_parser + const uint8_t *ptr, *start, *end; + struct vkd3d_shader_message_context *message_context; + struct vkd3d_string_buffer buffer; ++ unsigned int indent; + struct + { + const uint8_t *ptr; +@@ -2949,6 +2961,131 @@ static const char *fx_4_get_string(struct fx_parser *parser, uint32_t offset) + return (const char *)(parser->unstructured.ptr + offset); + } + ++static void parse_fx_start_indent(struct fx_parser *parser) ++{ ++ ++parser->indent; ++} ++ ++static void parse_fx_end_indent(struct fx_parser *parser) ++{ ++ --parser->indent; ++} ++ ++static void parse_fx_print_indent(struct fx_parser *parser) ++{ ++ vkd3d_string_buffer_printf(&parser->buffer, "%*s", 4 * parser->indent, ""); ++} ++ ++static void parse_fx_4_numeric_value(struct fx_parser *parser, uint32_t offset, ++ const struct fx_4_binary_type *type) ++{ ++ unsigned int base_type, comp_count; ++ size_t i; ++ ++ base_type = (type->typeinfo >> FX_4_NUMERIC_BASE_TYPE_SHIFT) & 0xf; ++ ++ comp_count = type->packed_size / sizeof(uint32_t); ++ for (i = 0; i < comp_count; ++i) ++ { ++ union hlsl_constant_value_component value; ++ ++ fx_parser_read_unstructured(parser, &value, offset, sizeof(uint32_t)); ++ ++ if (base_type == FX_4_NUMERIC_TYPE_FLOAT) ++ vkd3d_string_buffer_printf(&parser->buffer, "%f", value.f); ++ else if (base_type == FX_4_NUMERIC_TYPE_INT) ++ vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i); ++ else if (base_type == FX_4_NUMERIC_TYPE_UINT) ++ vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); ++ else if (base_type == FX_4_NUMERIC_TYPE_BOOL) ++ vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? 
"true" : "false" ); ++ else ++ vkd3d_string_buffer_printf(&parser->buffer, "%#x", value.u); ++ ++ if (i < comp_count - 1) ++ vkd3d_string_buffer_printf(&parser->buffer, ", "); ++ ++ offset += sizeof(uint32_t); ++ } ++} ++ ++static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t offset) ++{ ++ const char *str = fx_4_get_string(parser, offset); ++ vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str); ++} ++ ++static void fx_parse_fx_4_annotations(struct fx_parser *parser) ++{ ++ struct fx_4_annotation ++ { ++ uint32_t name; ++ uint32_t type; ++ } var; ++ struct fx_4_binary_type type; ++ const char *name, *type_name; ++ uint32_t count, i, value; ++ ++ count = fx_parser_read_u32(parser); ++ ++ if (!count) ++ return; ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n"); ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "<\n"); ++ parse_fx_start_indent(parser); ++ ++ for (i = 0; i < count; ++i) ++ { ++ fx_parser_read_u32s(parser, &var, sizeof(var)); ++ fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); ++ ++ name = fx_4_get_string(parser, var.name); ++ type_name = fx_4_get_string(parser, type.name); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name); ++ if (type.element_count) ++ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); ++ vkd3d_string_buffer_printf(&parser->buffer, " = "); ++ if (type.element_count) ++ vkd3d_string_buffer_printf(&parser->buffer, "{ "); ++ ++ if (type.class == FX_4_TYPE_CLASS_NUMERIC) ++ { ++ value = fx_parser_read_u32(parser); ++ parse_fx_4_numeric_value(parser, value, &type); ++ } ++ else if (type.class == FX_4_TYPE_CLASS_OBJECT && type.typeinfo == FX_4_OBJECT_TYPE_STRING) ++ { ++ uint32_t element_count = max(type.element_count, 1); ++ ++ for (uint32_t j = 0; j < element_count; ++j) ++ { ++ value = fx_parser_read_u32(parser); ++ fx_4_parse_string_initializer(parser, value); ++ if (j < element_count - 1) ++ vkd3d_string_buffer_printf(&parser->buffer, ", "); ++ } ++ } ++ else ++ { ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, ++ "Only numeric and string types are supported in annotations.\n"); ++ } ++ ++ if (type.element_count) ++ vkd3d_string_buffer_printf(&parser->buffer, " }"); ++ vkd3d_string_buffer_printf(&parser->buffer, ";\n"); ++ } ++ parse_fx_end_indent(parser); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, ">"); ++} ++ ++ + static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t count) + { + struct fx_4_numeric_variable +@@ -2960,17 +3097,8 @@ static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t c + uint32_t value; + uint32_t flags; + } var; +- struct fx_4_type +- { +- uint32_t name; +- uint32_t class; +- uint32_t element_count; +- uint32_t unpacked_size; +- uint32_t stride; +- uint32_t packed_size; +- uint32_t typeinfo; +- } type; + const char *name, *semantic, *type_name; ++ struct fx_4_binary_type type; + uint32_t i; + + for (i = 0; i < count; ++i) +@@ -2984,54 +3112,21 @@ static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t c + vkd3d_string_buffer_printf(&parser->buffer, " %s %s", type_name, name); + if (type.element_count) + vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); ++ + if (var.semantic) + { + semantic = fx_4_get_string(parser, var.semantic); + vkd3d_string_buffer_printf(&parser->buffer, " : %s", semantic); + } ++ 
fx_parse_fx_4_annotations(parser); ++ + if (var.value) + { +- unsigned int base_type, comp_count; +- size_t j; +- +- if (type.class == FX_4_TYPE_CLASS_NUMERIC) +- base_type = (type.typeinfo >> FX_4_NUMERIC_BASE_TYPE_SHIFT) & 0xf; +- else +- base_type = 0; +- + vkd3d_string_buffer_printf(&parser->buffer, " = { "); +- +- comp_count = type.unpacked_size / sizeof(uint32_t); +- for (j = 0; j < comp_count; ++j) +- { +- union hlsl_constant_value_component value; +- +- fx_parser_read_unstructured(parser, &value, var.value + j * sizeof(uint32_t), sizeof(uint32_t)); +- +- if (base_type == FX_4_NUMERIC_TYPE_FLOAT) +- vkd3d_string_buffer_printf(&parser->buffer, "%f", value.f); +- else if (base_type == FX_4_NUMERIC_TYPE_INT) +- vkd3d_string_buffer_printf(&parser->buffer, "%d", value.i); +- else if (base_type == FX_4_NUMERIC_TYPE_UINT) +- vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); +- else if (base_type == FX_4_NUMERIC_TYPE_BOOL) +- vkd3d_string_buffer_printf(&parser->buffer, "%s", value.u ? "true" : "false" ); +- else +- vkd3d_string_buffer_printf(&parser->buffer, "%#x", value.u); +- +- if (j < comp_count - 1) +- vkd3d_string_buffer_printf(&parser->buffer, ", "); +- } +- ++ parse_fx_4_numeric_value(parser, var.value, &type); + vkd3d_string_buffer_printf(&parser->buffer, " }"); + } + vkd3d_string_buffer_printf(&parser->buffer, "; // Offset: %u, size %u.\n", var.offset, type.unpacked_size); +- +- if (fx_parser_read_u32(parser)) +- { +- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing annotations is not implemented.\n"); +- return; +- } + } + } + +@@ -3057,27 +3152,17 @@ static void fx_parse_buffers(struct fx_parser *parser) + + name = fx_4_get_string(parser, buffer.name); + +- vkd3d_string_buffer_printf(&parser->buffer, "cbuffer %s\n", name); +- vkd3d_string_buffer_printf(&parser->buffer, "{\n"); +- +- if (fx_parser_read_u32(parser)) +- { +- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing annotations is not implemented.\n"); +- return; +- } ++ vkd3d_string_buffer_printf(&parser->buffer, "cbuffer %s", name); ++ fx_parse_fx_4_annotations(parser); + ++ vkd3d_string_buffer_printf(&parser->buffer, "\n{\n"); ++ parse_fx_start_indent(parser); + fx_parse_fx_4_numeric_variables(parser, buffer.count); +- ++ parse_fx_end_indent(parser); + vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); + } + } + +-static void fx_4_parse_string_initializer(struct fx_parser *parser, uint32_t offset) +-{ +- const char *str = fx_4_get_string(parser, offset); +- vkd3d_string_buffer_printf(&parser->buffer, "\"%s\"", str); +-} +- + static void fx_4_parse_objects(struct fx_parser *parser) + { + struct fx_4_object_variable +@@ -3087,17 +3172,8 @@ static void fx_4_parse_objects(struct fx_parser *parser) + uint32_t semantic; + uint32_t bind_point; + } var; +- struct fx_4_type +- { +- uint32_t name; +- uint32_t class; +- uint32_t element_count; +- uint32_t unpacked_size; +- uint32_t stride; +- uint32_t packed_size; +- uint32_t typeinfo; +- } type; + uint32_t i, j, value, element_count; ++ struct fx_4_binary_type type; + const char *name, *type_name; + + if (parser->failed) +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index 1e17de54ede..46515818d07 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -2213,6 +2213,9 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) + return 
ret; + ++ VKD3D_ASSERT(program->normalised_io); ++ VKD3D_ASSERT(program->normalised_hull_cp_io); ++ + vkd3d_glsl_generator_init(&generator, program, compile_info, + descriptor_info, combined_sampler_info, message_context); + ret = vkd3d_glsl_generator_generate(&generator, out); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index ee13e193d49..213cec79c3d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -40,6 +40,7 @@ struct parse_initializer + unsigned int args_count; + struct hlsl_block *instrs; + bool braces; ++ struct vkd3d_shader_location loc; + }; + + struct parse_parameter +@@ -2506,6 +2507,32 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i + } + } + ++static void initialize_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *dst, ++ const struct parse_initializer *initializer, bool is_default_values_initializer) ++{ ++ unsigned int store_index = 0; ++ ++ /* If any of the elements has an error type, then initializer_size() is not ++ * meaningful. */ ++ for (unsigned int i = 0; i < initializer->args_count; ++i) ++ { ++ if (initializer->args[i]->data_type->class == HLSL_CLASS_ERROR) ++ return; ++ } ++ ++ if (initializer_size(initializer) != hlsl_type_component_count(dst->data_type)) ++ { ++ hlsl_error(ctx, &initializer->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Expected %u components in initializer, but got %u.", ++ hlsl_type_component_count(dst->data_type), initializer_size(initializer)); ++ return; ++ } ++ ++ for (unsigned int i = 0; i < initializer->args_count; ++i) ++ initialize_var_components(ctx, initializer->instrs, dst, &store_index, ++ initializer->args[i], is_default_values_initializer); ++} ++ + static bool type_has_object_components(const struct hlsl_type *type) + { + if (type->class == HLSL_CLASS_ARRAY) +@@ -2832,8 +2859,6 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + if (v->initializer.args_count) + { + bool is_default_values_initializer; +- unsigned int store_index = 0; +- unsigned int size, k; + + is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) + || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) +@@ -2871,20 +2896,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + v->initializer.args[0] = node_from_block(v->initializer.instrs); + } + +- size = initializer_size(&v->initializer); +- if (component_count != size) +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Expected %u components in initializer, but got %u.", component_count, size); +- free_parse_variable_def(v); +- continue; +- } +- +- for (k = 0; k < v->initializer.args_count; ++k) +- { +- initialize_var_components(ctx, v->initializer.instrs, var, +- &store_index, v->initializer.args[k], is_default_values_initializer); +- } ++ initialize_var(ctx, var, &v->initializer, is_default_values_initializer); + + if (is_default_values_initializer) + { +@@ -5402,13 +5414,11 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type + { + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; +- unsigned int i, idx = 0; + + if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) + return NULL; + +- for (i = 0; i < params->args_count; ++i) +- initialize_var_components(ctx, params->instrs, var, &idx, params->args[i], false); ++ initialize_var(ctx, var, params, false); + + if (!(load = hlsl_new_var_load(ctx, var, loc))) + return 
NULL; +@@ -5425,6 +5435,14 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_type *cond_type = cond->data_type; + struct hlsl_type *common_type; + ++ if (cond->data_type->class == HLSL_CLASS_ERROR ++ || first->data_type->class == HLSL_CLASS_ERROR ++ || second->data_type->class == HLSL_CLASS_ERROR) ++ { ++ block->value = ctx->error_instr; ++ return true; ++ } ++ + if (cond_type->class > HLSL_CLASS_LAST_NUMERIC) + { + struct vkd3d_string_buffer *string; +@@ -8321,6 +8339,7 @@ complex_initializer: + $$.args[0] = node_from_block($1); + $$.instrs = $1; + $$.braces = false; ++ $$.loc = @$; + } + | '{' complex_initializer_list '}' + { +@@ -8352,6 +8371,7 @@ complex_initializer_list: + $$.args[$$.args_count++] = $3.args[i]; + hlsl_block_add_block($$.instrs, $3.instrs); + free_parse_initializer(&$3); ++ $$.loc = @$; + } + + initializer_expr: +@@ -8369,6 +8389,7 @@ initializer_expr_list: + $$.args[0] = node_from_block($1); + $$.instrs = $1; + $$.braces = false; ++ $$.loc = @$; + } + | initializer_expr_list ',' initializer_expr + { +@@ -8687,6 +8708,7 @@ func_arguments: + if (!($$.instrs = make_empty_block(ctx))) + YYABORT; + $$.braces = false; ++ $$.loc = @$; + } + | initializer_expr_list + +@@ -8950,14 +8972,6 @@ postfix_expr: + free_parse_initializer(&$4); + YYABORT; + } +- if ($2->dimx * $2->dimy != initializer_size(&$4)) +- { +- hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Expected %u components in constructor, but got %u.", +- $2->dimx * $2->dimy, initializer_size(&$4)); +- free_parse_initializer(&$4); +- YYABORT; +- } + + if (!($$ = add_constructor(ctx, $2, &$4, &@2))) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 4cf9d5eb84a..88bec8610cb 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -7366,7 +7366,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; +- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED)) ++ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; +@@ -7404,7 +7404,7 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + +- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED)) ++ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index c475bf92279..b47f12d2188 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -74,7 +74,8 @@ static int convert_parameter_info(const struct vkd3d_shader_compile_info *compil + } + + bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, +- const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type) ++ const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, ++ bool normalised_io) + { + memset(program, 0, sizeof(*program)); + +@@ -97,6 +98,8 @@ bool vsir_program_init(struct vsir_program *program, const struct 
vkd3d_shader_c + + program->shader_version = *version; + program->cf_type = cf_type; ++ program->normalised_io = normalised_io; ++ program->normalised_hull_cp_io = normalised_io; + return shader_instruction_array_init(&program->instructions, reserve); + } + +@@ -1132,6 +1135,14 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + enum vkd3d_result ret; + unsigned int i, j; + ++ VKD3D_ASSERT(!program->normalised_hull_cp_io); ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) ++ { ++ program->normalised_hull_cp_io = true; ++ return VKD3D_OK; ++ } ++ + if (!(normaliser.outpointid_param = vsir_program_create_outpointid_param(program))) + { + ERR("Failed to allocate src param.\n"); +@@ -1175,6 +1186,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + program->instructions = normaliser.instructions; ++ program->normalised_hull_cp_io = true; + return VKD3D_OK; + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +@@ -1183,6 +1195,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + ret = control_point_normaliser_emit_hs_input(&normaliser, &program->input_signature, + input_control_point_count, i, &location); + program->instructions = normaliser.instructions; ++ program->normalised_hull_cp_io = true; + return ret; + default: + break; +@@ -1190,6 +1203,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + } + + program->instructions = normaliser.instructions; ++ program->normalised_hull_cp_io = true; + return VKD3D_OK; + } + +@@ -1903,6 +1917,8 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program + struct vkd3d_shader_instruction *ins; + unsigned int i; + ++ VKD3D_ASSERT(!program->normalised_io); ++ + normaliser.phase = VKD3DSIH_INVALID; + normaliser.shader_type = program->shader_version.type; + normaliser.major = program->shader_version.major; +@@ -1959,6 +1975,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program + + program->instructions = normaliser.instructions; + program->use_vocp = normaliser.use_vocp; ++ program->normalised_io = true; + return VKD3D_OK; + } + +@@ -6189,6 +6206,143 @@ static void vsir_validate_register_without_indices(struct validation_context *ct + reg->idx_count, reg->type); + } + ++static void vsir_validate_io_register(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ const struct shader_signature *signature; ++ bool has_control_point = false; ++ ++ switch (reg->type) ++ { ++ case VKD3DSPR_INPUT: ++ signature = &ctx->program->input_signature; ++ ++ switch (ctx->program->shader_version.type) ++ { ++ case VKD3D_SHADER_TYPE_GEOMETRY: ++ case VKD3D_SHADER_TYPE_HULL: ++ case VKD3D_SHADER_TYPE_DOMAIN: ++ has_control_point = true; ++ break; ++ ++ default: ++ break; ++ } ++ break; ++ ++ case VKD3DSPR_OUTPUT: ++ switch (ctx->program->shader_version.type) ++ { ++ case VKD3D_SHADER_TYPE_HULL: ++ if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE) ++ { ++ signature = &ctx->program->output_signature; ++ has_control_point = ctx->program->normalised_hull_cp_io; ++ } ++ else if (ctx->program->normalised_io) ++ { ++ signature = &ctx->program->output_signature; ++ has_control_point = true; ++ } ++ else ++ { ++ signature = &ctx->program->patch_constant_signature; ++ } ++ break; ++ ++ default: ++ signature = &ctx->program->output_signature; ++ break; ++ } ++ break; ++ ++ case 
VKD3DSPR_INCONTROLPOINT: ++ signature = &ctx->program->input_signature; ++ has_control_point = true; ++ break; ++ ++ case VKD3DSPR_OUTCONTROLPOINT: ++ signature = &ctx->program->output_signature; ++ has_control_point = true; ++ break; ++ ++ case VKD3DSPR_PATCHCONST: ++ signature = &ctx->program->patch_constant_signature; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ if (!ctx->program->normalised_io) ++ { ++ /* Indices are [register] or [control point, register]. Both are ++ * allowed to have a relative address. */ ++ unsigned int expected_idx_count = 1 + !!has_control_point; ++ ++ if (reg->idx_count != expected_idx_count) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a register of type %#x.", ++ reg->idx_count, reg->type); ++ return; ++ } ++ } ++ else ++ { ++ struct signature_element *element; ++ unsigned int expected_idx_count; ++ unsigned int signature_idx; ++ bool is_array = false; ++ ++ /* If the signature element is not an array, indices are ++ * [signature] or [control point, signature]. If the signature ++ * element is an array, indices are [array, signature] or ++ * [control point, array, signature]. In any case `signature' is ++ * not allowed to have a relative address, while the others are. ++ */ ++ if (reg->idx_count < 1) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a register of type %#x.", ++ reg->idx_count, reg->type); ++ return; ++ } ++ ++ if (reg->idx[reg->idx_count - 1].rel_addr) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Non-NULL relative address for the signature index of a register of type %#x.", ++ reg->type); ++ return; ++ } ++ ++ signature_idx = reg->idx[reg->idx_count - 1].offset; ++ ++ if (signature_idx >= signature->element_count) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Signature index %u exceeds the signature size %u in a register of type %#x.", ++ signature_idx, signature->element_count, reg->type); ++ return; ++ } ++ ++ element = &signature->elements[signature_idx]; ++ if (element->register_count > 1 || vsir_sysval_semantic_is_tess_factor(element->sysval_semantic)) ++ is_array = true; ++ ++ expected_idx_count = 1 + !!has_control_point + !!is_array; ++ ++ if (reg->idx_count != expected_idx_count) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a register of type %#x.", ++ reg->idx_count, reg->type); ++ return; ++ } ++ } ++} ++ + static void vsir_validate_temp_register(struct validation_context *ctx, + const struct vkd3d_shader_register *reg) + { +@@ -6502,10 +6656,18 @@ static void vsir_validate_register(struct validation_context *ctx, + vsir_validate_temp_register(ctx, reg); + break; + ++ case VKD3DSPR_INPUT: ++ vsir_validate_io_register(ctx, reg); ++ break; ++ + case VKD3DSPR_RASTOUT: + vsir_validate_rastout_register(ctx, reg); + break; + ++ case VKD3DSPR_OUTPUT: ++ vsir_validate_io_register(ctx, reg); ++ break; ++ + case VKD3DSPR_DEPTHOUT: + vsir_validate_register_without_indices(ctx, reg); + break; +@@ -6542,6 +6704,18 @@ static void vsir_validate_register(struct validation_context *ctx, + vsir_validate_uav_register(ctx, reg); + break; + ++ case VKD3DSPR_INCONTROLPOINT: ++ vsir_validate_io_register(ctx, reg); ++ break; ++ ++ case VKD3DSPR_OUTCONTROLPOINT: ++ vsir_validate_io_register(ctx, reg); ++ break; ++ ++ case VKD3DSPR_PATCHCONST: ++ vsir_validate_io_register(ctx, reg); ++ break; ++ + case 
VKD3DSPR_DEPTHOUTGE: + vsir_validate_register_without_indices(ctx, reg); + break; +@@ -6810,6 +6984,11 @@ static void vsir_validate_signature_element(struct validation_context *ctx, + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask); + ++ if (!vkd3d_bitmask_is_contiguous(element->mask)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Non-contiguous mask %#x.", ++ idx, signature_type_name, element->mask); ++ + /* Here we'd likely want to validate that the usage mask is a subset of the + * signature mask. Unfortunately the D3DBC parser sometimes violates this. + * For example I've seen a shader like this: +@@ -7649,11 +7828,9 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t + vsir_transform(&ctx, vsir_program_remap_output_signature); + + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) +- { + vsir_transform(&ctx, vsir_program_flatten_hull_shader_phases); +- vsir_transform(&ctx, instruction_array_normalise_hull_shader_control_point_io); +- } + ++ vsir_transform(&ctx, instruction_array_normalise_hull_shader_control_point_io); + vsir_transform(&ctx, vsir_program_normalise_io_registers); + vsir_transform(&ctx, vsir_program_normalise_flat_constants); + vsir_transform(&ctx, vsir_program_remove_dead_code); +diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c +index 36750de1fd8..5baefbc1f44 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/msl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/msl.c +@@ -869,6 +869,9 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) + return ret; + ++ VKD3D_ASSERT(program->normalised_io); ++ VKD3D_ASSERT(program->normalised_hull_cp_io); ++ + if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) + return ret; + msl_generator_generate(&generator); +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 1efd717e970..6a28e2cd68e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -10650,6 +10650,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + compile_info, compiler->message_context)) < 0) + return result; + ++ VKD3D_ASSERT(program->normalised_io); ++ VKD3D_ASSERT(program->normalised_hull_cp_io); ++ + max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); + if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) + return VKD3D_ERROR_OUT_OF_MEMORY; +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 54344f2afc0..848e78a34d3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -2792,7 +2792,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro + version.minor = VKD3D_SM4_VERSION_MINOR(version_token); + + /* Estimate instruction count to avoid reallocation in most shaders. 
*/ +- if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20, VSIR_CF_STRUCTURED)) ++ if (!vsir_program_init(program, compile_info, ++ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, false)) + return false; + vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); + sm4->ptr = sm4->start; +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index d63bfd96121..9df538a0da0 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -255,6 +255,7 @@ enum vkd3d_shader_error + + VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED = 11000, + VKD3D_SHADER_ERROR_FX_INVALID_VERSION = 11001, ++ VKD3D_SHADER_ERROR_FX_INVALID_DATA = 11002, + }; + + enum vkd3d_shader_opcode +@@ -1414,6 +1415,8 @@ struct vsir_program + bool use_vocp; + bool has_point_size; + enum vsir_control_flow_type cf_type; ++ bool normalised_io; ++ bool normalised_hull_cp_io; + + const char **block_names; + size_t block_name_count; +@@ -1426,7 +1429,8 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( + const struct vsir_program *program, enum vkd3d_shader_parameter_name name); + bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, +- const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type); ++ const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, ++ bool normalised_io); + enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); + enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c +index eab0436bebd..5495809fcb9 100644 +--- a/libs/vkd3d/libs/vkd3d/command.c ++++ b/libs/vkd3d/libs/vkd3d/command.c +@@ -2795,39 +2795,30 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des + /* We use separate bindings for buffer and texture SRVs/UAVs. + * See d3d12_root_signature_init(). For unbounded ranges the + * descriptors exist in two consecutive sets, otherwise they occur +- * in pairs in one set. */ +- if (range->descriptor_count == UINT_MAX) +- { +- if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER +- && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) +- { +- vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; +- vk_descriptor_write->dstBinding = 0; +- } +- } +- else +- { +- if (!use_array) +- vk_descriptor_write->dstBinding = vk_binding + 2 * index; +- if (vk_descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER +- && vk_descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) +- ++vk_descriptor_write->dstBinding; +- } +- ++ * as consecutive ranges within a set. 
*/ + if (vk_descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
+ || vk_descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER
+ {
+ vk_descriptor_write->pTexelBufferView = &u.view->v.u.vk_buffer_view;
++ break;
++ }
++
++ if (range->descriptor_count == UINT_MAX)
++ {
++ vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1];
++ vk_descriptor_write->dstBinding = 0;
+ }
+ else
+ {
+- vk_image_info->sampler = VK_NULL_HANDLE;
+- vk_image_info->imageView = u.view->v.u.vk_image_view;
+- vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV
+- ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
+-
+- vk_descriptor_write->pImageInfo = vk_image_info;
++ vk_descriptor_write->dstBinding += use_array ? 1 : range->descriptor_count;
+ }
++
++ vk_image_info->sampler = VK_NULL_HANDLE;
++ vk_image_info->imageView = u.view->v.u.vk_image_view;
++ vk_image_info->imageLayout = u.header->magic == VKD3D_DESCRIPTOR_MAGIC_SRV
++ ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
++
++ vk_descriptor_write->pImageInfo = vk_image_info;
+ break;
+
+ case VKD3D_DESCRIPTOR_MAGIC_SAMPLER:
+diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c
+index fb377177403..8e5ec70a577 100644
+--- a/libs/vkd3d/libs/vkd3d/state.c
++++ b/libs/vkd3d/libs/vkd3d/state.c
+@@ -219,6 +219,30 @@ static VkShaderStageFlags stage_flags_from_visibility(D3D12_SHADER_VISIBILITY vi
+ }
+ }
+
++static VkShaderStageFlags stage_flags_from_vkd3d_shader_visibility(enum vkd3d_shader_visibility visibility)
++{
++ switch (visibility)
++ {
++ case VKD3D_SHADER_VISIBILITY_ALL:
++ return VK_SHADER_STAGE_ALL;
++ case VKD3D_SHADER_VISIBILITY_VERTEX:
++ return VK_SHADER_STAGE_VERTEX_BIT;
++ case VKD3D_SHADER_VISIBILITY_HULL:
++ return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
++ case VKD3D_SHADER_VISIBILITY_DOMAIN:
++ return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
++ case VKD3D_SHADER_VISIBILITY_GEOMETRY:
++ return VK_SHADER_STAGE_GEOMETRY_BIT;
++ case VKD3D_SHADER_VISIBILITY_PIXEL:
++ return VK_SHADER_STAGE_FRAGMENT_BIT;
++ case VKD3D_SHADER_VISIBILITY_COMPUTE:
++ return VK_SHADER_STAGE_COMPUTE_BIT;
++ default:
++ FIXME("Unhandled visibility %#x.\n", visibility);
++ return VK_SHADER_STAGE_ALL;
++ }
++}
++
+ static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHADER_VISIBILITY visibility)
+ {
+ switch (visibility)
+@@ -260,23 +284,6 @@ static VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d
+ }
+ }
+
+-static VkDescriptorType vk_descriptor_type_from_d3d12_root_parameter(D3D12_ROOT_PARAMETER_TYPE type)
+-{
+- switch (type)
+- {
+- /* SRV and UAV root parameters are buffer views.
*/ +- case D3D12_ROOT_PARAMETER_TYPE_SRV: +- return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; +- case D3D12_ROOT_PARAMETER_TYPE_UAV: +- return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; +- case D3D12_ROOT_PARAMETER_TYPE_CBV: +- return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; +- default: +- FIXME("Unhandled descriptor root parameter type %#x.\n", type); +- return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; +- } +-} +- + static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( + D3D12_DESCRIPTOR_RANGE_TYPE type) + { +@@ -313,20 +320,6 @@ static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_root_p + } + } + +-static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutBinding *binding_desc, +- enum vkd3d_shader_descriptor_type descriptor_type, D3D12_SHADER_VISIBILITY shader_visibility, +- bool is_buffer, uint32_t vk_binding, unsigned int descriptor_count) +-{ +- binding_desc->binding = vk_binding; +- binding_desc->descriptorType +- = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, is_buffer); +- binding_desc->descriptorCount = descriptor_count; +- binding_desc->stageFlags = stage_flags_from_visibility(shader_visibility); +- binding_desc->pImmutableSamplers = NULL; +- +- return true; +-} +- + struct d3d12_root_signature_info + { + size_t binding_count; +@@ -719,18 +712,66 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat + return S_OK; + } + ++struct vk_binding_array ++{ ++ VkDescriptorSetLayoutBinding *bindings; ++ size_t capacity, count; ++ ++ unsigned int table_index; ++ unsigned int unbounded_offset; ++ VkDescriptorSetLayoutCreateFlags flags; ++}; ++ ++static void vk_binding_array_cleanup(struct vk_binding_array *array) ++{ ++ vkd3d_free(array->bindings); ++ array->bindings = NULL; ++} ++ ++static bool vk_binding_array_add_binding(struct vk_binding_array *array, ++ VkDescriptorType descriptor_type, unsigned int descriptor_count, ++ VkShaderStageFlags stage_flags, const VkSampler *immutable_sampler, unsigned int *binding_idx) ++{ ++ unsigned int binding_count = array->count; ++ VkDescriptorSetLayoutBinding *binding; ++ ++ if (!vkd3d_array_reserve((void **)&array->bindings, &array->capacity, ++ array->count + 1, sizeof(*array->bindings))) ++ { ++ ERR("Failed to reallocate the Vulkan binding array.\n"); ++ return false; ++ } ++ ++ *binding_idx = binding_count; ++ binding = &array->bindings[binding_count]; ++ binding->binding = binding_count; ++ binding->descriptorType = descriptor_type; ++ binding->descriptorCount = descriptor_count; ++ binding->stageFlags = stage_flags; ++ binding->pImmutableSamplers = immutable_sampler; ++ ++array->count; ++ ++ return true; ++} ++ + struct vkd3d_descriptor_set_context + { +- VkDescriptorSetLayoutBinding *current_binding; +- VkDescriptorSetLayoutBinding *first_binding; ++ struct vk_binding_array vk_bindings[VKD3D_MAX_DESCRIPTOR_SETS]; + unsigned int table_index; + unsigned int unbounded_offset; + unsigned int descriptor_index; + unsigned int uav_counter_index; + unsigned int push_constant_index; +- uint32_t descriptor_binding; + }; + ++static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context) ++{ ++ size_t i; ++ ++ for (i = 0; i < ARRAY_SIZE(context->vk_bindings); ++i) ++ vk_binding_array_cleanup(&context->vk_bindings[i]); ++} ++ + static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, unsigned int set_count) + { + uint32_t max_count = min(VKD3D_MAX_DESCRIPTOR_SETS, 
device->vk_info.device_limits.maxBoundDescriptorSets); +@@ -745,56 +786,56 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns + return true; + } + +-static HRESULT vkd3d_create_descriptor_set_layout(struct d3d12_device *device, +- VkDescriptorSetLayoutCreateFlags flags, unsigned int binding_count, bool unbounded, +- const VkDescriptorSetLayoutBinding *bindings, VkDescriptorSetLayout *set_layout); +- +-static HRESULT d3d12_root_signature_append_descriptor_set_layout(struct d3d12_root_signature *root_signature, +- struct vkd3d_descriptor_set_context *context, VkDescriptorSetLayoutCreateFlags flags) ++static struct vk_binding_array *d3d12_root_signature_current_vk_binding_array( ++ struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context) + { +- struct d3d12_descriptor_set_layout *layout; +- unsigned int index; +- HRESULT hr; +- +- if (!context->descriptor_binding) +- return S_OK; ++ if (root_signature->vk_set_count >= ARRAY_SIZE(context->vk_bindings)) ++ return NULL; + +- index = root_signature->vk_set_count; +- layout = &root_signature->descriptor_set_layouts[index]; ++ return &context->vk_bindings[root_signature->vk_set_count]; ++} + +- if (!vkd3d_validate_descriptor_set_count(root_signature->device, index + 1)) +- return E_INVALIDARG; ++static void d3d12_root_signature_append_vk_binding_array(struct d3d12_root_signature *root_signature, ++ VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) ++{ ++ struct vk_binding_array *array; + +- if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, flags, context->descriptor_binding, +- context->unbounded_offset != UINT_MAX, context->first_binding, &layout->vk_layout))) +- return hr; +- layout->table_index = context->table_index; +- layout->unbounded_offset = context->unbounded_offset; +- ++root_signature->vk_set_count; ++ if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) || !array->count) ++ return; + +- context->current_binding = context->first_binding; +- context->descriptor_binding = 0; ++ array->table_index = context->table_index; ++ array->unbounded_offset = context->unbounded_offset; ++ array->flags = flags; + +- return S_OK; ++ ++root_signature->vk_set_count; + } + + static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, +- enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int register_idx, +- bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, +- unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context) ++ enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, ++ unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, ++ unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context, ++ const VkSampler *immutable_sampler, unsigned int *binding_idx) + { + struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets + ? 
&root_signature->descriptor_offsets[context->descriptor_index] : NULL; +- struct vkd3d_shader_resource_binding *mapping +- = &root_signature->descriptor_mapping[context->descriptor_index++]; ++ struct vkd3d_shader_resource_binding *mapping; ++ struct vk_binding_array *array; ++ unsigned int idx; ++ ++ if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) ++ || !(vk_binding_array_add_binding(&context->vk_bindings[root_signature->vk_set_count], ++ vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor), descriptor_count, ++ stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx))) ++ return E_OUTOFMEMORY; + ++ mapping = &root_signature->descriptor_mapping[context->descriptor_index++]; + mapping->type = descriptor_type; + mapping->register_space = register_space; + mapping->register_index = register_idx; + mapping->shader_visibility = shader_visibility; + mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + mapping->binding.set = root_signature->vk_set_count; +- mapping->binding.binding = context->descriptor_binding++; ++ mapping->binding.binding = idx; + mapping->binding.count = descriptor_count; + if (offset) + { +@@ -803,37 +844,11 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur + } + + if (context->unbounded_offset != UINT_MAX) +- return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); ++ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); + +- return S_OK; +-} ++ if (binding_idx) ++ *binding_idx = idx; + +-static HRESULT d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, +- enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx, +- unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors, +- enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context, +- uint32_t *first_binding) +-{ +- unsigned int i; +- HRESULT hr; +- +- is_buffer_descriptor |= descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; +- duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV +- || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) +- && duplicate_descriptors; +- +- *first_binding = context->descriptor_binding; +- for (i = 0; i < binding_count; ++i) +- { +- if (duplicate_descriptors +- && FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, +- register_space, base_register_idx + i, true, shader_visibility, 1, context))) +- return hr; +- +- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, +- base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context))) +- return hr; +- } + return S_OK; + } + +@@ -895,38 +910,41 @@ static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_ro + return min(count, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); + } + +-static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_root_signature *root_signature, ++static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, ++ unsigned int vk_binding_array_count, unsigned int bindings_per_range, + struct vkd3d_descriptor_set_context *context) + { + enum 
vkd3d_shader_visibility shader_visibility = vkd3d_shader_visibility_from_d3d12(visibility); +- bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; ++ bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; + enum vkd3d_shader_descriptor_type descriptor_type = range->type; ++ unsigned int i, register_space = range->register_space; + HRESULT hr; + + if (range->descriptor_count == UINT_MAX) + context->unbounded_offset = range->offset; + +- if (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) ++ for (i = 0; i < bindings_per_range; ++i) + { +- if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, +- descriptor_type, visibility, true, context->descriptor_binding, range->vk_binding_count)) +- return E_NOTIMPL; +- ++context->current_binding; +- +- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, +- range->base_register_idx, true, shader_visibility, range->vk_binding_count, context))) ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, ++ register_space, range->base_register_idx + i, is_buffer, shader_visibility, ++ vk_binding_array_count, context, NULL, NULL))) + return hr; + } + +- if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, +- descriptor_type, visibility, is_buffer, context->descriptor_binding, range->vk_binding_count)) +- return E_NOTIMPL; +- ++context->current_binding; ++ if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) ++ { ++ context->unbounded_offset = UINT_MAX; ++ return S_OK; ++ } + +- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, +- range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context))) +- return hr; ++ for (i = 0; i < bindings_per_range; ++i) ++ { ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, ++ register_space, range->base_register_idx + i, false, shader_visibility, ++ vk_binding_array_count, context, NULL, NULL))) ++ return hr; ++ } + + context->unbounded_offset = UINT_MAX; + +@@ -1116,11 +1134,10 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info, + struct vkd3d_descriptor_set_context *context) + { ++ unsigned int i, j, range_count, bindings_per_range, vk_binding_array_count; + const struct d3d12_device *device = root_signature->device; + bool use_vk_heaps = root_signature->device->use_vk_heaps; + struct d3d12_root_descriptor_table *table; +- unsigned int i, j, k, range_count; +- uint32_t vk_binding; + HRESULT hr; + + root_signature->descriptor_table_mask = 0; +@@ -1177,7 +1194,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + for (j = 0; j < range_count; ++j) + { + struct d3d12_root_descriptor_table_range *range; +- VkDescriptorSetLayoutBinding *cur_binding; + + range = &table->ranges[j]; + +@@ -1223,53 +1239,23 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + base_range = range; + } + +- range->binding = context->descriptor_binding; + range->vk_binding_count = vk_binding_count_from_descriptor_range(range, + info, &device->vk_info.descriptor_limits); +- +- if (FAILED(hr = d3d12_root_signature_init_descriptor_array_binding(root_signature, +- range, 
p->ShaderVisibility, context))) +- return hr; +- +- continue; ++ vk_binding_array_count = range->vk_binding_count; ++ bindings_per_range = 1; + } +- +- cur_binding = context->current_binding; +- +- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, +- range->type, range->register_space, range->base_register_idx, range->descriptor_count, false, true, +- shader_visibility, context, &vk_binding))) +- return hr; +- +- /* Unroll descriptor range. */ +- for (k = 0; k < range->descriptor_count; ++k) ++ else + { +- uint32_t vk_current_binding = vk_binding + k; +- +- if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV +- || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) +- { +- vk_current_binding = vk_binding + 2 * k; +- +- /* Assign binding for image view. */ +- if (!vk_binding_from_d3d12_descriptor_range(cur_binding, +- range->type, p->ShaderVisibility, false, vk_current_binding + 1, 1)) +- return E_NOTIMPL; +- +- ++cur_binding; +- } +- +- if (!vk_binding_from_d3d12_descriptor_range(cur_binding, +- range->type, p->ShaderVisibility, true, vk_current_binding, 1)) +- return E_NOTIMPL; +- +- ++cur_binding; ++ range->vk_binding_count = range->descriptor_count; ++ vk_binding_array_count = 1; ++ bindings_per_range = range->descriptor_count; + } + +- table->ranges[j].vk_binding_count = table->ranges[j].descriptor_count; +- table->ranges[j].binding = vk_binding; ++ range->binding = context->vk_bindings[root_signature->vk_set_count].count; + +- context->current_binding = cur_binding; ++ if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range, ++ p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context))) ++ return hr; + } + ++context->push_constant_index; + } +@@ -1280,8 +1266,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, + const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) + { +- VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; +- unsigned int i; ++ unsigned int binding, i; + HRESULT hr; + + root_signature->push_descriptor_mask = 0; +@@ -1296,23 +1281,16 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign + + root_signature->push_descriptor_mask |= 1u << i; + +- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, + vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), +- p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1, true, false, +- vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context, &cur_binding->binding))) ++ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, true, ++ vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL, &binding))) + return hr; +- cur_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(p->ParameterType); +- cur_binding->descriptorCount = 1; +- cur_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility); +- cur_binding->pImmutableSamplers = NULL; + + root_signature->parameters[i].parameter_type = p->ParameterType; +- root_signature->parameters[i].u.descriptor.binding = cur_binding->binding; +- +- ++cur_binding; ++ root_signature->parameters[i].u.descriptor.binding = binding; + } + +- context->current_binding = cur_binding; + return S_OK; + } + +@@ -1320,7 +1298,6 @@ static HRESULT 
d3d12_root_signature_init_static_samplers(struct d3d12_root_signa + struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc, + struct vkd3d_descriptor_set_context *context) + { +- VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; + unsigned int i; + HRESULT hr; + +@@ -1332,21 +1309,15 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa + if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) + return hr; + +- if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, +- VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, 1, false, false, +- vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context, &cur_binding->binding))) ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, false, ++ vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), 1, context, ++ &root_signature->static_samplers[i], NULL))) + return hr; +- cur_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; +- cur_binding->descriptorCount = 1; +- cur_binding->stageFlags = stage_flags_from_visibility(s->ShaderVisibility); +- cur_binding->pImmutableSamplers = &root_signature->static_samplers[i]; +- +- ++cur_binding; + } + +- context->current_binding = cur_binding; + if (device->use_vk_heaps) +- return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); ++ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); + + return S_OK; + } +@@ -1479,6 +1450,34 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, + return S_OK; + } + ++static HRESULT d3d12_root_signature_create_descriptor_set_layouts(struct d3d12_root_signature *root_signature, ++ struct vkd3d_descriptor_set_context *context) ++{ ++ unsigned int i; ++ HRESULT hr; ++ ++ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); ++ ++ if (!vkd3d_validate_descriptor_set_count(root_signature->device, root_signature->vk_set_count)) ++ return E_INVALIDARG; ++ ++ for (i = 0; i < root_signature->vk_set_count; ++i) ++ { ++ struct d3d12_descriptor_set_layout *layout = &root_signature->descriptor_set_layouts[i]; ++ struct vk_binding_array *array = &context->vk_bindings[i]; ++ ++ VKD3D_ASSERT(array->count); ++ ++ if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, array->flags, array->count, ++ array->unbounded_offset != UINT_MAX, array->bindings, &layout->vk_layout))) ++ return hr; ++ layout->unbounded_offset = array->unbounded_offset; ++ layout->table_index = array->table_index; ++ } ++ ++ return S_OK; ++} ++ + static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature, + VkDescriptorSetLayout *vk_set_layouts) + { +@@ -1510,7 +1509,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + VkDescriptorSetLayout vk_layouts[VKD3D_MAX_DESCRIPTOR_SETS]; + const struct vkd3d_vulkan_info *vk_info = &device->vk_info; + struct vkd3d_descriptor_set_context context; +- VkDescriptorSetLayoutBinding *binding_desc; + struct d3d12_root_signature_info info; + bool use_vk_heaps; + unsigned int i; +@@ -1518,7 +1516,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + + memset(&context, 0, sizeof(context)); + context.unbounded_offset = UINT_MAX; +- binding_desc = NULL; + + root_signature->ID3D12RootSignature_iface.lpVtbl = 
&d3d12_root_signature_vtbl; + root_signature->refcount = 1; +@@ -1580,20 +1577,14 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + sizeof(*root_signature->static_samplers)))) + goto fail; + +- if (!(binding_desc = vkd3d_calloc(info.binding_count, sizeof(*binding_desc)))) +- goto fail; +- context.first_binding = binding_desc; +- context.current_binding = binding_desc; +- + if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context))) + goto fail; + + /* We use KHR_push_descriptor for root descriptor parameters. */ + if (vk_info->KHR_push_descriptor) + { +- if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, +- &context, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR))) +- goto fail; ++ d3d12_root_signature_append_vk_binding_array(root_signature, ++ VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, &context); + } + + root_signature->main_set = root_signature->vk_set_count; +@@ -1609,11 +1600,10 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + if (use_vk_heaps) + d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context); + +- if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0))) ++ if (FAILED(hr = d3d12_root_signature_create_descriptor_set_layouts(root_signature, &context))) + goto fail; + +- vkd3d_free(binding_desc); +- binding_desc = NULL; ++ descriptor_set_context_cleanup(&context); + + i = d3d12_root_signature_copy_descriptor_set_layouts(root_signature, vk_layouts); + if (FAILED(hr = vkd3d_create_pipeline_layout(device, i, +@@ -1629,7 +1619,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + return S_OK; + + fail: +- vkd3d_free(binding_desc); ++ descriptor_set_context_cleanup(&context); + d3d12_root_signature_cleanup(root_signature, device); + return hr; + } +-- +2.45.2 +
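
For reference, the mask normalisation that patch 0007 adds to d3dbc.c (make_mask_contiguous()) pairs with the new vsir validator check that rejects signature elements with a "Non-contiguous mask". A minimal standalone C sketch of the technique follows; the fill_gaps() cross-check and main() driver are illustrative additions, not part of the patch series:

#include <assert.h>
#include <stdio.h>

/* The 16-entry table from patch 0007: each 4-bit write mask is mapped to
 * the smallest contiguous mask covering it. */
static unsigned int make_mask_contiguous(unsigned int mask)
{
    static const unsigned int table[] =
    {
        0x0, 0x1, 0x2, 0x3,
        0x4, 0x7, 0x6, 0x7,
        0x8, 0xf, 0xe, 0xf,
        0xc, 0xf, 0xe, 0xf,
    };

    assert(mask < sizeof(table) / sizeof(table[0]));
    return table[mask];
}

/* Equivalent bit-twiddling computation, used here only to cross-check the
 * table: fill in every bit between the lowest and the highest set bit. */
static unsigned int fill_gaps(unsigned int mask)
{
    unsigned int lo, hi;

    if (!mask)
        return 0;
    for (lo = 0; !(mask & (1u << lo)); ++lo)
        ;
    for (hi = 3; !(mask & (1u << hi)); --hi)
        ;
    return ((1u << (hi + 1)) - 1) & ~((1u << lo) - 1);
}

int main(void)
{
    unsigned int mask;

    for (mask = 0; mask < 16; ++mask)
    {
        assert(make_mask_contiguous(mask) == fill_gaps(mask));
        printf("%#x -> %#x\n", mask, make_mask_contiguous(mask));
    }
    return 0;
}

A lookup table rather than bit scanning is a reasonable choice here because a D3D write mask has only four bits, so the table stays tiny and the conversion is branch-free.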