From 6bcdf76e80406cdbc80e1fb9596b65db4f5fa1a4 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 17 Oct 2024 06:55:39 +1100 Subject: [PATCH] Updated vkd3d to c89e547e3ef767da28be46bc37ac2ba71eabdbbe. --- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 10 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 2 + libs/vkd3d/libs/vkd3d-shader/glsl.c | 1 + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 1 + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 35 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 109 ++- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 483 ++++++++---- libs/vkd3d/libs/vkd3d-shader/ir.c | 50 ++ libs/vkd3d/libs/vkd3d-shader/tpf.c | 691 +++++++++++------- .../libs/vkd3d-shader/vkd3d_shader_private.h | 12 +- 10 files changed, 951 insertions(+), 443 deletions(-) diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 44b1714b56b..8f7e5cb666b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -1395,7 +1395,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c return VKD3D_OK; } -bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, +bool sm1_register_from_semantic_name(const struct vkd3d_shader_version *version, const char *semantic_name, unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) { unsigned int i; @@ -1463,7 +1463,7 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, return false; } -bool hlsl_sm1_usage_from_semantic(const char *semantic_name, +bool sm1_usage_from_semantic_name(const char *semantic_name, uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx) { static const struct @@ -1780,7 +1780,7 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff size_offset = put_u32(buffer, 0); ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); - ctab_start = put_u32(buffer, 
sizeof(D3DXSHADER_CONSTANTTABLE)); + ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. */ creator_offset = put_u32(buffer, 0); put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); put_u32(buffer, uniform_count); @@ -2289,7 +2289,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, uint32_t token, usage_idx; bool ret; - if (hlsl_sm1_register_from_semantic(version, element->semantic_name, + if (sm1_register_from_semantic_name(version, element->semantic_name, element->semantic_index, output, &reg.type, &reg.reg)) { usage = 0; @@ -2297,7 +2297,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, } else { - ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); + ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx); VKD3D_ASSERT(ret); reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; reg.reg = element->register_index; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 5db9d6da063..59494d2500d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -5872,6 +5872,8 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr return; } e = &signature->elements[row_index]; + if (!e->sysval_semantic) + column_index += vsir_write_mask_get_component_idx(e->mask); if (column_index >= VKD3D_VEC4_SIZE) { diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index c8efdae3386..1e17de54ede 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -1293,6 +1293,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, break; case VKD3DSIH_DCL_INPUT: case VKD3DSIH_DCL_INPUT_PS: + case VKD3DSIH_DCL_INPUT_PS_SGV: case VKD3DSIH_DCL_INPUT_PS_SIV: case VKD3DSIH_DCL_INPUT_SGV: case 
VKD3DSIH_DCL_OUTPUT: diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index b44c0296f69..6ad0117fd5c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -2545,6 +2545,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, decl->return_type = return_type; decl->parameters = *parameters; decl->loc = *loc; + list_init(&decl->extern_vars); if (!hlsl_types_are_equal(return_type, ctx->builtin_types.Void)) { diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 4082b14fe04..efe3aec024b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -488,6 +488,8 @@ struct hlsl_ir_var * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 * means function entry. */ unsigned int first_write, last_read; + /* Whether the variable is read in any entry function. */ + bool is_read; /* Offset where the variable's value is stored within its buffer in numeric register components. * This in case the variable is uniform. */ unsigned int buffer_offset; @@ -611,6 +613,12 @@ struct hlsl_ir_function_decl * executed. Needed to deal with return statements in non-uniform control * flow, since some backends can't handle them. */ struct hlsl_ir_var *early_return_var; + + /* List of all the extern semantic variables; linked by the + * hlsl_ir_var.extern_entry fields. This exists as a convenience because + * it is often necessary to iterate all extern variables and these can be + * declared in as function parameters, or as the function return value. */ + struct list extern_vars; }; struct hlsl_ir_call @@ -1019,10 +1027,11 @@ struct hlsl_ctx struct hlsl_scope *dummy_scope; /* List of all the scopes in the program; linked by the hlsl_scope.entry fields. */ struct list scopes; - /* List of all the extern variables; linked by the hlsl_ir_var.extern_entry fields. 
- * This exists as a convenience because it is often necessary to iterate all extern variables - * and these can be declared in global scope, as function parameters, or as the function - * return value. */ + + /* List of all the extern variables, excluding semantic variables; linked + * by the hlsl_ir_var.extern_entry fields. This exists as a convenience + * because it is often necessary to iterate all extern variables declared + * in the global scope or as function parameters. */ struct list extern_vars; /* List containing both the built-in HLSL buffers ($Globals and $Params) and the ones declared @@ -1080,9 +1089,6 @@ struct hlsl_ctx } constant_defs; /* 'c' registers where the constants expected by SM2 sincos are stored. */ struct hlsl_reg d3dsincosconst1, d3dsincosconst2; - /* Number of temp. registers required for the shader to run, i.e. the largest temp register - * index that will be used in the output bytecode (+1). */ - uint32_t temp_count; /* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in * compute shader profiles. It is set using the numthreads() attribute in the entry point. 
*/ @@ -1421,6 +1427,9 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body); void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); +uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); +void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); +void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); @@ -1615,21 +1624,15 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); -bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, - unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); -bool hlsl_sm1_usage_from_semantic(const char *semantic_name, - uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, - struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output); -bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum 
vkd3d_shader_register_type *type, bool *has_idx); -int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); +int tpf_compile(struct vsir_program *program, uint64_t config_flags, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 2230cd5b919..ee13e193d49 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -1208,6 +1208,32 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, return true; } +static bool add_record_access_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, + const char *name, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *record = node_from_block(block); + const struct hlsl_type *type = record->data_type; + const struct hlsl_struct_field *field, *base; + + if ((field = get_struct_field(type->e.record.fields, type->e.record.field_count, name))) + { + unsigned int field_idx = field - type->e.record.fields; + + return add_record_access(ctx, block, record, field_idx, loc); + } + else if ((base = get_struct_field(type->e.record.fields, type->e.record.field_count, "$super"))) + { + unsigned int base_idx = base - type->e.record.fields; + + if (!add_record_access(ctx, block, record, base_idx, loc)) + return false; + return add_record_access_recurse(ctx, block, name, loc); + } + + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", name); + return false; +} + static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, struct list *list) { struct parse_variable_def *v, *v_next; @@ -4260,6 +4286,35 @@ static bool intrinsic_min(struct hlsl_ctx 
*ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, params->args[0], params->args[1], loc); } +static bool intrinsic_modf(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type; + char *body; + + static const char template[] = + "%s modf(%s x, out %s ip)\n" + "{\n" + " ip = trunc(x);\n" + " return x - ip;\n" + "}"; + + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; + type = params->args[0]->data_type; + + if (!(body = hlsl_sprintf_alloc(ctx, template, + type->name, type->name, type->name))) + return false; + func = hlsl_compile_internal_function(ctx, "modf", body); + vkd3d_free(body); + if (!func) + return false; + + return !!add_user_call(ctx, func, params, false, loc); +} + static bool intrinsic_mul(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -5147,6 +5202,7 @@ intrinsic_functions[] = {"mad", 3, true, intrinsic_mad}, {"max", 2, true, intrinsic_max}, {"min", 2, true, intrinsic_min}, + {"modf", 2, true, intrinsic_modf}, {"mul", 2, true, intrinsic_mul}, {"normalize", 1, true, intrinsic_normalize}, {"pow", 2, true, intrinsic_pow}, @@ -6560,6 +6616,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %type switch_case +%type base_optional %type field_type %type named_struct_spec %type unnamed_struct_spec @@ -6774,11 +6831,28 @@ struct_spec: | unnamed_struct_spec named_struct_spec: - KW_STRUCT any_identifier '{' fields_list '}' + KW_STRUCT any_identifier base_optional '{' fields_list '}' { bool ret; - $$ = hlsl_new_struct_type(ctx, $2, $4.fields, $4.count); + if ($3) + { + char *name; + + if (!(name = hlsl_strdup(ctx, "$super"))) + YYABORT; + if (!hlsl_array_reserve(ctx, (void **)&$5.fields, &$5.capacity, 1 + $5.count, sizeof(*$5.fields))) + YYABORT; + memmove(&$5.fields[1], $5.fields, $5.count * 
sizeof(*$5.fields)); + ++$5.count; + + memset(&$5.fields[0], 0, sizeof($5.fields[0])); + $5.fields[0].type = $3; + $5.fields[0].loc = @3; + $5.fields[0].name = name; + } + + $$ = hlsl_new_struct_type(ctx, $2, $5.fields, $5.count); if (hlsl_get_var(ctx->cur_scope, $2)) { @@ -6805,6 +6879,23 @@ any_identifier: | TYPE_IDENTIFIER | NEW_IDENTIFIER +/* TODO: Multiple inheritance support for interfaces. */ +base_optional: + %empty + { + $$ = NULL; + } + | ':' TYPE_IDENTIFIER + { + $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); + if ($$->class != HLSL_CLASS_STRUCT) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Base type \"%s\" is not a struct.", $2); + YYABORT; + } + vkd3d_free($2); + } + fields_list: %empty { @@ -8795,19 +8886,7 @@ postfix_expr: if (node->data_type->class == HLSL_CLASS_STRUCT) { - struct hlsl_type *type = node->data_type; - const struct hlsl_struct_field *field; - unsigned int field_idx = 0; - - if (!(field = get_struct_field(type->e.record.fields, type->e.record.field_count, $3))) - { - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", $3); - vkd3d_free($3); - YYABORT; - } - - field_idx = field - type->e.record.fields; - if (!add_record_access(ctx, $1, node, field_idx, &@2)) + if (!add_record_access_recurse(ctx, $1, $3, &@2)) { vkd3d_free($3); YYABORT; diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index c5dd5e71e02..4cf9d5eb84a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -276,8 +276,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls == base_type_get_semantic_equivalent(type2->e.numeric.type); } -static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, +static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct 
hlsl_ir_function_decl *func, + struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t index, bool output, const struct vkd3d_shader_location *loc) { struct hlsl_semantic new_semantic; @@ -287,7 +287,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? "output" : "input", semantic->name, index))) return NULL; - LIST_FOR_EACH_ENTRY(ext_var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + LIST_FOR_EACH_ENTRY(ext_var, &func->extern_vars, struct hlsl_ir_var, extern_entry) { if (!ascii_strcasecmp(ext_var->name, new_name)) { @@ -339,12 +339,12 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir ext_var->is_input_semantic = 1; ext_var->is_param = var->is_param; list_add_before(&var->scope_entry, &ext_var->scope_entry); - list_add_tail(&ctx->extern_vars, &ext_var->extern_entry); + list_add_tail(&func->extern_vars, &ext_var->extern_entry); return ext_var; } -static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, +static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; @@ -375,7 +375,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s struct hlsl_ir_var *input; struct hlsl_ir_load *load; - if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, + if (!(input = add_semantic_var(ctx, func, var, vector_type_src, modifiers, semantic, semantic_index + i, false, loc))) return; @@ -408,8 +408,8 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s } } -static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct 
hlsl_ir_load *lhs, - uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) +static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct vkd3d_shader_location *loc = &lhs->node.loc; struct hlsl_type *type = lhs->node.data_type; @@ -466,30 +466,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * return; list_add_after(&c->entry, &element_load->node.entry); - prepend_input_copy_recurse(ctx, block, element_load, element_modifiers, semantic, elem_semantic_index); + prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, semantic, elem_semantic_index); } } else { - prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); + prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index); } } /* Split inputs into two variables representing the semantic and temp registers, * and copy the former to the latter, so that writes to input variables work. */ -static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) +static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) { struct hlsl_ir_load *load; /* This redundant load is expected to be deleted later by DCE. 
*/ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; - list_add_head(&block->instrs, &load->node.entry); + list_add_head(&func->body.instrs, &load->node.entry); - prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); + prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); } -static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, +static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct hlsl_type *type = rhs->node.data_type, *vector_type; @@ -517,18 +517,19 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s struct hlsl_ir_var *output; struct hlsl_ir_load *load; - if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) + if (!(output = add_semantic_var(ctx, func, var, vector_type, + modifiers, semantic, semantic_index + i, true, loc))) return; if (type->class == HLSL_CLASS_MATRIX) { if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - hlsl_block_add_instr(block, c); + hlsl_block_add_instr(&func->body, c); if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) return; - hlsl_block_add_instr(block, &load->node); + hlsl_block_add_instr(&func->body, &load->node); } else { @@ -536,17 +537,17 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) return; - hlsl_block_add_instr(block, &load->node); + hlsl_block_add_instr(&func->body, &load->node); } if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) return; - hlsl_block_add_instr(block, store); + hlsl_block_add_instr(&func->body, store); } } -static void append_output_copy_recurse(struct hlsl_ctx *ctx, 
struct hlsl_block *block, struct hlsl_ir_load *rhs, - uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) +static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + struct hlsl_ir_load *rhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) { struct vkd3d_shader_location *loc = &rhs->node.loc; struct hlsl_type *type = rhs->node.data_type; @@ -580,34 +581,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; - hlsl_block_add_instr(block, c); + hlsl_block_add_instr(&func->body, c); if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) return; - hlsl_block_add_instr(block, &element_load->node); + hlsl_block_add_instr(&func->body, &element_load->node); - append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); + append_output_copy_recurse(ctx, func, element_load, modifiers, semantic, elem_semantic_index); } } else { - append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); + append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index); } } /* Split outputs into two variables representing the temp and semantic * registers, and copy the former to the latter, so that reads from output * variables work. */ -static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) +static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var) { struct hlsl_ir_load *load; /* This redundant load is expected to be deleted later by DCE. 
*/ if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) return; - hlsl_block_add_instr(block, &load->node); + hlsl_block_add_instr(&func->body, &load->node); - append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); + append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); } bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), @@ -4113,7 +4114,7 @@ static void dump_function(struct rb_entry *entry, void *context) } } -static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, +static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { if (!deref->rel_offset.node) @@ -4126,6 +4127,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, return true; } +void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + var->indexable = false; + } + + transform_derefs(ctx, mark_indexable_var, &entry_func->body); +} + static char get_regset_name(enum hlsl_regset regset) { switch (regset) @@ -4142,11 +4157,11 @@ static char get_regset_name(enum hlsl_regset regset) vkd3d_unreachable(); } -static void allocate_register_reservations(struct hlsl_ctx *ctx) +static void allocate_register_reservations(struct hlsl_ctx *ctx, struct list *extern_vars) { struct hlsl_ir_var *var; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + LIST_FOR_EACH_ENTRY(var, extern_vars, struct hlsl_ir_var, extern_entry) { const struct hlsl_reg_reservation *reservation = &var->reg_reservation; unsigned int r; @@ -4352,7 +4367,15 @@ static void compute_liveness_recurse(struct hlsl_block 
*block, unsigned int loop } } -static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +static void init_var_liveness(struct hlsl_ir_var *var) +{ + if (var->is_uniform || var->is_input_semantic) + var->first_write = 1; + else if (var->is_output_semantic) + var->last_read = UINT_MAX; +} + +void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct hlsl_scope *scope; struct hlsl_ir_var *var; @@ -4366,16 +4389,29 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl } LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform || var->is_input_semantic) - var->first_write = 1; - else if (var->is_output_semantic) - var->last_read = UINT_MAX; - } + init_var_liveness(var); + + LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) + init_var_liveness(var); compute_liveness_recurse(&entry_func->body, 0, 0); } +static void mark_vars_usage(struct hlsl_ctx *ctx) +{ + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (var->last_read) + var->is_read = true; + } + } +} + struct register_allocator { struct allocation @@ -5094,9 +5130,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi * index to all (simultaneously live) variables or intermediate values. Agnostic * as to how many registers are actually available for the current backend, and * does not handle constants. */ -static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct register_allocator allocator = {0}; + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + + /* Reset variable temp register allocations. 
*/ + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform)) + memset(var->regs, 0, sizeof(var->regs)); + } + } /* ps_1_* outputs are special and go in temp register 0. */ if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) @@ -5105,8 +5153,7 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio for (i = 0; i < entry_func->parameters.count; ++i) { - const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; - + var = entry_func->parameters.vars[i]; if (var->is_output_semantic) { record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); @@ -5116,11 +5163,13 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio } allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); - ctx->temp_count = allocator.reg_count; vkd3d_free(allocator.allocations); + + return allocator.reg_count; } -static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) +static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, + unsigned int *counter, bool output, bool is_patch_constant_func) { static const char *const shader_names[] = { @@ -5133,27 +5182,28 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var }; enum vkd3d_shader_register_type type; + struct vkd3d_shader_version version; uint32_t reg; bool builtin; VKD3D_ASSERT(var->semantic.name); - if (ctx->profile->major_version < 4) + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + + if (version.major < 4) { - struct vkd3d_shader_version version; enum vkd3d_decl_usage usage; uint32_t usage_idx; /* ps_1_* outputs are special and 
go in temp register 0. */ - if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + if (version.major == 1 && output && version.type == VKD3D_SHADER_TYPE_PIXEL) return; - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; - builtin = hlsl_sm1_register_from_semantic(&version, + builtin = sm1_register_from_semantic_name(&version, var->semantic.name, var->semantic.index, output, &type, &reg); - if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) + if (!builtin && !sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); @@ -5168,19 +5218,21 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var enum vkd3d_shader_sysval_semantic semantic; bool has_idx; - if (!sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output)) + if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, + ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); return; } - if ((builtin = hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, &has_idx))) + + if ((builtin = sm4_register_from_semantic_name(&version, var->semantic.name, output, &type, &has_idx))) reg = has_idx ? var->semantic.index : 0; } if (builtin) { - TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[ctx->profile->type], + TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[version.type], output ? 
"output" : "input", var->semantic.name, var->semantic.index, type, reg); } else @@ -5194,17 +5246,18 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var } } -static void allocate_semantic_registers(struct hlsl_ctx *ctx) +static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { + bool is_patch_constant_func = entry_func == ctx->patch_constant_func; unsigned int input_counter = 0, output_counter = 0; struct hlsl_ir_var *var; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_input_semantic) - allocate_semantic_register(ctx, var, &input_counter, false); + allocate_semantic_register(ctx, var, &input_counter, false, is_patch_constant_func); if (var->is_output_semantic) - allocate_semantic_register(ctx, var, &output_counter, true); + allocate_semantic_register(ctx, var, &output_counter, true, is_patch_constant_func); } } @@ -5279,7 +5332,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); - if (var->last_read) + if (var->is_read) buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size); } @@ -5510,15 +5563,15 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum return NULL; } -static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) +static void allocate_objects(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, enum hlsl_regset regset) { char regset_name = get_regset_name(regset); uint32_t min_index = 0, id = 0; struct hlsl_ir_var *var; - if (regset == HLSL_REGSET_UAVS) + if (regset == HLSL_REGSET_UAVS && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) { - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct 
hlsl_ir_var, extern_entry) + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->semantic.name && (!ascii_strcasecmp(var->semantic.name, "color") || !ascii_strcasecmp(var->semantic.name, "sv_target"))) @@ -6228,22 +6281,104 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) } while (progress); } -static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, - struct vsir_program *program, bool output, struct hlsl_ir_var *var) +static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, + struct shader_signature *signature, bool output, struct hlsl_ir_var *var) { enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; + enum vkd3d_shader_component_type component_type; + unsigned int register_index, mask, use_mask; + const char *name = var->semantic.name; enum vkd3d_shader_register_type type; - struct shader_signature *signature; struct signature_element *element; - unsigned int register_index, mask; - if ((!output && !var->last_read) || (output && !var->first_write)) - return; + if (hlsl_version_ge(ctx, 4, 0)) + { + struct vkd3d_string_buffer *string; + bool has_idx, ret; - if (output) - signature = &program->output_signature; + ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, + ctx->semantic_compat_mapping, ctx->domain, var->semantic.name, var->semantic.index, + output, signature == &program->patch_constant_signature); + VKD3D_ASSERT(ret); + if (sysval == ~0u) + return; + + if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) + { + register_index = has_idx ? var->semantic.index : ~0u; + } + else + { + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); + register_index = var->regs[HLSL_REGSET_NUMERIC].id; + } + + /* NOTE: remember to change this to the actually allocated mask once + * we start optimizing interstage signatures. 
*/ + mask = (1u << var->data_type->dimx) - 1; + use_mask = mask; /* FIXME: retrieve use mask accurately. */ + + switch (var->data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + component_type = VKD3D_SHADER_COMPONENT_FLOAT; + break; + + case HLSL_TYPE_INT: + component_type = VKD3D_SHADER_COMPONENT_INT; + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: + component_type = VKD3D_SHADER_COMPONENT_UINT; + break; + + default: + if ((string = hlsl_type_to_string(ctx, var->data_type))) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Invalid data type %s for semantic variable %s.", string->buffer, var->name); + hlsl_release_string_buffer(ctx, string); + component_type = VKD3D_SHADER_COMPONENT_VOID; + break; + } + + if (sysval == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) + name = "SV_Target"; + else if (sysval == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) + name ="SV_Depth"; + else if (sysval == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) + name = "SV_Position"; + } else - signature = &program->input_signature; + { + if ((!output && !var->last_read) || (output && !var->first_write)) + return; + + if (!sm1_register_from_semantic_name(&program->shader_version, + var->semantic.name, var->semantic.index, output, &type, ®ister_index)) + { + enum vkd3d_decl_usage usage; + unsigned int usage_idx; + bool ret; + + register_index = var->regs[HLSL_REGSET_NUMERIC].id; + + ret = sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx); + VKD3D_ASSERT(ret); + /* With the exception of vertex POSITION output, none of these are + * system values. Pixel POSITION input is not equivalent to + * SV_Position; the closer equivalent is VPOS, which is not declared + * as a semantic. 
*/ + if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX + && output && usage == VKD3D_DECL_USAGE_POSITION) + sysval = VKD3D_SHADER_SV_POSITION; + } + + mask = (1 << var->data_type->dimx) - 1; + use_mask = mask; /* FIXME: retrieve use mask accurately. */ + component_type = VKD3D_SHADER_COMPONENT_FLOAT; + } if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, signature->element_count + 1, sizeof(*signature->elements))) @@ -6252,30 +6387,9 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, return; } element = &signature->elements[signature->element_count++]; - - if (!hlsl_sm1_register_from_semantic(&program->shader_version, - var->semantic.name, var->semantic.index, output, &type, ®ister_index)) - { - enum vkd3d_decl_usage usage; - unsigned int usage_idx; - bool ret; - - register_index = var->regs[HLSL_REGSET_NUMERIC].id; - - ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); - VKD3D_ASSERT(ret); - /* With the exception of vertex POSITION output, none of these are - * system values. Pixel POSITION input is not equivalent to - * SV_Position; the closer equivalent is VPOS, which is not declared - * as a semantic. 
*/ - if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX - && output && usage == VKD3D_DECL_USAGE_POSITION) - sysval = VKD3D_SHADER_SV_POSITION; - } - mask = (1 << var->data_type->dimx) - 1; - memset(element, 0, sizeof(*element)); - if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) + + if (!(element->semantic_name = vkd3d_strdup(name))) { --signature->element_count; ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; @@ -6283,26 +6397,35 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, } element->semantic_index = var->semantic.index; element->sysval_semantic = sysval; - element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + element->component_type = component_type; element->register_index = register_index; element->target_location = register_index; element->register_count = 1; element->mask = mask; - element->used_mask = mask; + element->used_mask = use_mask; if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) element->interpolation_mode = VKD3DSIM_LINEAR; } -static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) +static void generate_vsir_signature(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_function_decl *func) { struct hlsl_ir_var *var; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_input_semantic) - sm1_generate_vsir_signature_entry(ctx, program, false, var); - if (var->is_output_semantic) - sm1_generate_vsir_signature_entry(ctx, program, true, var); + if (func == ctx->patch_constant_func) + { + generate_vsir_signature_entry(ctx, program, + &program->patch_constant_signature, var->is_output_semantic, var); + } + else + { + if (var->is_input_semantic) + generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var); + if (var->is_output_semantic) + generate_vsir_signature_entry(ctx, program, 
&program->output_signature, true, var); + } } } @@ -6873,7 +6996,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, type = VKD3DSPR_TEMP; register_index = 0; } - else if (!hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name, + else if (!sm1_register_from_semantic_name(&version, deref->var->semantic.name, deref->var->semantic.index, true, &type, ®ister_index)) { VKD3D_ASSERT(reg.allocated); @@ -6928,7 +7051,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, version.major = ctx->profile->major_version; version.minor = ctx->profile->minor_version; version.type = ctx->profile->type; - if (hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name, + if (sm1_register_from_semantic_name(&version, deref->var->semantic.name, deref->var->semantic.index, false, &type, ®ister_index)) { writemask = (1 << deref->var->data_type->dimx) - 1; @@ -7233,9 +7356,6 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo } } -/* OBJECTIVE: Translate all the information from ctx and entry_func to the - * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() - * without relying on ctx and entry_func. 
*/ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) { @@ -7262,7 +7382,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl ctab->code = buffer.data; ctab->size = buffer.size; - sm1_generate_vsir_signature(ctx, program); + generate_vsir_signature(ctx, program, entry_func); hlsl_block_init(&block); sm1_generate_vsir_constant_defs(ctx, program, &block); @@ -7272,6 +7392,29 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl sm1_generate_vsir_block(ctx, &entry_func->body, program); } +/* OBJECTIVE: Translate all the information from ctx and entry_func to the + * vsir_program, so it can be used as input to tpf_compile() without relying + * on ctx and entry_func. */ +static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + uint64_t config_flags, struct vsir_program *program) +{ + struct vkd3d_shader_version version = {0}; + + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + + if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + generate_vsir_signature(ctx, program, entry_func); + if (version.type == VKD3D_SHADER_TYPE_HULL) + generate_vsir_signature(ctx, program, ctx->patch_constant_func); +} + static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, struct hlsl_block **found_block) { @@ -7572,16 +7715,23 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru return true; } -int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) +static void process_entry_function(struct hlsl_ctx *ctx, + 
const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func) { const struct hlsl_profile_info *profile = ctx->profile; + struct hlsl_block static_initializers, global_uniforms; struct hlsl_block *const body = &entry_func->body; struct recursive_call_ctx recursive_call_ctx; struct hlsl_ir_var *var; unsigned int i; - list_move_head(&body->instrs, &ctx->static_initializers.instrs); + if (!hlsl_clone_block(ctx, &static_initializers, &ctx->static_initializers)) + return; + list_move_head(&body->instrs, &static_initializers.instrs); + + if (!hlsl_clone_block(ctx, &global_uniforms, global_uniform_block)) + return; + list_move_head(&body->instrs, &global_uniforms.instrs); memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); @@ -7590,7 +7740,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry /* Avoid going into an infinite loop when processing call instructions. * lower_return() recurses into inferior calls. 
*/ if (ctx->result) - return ctx->result; + return; if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) lower_ir(ctx, lower_f16tof32, body); @@ -7602,20 +7752,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_matrix_swizzles, body); lower_ir(ctx, lower_index_loads, body); - LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) - { - if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) - prepend_uniform_copy(ctx, body, var); - } - for (i = 0; i < entry_func->parameters.count; ++i) { var = entry_func->parameters.vars[i]; - if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (hlsl_type_is_resource(var->data_type)) { prepend_uniform_copy(ctx, body, var); } + else if ((var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + { + if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL && entry_func == ctx->patch_constant_func) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Patch constant function parameter \"%s\" cannot be uniform.", var->name); + else + prepend_uniform_copy(ctx, body, var); + } else { if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT @@ -7627,9 +7779,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } if (var->storage_modifiers & HLSL_STORAGE_IN) - prepend_input_var_copy(ctx, body, var); + prepend_input_var_copy(ctx, entry_func, var); if (var->storage_modifiers & HLSL_STORAGE_OUT) - append_output_var_copy(ctx, body, var); + append_output_var_copy(ctx, entry_func, var); } } if (entry_func->return_var) @@ -7638,19 +7790,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); - append_output_var_copy(ctx, body, entry_func->return_var); + 
append_output_var_copy(ctx, entry_func, entry_func->return_var); } - parse_entry_function_attributes(ctx, entry_func); - if (ctx->result) - return ctx->result; - - if (profile->type == VKD3D_SHADER_TYPE_HULL) - validate_hull_shader_attributes(ctx, entry_func); - else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, - "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); - if (profile->major_version >= 4) { hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); @@ -7726,29 +7868,69 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry while (hlsl_transform_ir(ctx, dce, body, NULL)); compute_liveness(ctx, entry_func); + mark_vars_usage(ctx); - if (TRACE_ON()) - rb_for_each_entry(&ctx->functions, dump_function, ctx); + calculate_resource_register_counts(ctx); - transform_derefs(ctx, mark_indexable_vars, body); + allocate_register_reservations(ctx, &ctx->extern_vars); + allocate_register_reservations(ctx, &entry_func->extern_vars); + allocate_semantic_registers(ctx, entry_func); +} - calculate_resource_register_counts(ctx); +int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) +{ + const struct hlsl_profile_info *profile = ctx->profile; + struct hlsl_block global_uniform_block; + struct hlsl_ir_var *var; + + parse_entry_function_attributes(ctx, entry_func); + if (ctx->result) + return ctx->result; + + if (profile->type == VKD3D_SHADER_TYPE_HULL) + validate_hull_shader_attributes(ctx, entry_func); + else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); + + hlsl_block_init(&global_uniform_block); + + 
LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + { + if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) + prepend_uniform_copy(ctx, &global_uniform_block, var); + } - allocate_register_reservations(ctx); + process_entry_function(ctx, &global_uniform_block, entry_func); + if (ctx->result) + return ctx->result; + + if (profile->type == VKD3D_SHADER_TYPE_HULL) + { + process_entry_function(ctx, &global_uniform_block, ctx->patch_constant_func); + if (ctx->result) + return ctx->result; + } + + hlsl_block_cleanup(&global_uniform_block); - allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) { + mark_indexable_vars(ctx, entry_func); + allocate_temp_registers(ctx, entry_func); allocate_const_registers(ctx, entry_func); } else { allocate_buffers(ctx); - allocate_objects(ctx, HLSL_REGSET_TEXTURES); - allocate_objects(ctx, HLSL_REGSET_UAVS); + allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES); + allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS); } - allocate_semantic_registers(ctx); - allocate_objects(ctx, HLSL_REGSET_SAMPLERS); + allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); + + if (TRACE_ON()) + rb_for_each_entry(&ctx->functions, dump_function, ctx); if (ctx->result) return ctx->result; @@ -7777,7 +7959,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } case VKD3D_SHADER_TARGET_DXBC_TPF: - return hlsl_sm4_write(ctx, entry_func, out); + { + uint32_t config_flags = vkd3d_shader_init_config_flags(); + struct vsir_program program; + int result; + + sm4_generate_vsir(ctx, entry_func, config_flags, &program); + if (ctx->result) + { + vsir_program_cleanup(&program); + return ctx->result; + } + + result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func); + vsir_program_cleanup(&program); + return result; + } default: ERR("Unsupported shader target type %#x.\n", target_type); diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c 
b/libs/vkd3d/libs/vkd3d-shader/ir.c index 0bcc3d0a1f7..c475bf92279 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -6786,6 +6786,12 @@ sysval_validation_data[] = VKD3D_SHADER_COMPONENT_FLOAT, 4}, [VKD3D_SHADER_SV_CULL_DISTANCE] = {PS_BIT | GS_BIT | HS_BIT | DS_BIT, PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, 0, VKD3D_SHADER_COMPONENT_FLOAT, 4}, + [VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_QUADINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_TRIINT] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_LINEDET] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, + [VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN] = {0, 0, HS_BIT | DS_BIT, VKD3D_SHADER_COMPONENT_FLOAT, 1}, }; static void vsir_validate_signature_element(struct validation_context *ctx, @@ -6942,11 +6948,23 @@ static void vsir_validate_signature_element(struct validation_context *ctx, idx, signature_type_name, element->interpolation_mode); } +static const unsigned int allowed_signature_phases[] = +{ + [SIGNATURE_TYPE_INPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, + [SIGNATURE_TYPE_OUTPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, + [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, +}; + static void vsir_validate_signature(struct validation_context *ctx, const struct shader_signature *signature, enum vsir_signature_type signature_type) { unsigned int i; + if (signature->element_count != 0 && !(allowed_signature_phases[signature_type] + & (1u << ctx->program->shader_version.type))) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Unexpected %s signature.", signature_type_names[signature_type]); + for (i = 0; i < signature->element_count; ++i) 
vsir_validate_signature_element(ctx, signature, signature_type, i); } @@ -7501,6 +7519,38 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c "Patch constant signature is only valid for hull and domain shaders."); } + switch (program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_DOMAIN: + case VKD3D_SHADER_TYPE_GEOMETRY: + if (program->input_control_point_count == 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Invalid zero input control point count."); + break; + + default: + if (program->input_control_point_count != 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Invalid input control point count %u.", + program->input_control_point_count); + } + + switch (program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: + if (program->output_control_point_count == 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Invalid zero output control point count."); + break; + + default: + if (program->output_control_point_count != 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Invalid output control point count %u.", + program->output_control_point_count); + } + vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT); vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index f79e97e92d4..54344f2afc0 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -1403,12 +1403,16 @@ struct sm4_stat uint32_t fields[VKD3D_STAT_COUNT]; }; -struct tpf_writer +struct tpf_compiler { + /* OBJECTIVE: We want to get rid of this HLSL IR specific field. 
*/ struct hlsl_ctx *ctx; - struct vkd3d_bytecode_buffer *buffer; + struct vsir_program *program; struct vkd3d_sm4_lookup_tables lookup; struct sm4_stat *stat; + + struct vkd3d_bytecode_buffer *buffer; + struct dxbc_writer dxbc; }; static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) @@ -1916,15 +1920,6 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) } } -static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct sm4_stat *stat, - struct vkd3d_bytecode_buffer *buffer) -{ - tpf->ctx = ctx; - tpf->buffer = buffer; - tpf->stat = stat; - init_sm4_lookup_tables(&tpf->lookup); -} - static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode( const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) { @@ -2986,7 +2981,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con return VKD3D_OK; } -static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); +static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block); static bool type_is_integer(const struct hlsl_type *type) { @@ -3002,8 +2997,8 @@ static bool type_is_integer(const struct hlsl_type *type) } } -bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_shader_register_type *type, bool *has_idx) +bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) { unsigned int i; @@ -3023,6 +3018,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID, false}, + {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_OUTPOINTID, false}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_PRIMID, false}, + /* Put sv_target in this 
table, instead of letting it fall through to * default varying allocation, so that the register index matches the * usage index. */ @@ -3035,9 +3033,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem for (i = 0; i < ARRAY_SIZE(register_table); ++i) { - if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type) + && version->type == register_table[i].shader_type) { if (type) *type = register_table[i].type; @@ -3049,8 +3047,57 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem return false; } -bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, - struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output) +static bool get_tessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, + enum vkd3d_tessellator_domain domain, uint32_t index) +{ + switch (domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: + if (index == 0) + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN; + else if (index == 1) + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; + else + return false; + return true; + + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE; + return index < 3; + + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE; + return index < 4; + + default: + vkd3d_unreachable(); + } +} + +static bool get_insidetessfactor_sysval_semantic(enum vkd3d_shader_sysval_semantic *semantic, + enum vkd3d_tessellator_domain domain, uint32_t index) +{ + switch (domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: + return false; + + case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_TRIINT; + return index == 0; + + case VKD3D_TESSELLATOR_DOMAIN_QUAD: + *semantic = VKD3D_SHADER_SV_TESS_FACTOR_QUADINT; + return 
index < 2; + + default: + vkd3d_unreachable(); + } +} + +bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, + const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, + const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func) { unsigned int i; @@ -3075,6 +3122,12 @@ bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, + {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, + {"sv_position", false, VKD3D_SHADER_TYPE_HULL, ~0u}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_HULL, ~0u}, + + {"sv_position", true, VKD3D_SHADER_TYPE_HULL, VKD3D_SHADER_SV_POSITION}, + {"position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_PRIMITIVE_ID}, @@ -3098,16 +3151,42 @@ bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, }; - bool needs_compat_mapping = ascii_strncasecmp(hlsl_semantic->name, "sv_", 3); + bool needs_compat_mapping = ascii_strncasecmp(semantic_name, "sv_", 3); + + if (is_patch_constant_func) + { + if (output) + { + if (!ascii_strcasecmp(semantic_name, "sv_tessfactor")) + return get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); + if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor")) + return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); + if (!ascii_strcasecmp(semantic_name, "sv_position")) + { + 
*sysval_semantic = VKD3D_SHADER_SV_NONE; + return true; + } + } + else + { + if (!ascii_strcasecmp(semantic_name, "sv_primitiveid") + || !ascii_strcasecmp(semantic_name, "sv_position")) + { + *sysval_semantic = ~0u; + return true; + } + return false; + } + } for (i = 0; i < ARRAY_SIZE(semantics); ++i) { - if (!ascii_strcasecmp(hlsl_semantic->name, semantics[i].name) + if (!ascii_strcasecmp(semantic_name, semantics[i].name) && output == semantics[i].output - && (ctx->semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) - && ctx->profile->type == semantics[i].shader_type) + && (semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) + && version->type == semantics[i].shader_type) { - *semantic = semantics[i].semantic; + *sysval_semantic = semantics[i].semantic; return true; } } @@ -3115,7 +3194,7 @@ bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, if (!needs_compat_mapping) return false; - *semantic = VKD3D_SHADER_SV_NONE; + *sysval_semantic = VKD3D_SHADER_SV_NONE; return true; } @@ -3133,110 +3212,46 @@ static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, ctx->result = buffer->status; } -static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) +static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag) { + bool output = tag == TAG_OSGN || tag == TAG_PCSG; struct vkd3d_bytecode_buffer buffer = {0}; - struct vkd3d_string_buffer *string; - const struct hlsl_ir_var *var; - size_t count_position; unsigned int i; - bool ret; - count_position = put_u32(&buffer, 0); + put_u32(&buffer, signature->element_count); put_u32(&buffer, 8); /* unknown */ - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + for (i = 0; i < signature->element_count; ++i) { - unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; - enum vkd3d_shader_sysval_semantic semantic; - uint32_t 
usage_idx, reg_idx; - bool has_idx; - - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - - ret = sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); - VKD3D_ASSERT(ret); - if (semantic == ~0u) - continue; - usage_idx = var->semantic.index; + const struct signature_element *element = &signature->elements[i]; + enum vkd3d_shader_sysval_semantic sysval; + uint32_t used_mask = element->used_mask; - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, &has_idx)) - { - reg_idx = has_idx ? var->semantic.index : ~0u; - } - else - { - VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); - reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; - } - - use_mask = width; /* FIXME: accurately report use mask */ if (output) - use_mask = 0xf ^ use_mask; + used_mask = 0xf ^ used_mask; - /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */ - if (semantic >= VKD3D_SHADER_SV_TARGET) - semantic = VKD3D_SHADER_SV_NONE; + sysval = element->sysval_semantic; + if (sysval >= VKD3D_SHADER_SV_TARGET) + sysval = VKD3D_SHADER_SV_NONE; put_u32(&buffer, 0); /* name */ - put_u32(&buffer, usage_idx); - put_u32(&buffer, semantic); - switch (var->data_type->e.numeric.type) - { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - put_u32(&buffer, VKD3D_SHADER_COMPONENT_FLOAT); - break; - - case HLSL_TYPE_INT: - put_u32(&buffer, VKD3D_SHADER_COMPONENT_INT); - break; - - case HLSL_TYPE_BOOL: - case HLSL_TYPE_UINT: - put_u32(&buffer, VKD3D_SHADER_COMPONENT_UINT); - break; - - default: - if ((string = hlsl_type_to_string(ctx, var->data_type))) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid data type %s for semantic variable %s.", string->buffer, var->name); - hlsl_release_string_buffer(ctx, string); - put_u32(&buffer, VKD3D_SHADER_COMPONENT_VOID); - } - put_u32(&buffer, reg_idx); - put_u32(&buffer, vkd3d_make_u16(width, use_mask)); + put_u32(&buffer, element->semantic_index); + put_u32(&buffer, 
sysval); + put_u32(&buffer, element->component_type); + put_u32(&buffer, element->register_index); + put_u32(&buffer, vkd3d_make_u16(element->mask, used_mask)); } - i = 0; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + for (i = 0; i < signature->element_count; ++i) { - enum vkd3d_shader_sysval_semantic semantic; - const char *name = var->semantic.name; + const struct signature_element *element = &signature->elements[i]; size_t string_offset; - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - - sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); - if (semantic == ~0u) - continue; - - if (semantic == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) - string_offset = put_string(&buffer, "SV_Target"); - else if (semantic == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) - string_offset = put_string(&buffer, "SV_Depth"); - else if (semantic == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) - string_offset = put_string(&buffer, "SV_Position"); - else - string_offset = put_string(&buffer, name); - set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); + string_offset = put_string(&buffer, element->semantic_name); + set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); } - set_u32(&buffer, count_position, i); - - add_section(ctx, dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); + add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); } static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) @@ -3863,7 +3878,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { uint32_t flags = 0; - if (var->last_read) + if (var->is_read) flags |= D3D_SVF_USED; put_u32(&buffer, 0); /* name */ @@ -4091,11 +4106,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); } -static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, +static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) { - const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); + const struct vkd3d_shader_version *version = &tpf->program->shader_version; + const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref); const struct hlsl_ir_var *var = deref->var; + struct hlsl_ctx *ctx = tpf->ctx; if (var->is_uniform) { @@ -4105,7 +4122,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { reg->type = VKD3DSPR_RESOURCE; reg->dimension = VSIR_DIMENSION_VEC4; - if (hlsl_version_ge(ctx, 5, 1)) + if (vkd3d_shader_ver_ge(version, 5, 1)) { reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ @@ -4124,7 +4141,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { reg->type = VKD3DSPR_UAV; reg->dimension = VSIR_DIMENSION_VEC4; - if (hlsl_version_ge(ctx, 5, 1)) + if (vkd3d_shader_ver_ge(version, 5, 1)) { reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ @@ -4143,7 +4160,7 @@ static void 
sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { reg->type = VKD3DSPR_SAMPLER; reg->dimension = VSIR_DIMENSION_NONE; - if (hlsl_version_ge(ctx, 5, 1)) + if (vkd3d_shader_ver_ge(version, 5, 1)) { reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ @@ -4165,7 +4182,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); reg->type = VKD3DSPR_CONSTBUFFER; reg->dimension = VSIR_DIMENSION_VEC4; - if (hlsl_version_ge(ctx, 5, 1)) + if (vkd3d_shader_ver_ge(version, 5, 1)) { reg->idx[0].offset = var->buffer->reg.id; reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ @@ -4185,7 +4202,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { bool has_idx; - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, ®->type, &has_idx)) + if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) { unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); @@ -4195,7 +4212,10 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re reg->idx_count = 1; } - reg->dimension = VSIR_DIMENSION_VEC4; + if (shader_sm4_is_scalar_register(reg)) + reg->dimension = VSIR_DIMENSION_SCALAR; + else + reg->dimension = VSIR_DIMENSION_VEC4; *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); } else @@ -4214,7 +4234,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { bool has_idx; - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, ®->type, &has_idx)) + if (sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx)) { unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); @@ -4224,7 +4244,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re reg->idx_count = 1; } - if 
(reg->type == VKD3DSPR_DEPTHOUT) + if (shader_sm4_is_scalar_register(reg)) reg->dimension = VSIR_DIMENSION_SCALAR; else reg->dimension = VSIR_DIMENSION_VEC4; @@ -4250,13 +4270,13 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re } } -static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, +static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) { unsigned int hlsl_swizzle; uint32_t writemask; - sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref, sm4_instr); + sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr); if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) { hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); @@ -4294,7 +4314,7 @@ static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, } } -static void sm4_src_from_node(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, +static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, const struct hlsl_ir_node *instr, uint32_t map_writemask) { unsigned int hlsl_swizzle; @@ -4330,7 +4350,7 @@ static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_ return 0; } -static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, +static uint32_t sm4_encode_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) { const struct vkd3d_sm4_register_type_info *register_type_info; @@ -4390,7 +4410,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v return token; } -static void sm4_write_register_index(const struct tpf_writer *tpf, const struct 
vkd3d_shader_register *reg, +static void sm4_write_register_index(const struct tpf_compiler *tpf, const struct vkd3d_shader_register *reg, unsigned int j) { unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); @@ -4420,7 +4440,7 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct } } -static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) +static void sm4_write_dst_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_dst_param *dst) { struct vkd3d_bytecode_buffer *buffer = tpf->buffer; uint32_t token = 0; @@ -4433,7 +4453,7 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk sm4_write_register_index(tpf, &dst->reg, j); } -static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vkd3d_shader_src_param *src) +static void sm4_write_src_register(const struct tpf_compiler *tpf, const struct vkd3d_shader_src_param *src) { struct vkd3d_bytecode_buffer *buffer = tpf->buffer; uint32_t token = 0, mod_token = 0; @@ -4494,9 +4514,9 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk } } -static void sm4_update_stat_counters(const struct tpf_writer *tpf, const struct sm4_instruction *instr) +static void sm4_update_stat_counters(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) { - enum vkd3d_shader_type shader_type = tpf->ctx->profile->type; + enum vkd3d_shader_type shader_type = tpf->program->shader_version.type; enum vkd3d_sm4_stat_field stat_field; uint32_t opcode; @@ -4539,7 +4559,7 @@ static void sm4_update_stat_counters(const struct tpf_writer *tpf, const struct } } -static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) +static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct sm4_instruction *instr) { uint32_t token = instr->opcode | instr->extra_bits; struct vkd3d_bytecode_buffer *buffer = 
tpf->buffer; @@ -4606,7 +4626,7 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, return true; } -static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) +static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) { size_t size = (cbuffer->used_size + 3) / 4; @@ -4641,7 +4661,7 @@ static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const st write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) +static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct extern_resource *resource) { unsigned int i; struct sm4_instruction instr = @@ -4682,9 +4702,10 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex } } -static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, +static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource, bool uav) { + const struct vkd3d_shader_version *version = &tpf->program->shader_version; enum hlsl_regset regset = uav ? 
HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; struct hlsl_type *component_type; struct sm4_instruction instr; @@ -4714,14 +4735,14 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; - if (hlsl_version_lt(tpf->ctx, 4, 1) && multisampled && !component_type->sample_count) + if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count) { hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Multisampled texture object declaration needs sample count for profile %s.", - tpf->ctx->profile->name); + "Multisampled texture object declaration needs sample count for profile %u.%u.", + version->major, version->minor); } - if (hlsl_version_ge(tpf->ctx, 5, 1)) + if (vkd3d_shader_ver_ge(version, 5, 1)) { VKD3D_ASSERT(!i); instr.dsts[0].reg.idx[0].offset = resource->id; @@ -4767,9 +4788,10 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex } } -static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) +static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, + const struct hlsl_ir_var *var, bool is_patch_constant_func) { - const struct hlsl_profile_info *profile = tpf->ctx->profile; + const struct vkd3d_shader_version *version = &tpf->program->shader_version; const bool output = var->is_output_semantic; enum vkd3d_shader_sysval_semantic semantic; bool has_idx; @@ -4780,7 +4802,7 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl .dst_count = 1, }; - if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, &has_idx)) + if (sm4_register_from_semantic_name(version, var->semantic.name, output, &instr.dsts[0].reg.type, &has_idx)) { if (has_idx) { @@ -4801,10 +4823,11 @@ static void write_sm4_dcl_semantic(const struct tpf_writer 
*tpf, const struct hl instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; } - if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) + if (shader_sm4_is_scalar_register(&instr.dsts[0].reg)) instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; - sysval_semantic_from_hlsl(&semantic, tpf->ctx, &var->semantic, output); + sm4_sysval_semantic_from_semantic_name(&semantic, version, tpf->ctx->semantic_compat_mapping, + tpf->ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); if (semantic == ~0u) semantic = VKD3D_SHADER_SV_NONE; @@ -4813,25 +4836,26 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl switch (semantic) { case VKD3D_SHADER_SV_NONE: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; break; case VKD3D_SHADER_SV_INSTANCE_ID: + case VKD3D_SHADER_SV_IS_FRONT_FACE: case VKD3D_SHADER_SV_PRIMITIVE_ID: - case VKD3D_SHADER_SV_VERTEX_ID: case VKD3D_SHADER_SV_SAMPLE_INDEX: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + case VKD3D_SHADER_SV_VERTEX_ID: + instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; break; default: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ? 
VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; break; } - if (profile->type == VKD3D_SHADER_TYPE_PIXEL) + if (version->type == VKD3D_SHADER_TYPE_PIXEL) { enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; @@ -4870,32 +4894,32 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl } else { - if (semantic == VKD3D_SHADER_SV_NONE || profile->type == VKD3D_SHADER_TYPE_PIXEL) + if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; else instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; } - switch (semantic) + if (instr.opcode == VKD3D_SM4_OP_DCL_OUTPUT) { - case VKD3D_SHADER_SV_COVERAGE: - case VKD3D_SHADER_SV_DEPTH: - case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: - case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: - case VKD3D_SHADER_SV_TARGET: - case VKD3D_SHADER_SV_NONE: - break; - - default: - instr.idx_count = 1; - instr.idx[0] = semantic; - break; + VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET + || instr.dsts[0].reg.type != VKD3DSPR_OUTPUT); + } + else if (instr.opcode == VKD3D_SM4_OP_DCL_INPUT || instr.opcode == VKD3D_SM4_OP_DCL_INPUT_PS) + { + VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); + } + else + { + VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); + instr.idx_count = 1; + instr.idx[0] = vkd3d_siv_from_sysval_indexed(semantic, var->semantic.index); } write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) +static void write_sm4_dcl_temps(const struct tpf_compiler *tpf, uint32_t temp_count) { struct sm4_instruction instr = { @@ -4908,7 +4932,7 @@ static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_coun write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t idx, +static void write_sm4_dcl_indexable_temp(const struct tpf_compiler *tpf, uint32_t idx, uint32_t size, uint32_t 
comp_count) { struct sm4_instruction instr = @@ -4922,7 +4946,7 @@ static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) +static void write_sm4_dcl_thread_group(const struct tpf_compiler *tpf, const uint32_t thread_count[3]) { struct sm4_instruction instr = { @@ -4937,7 +4961,7 @@ static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint3 write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_global_flags(const struct tpf_writer *tpf, uint32_t flags) +static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) { struct sm4_instruction instr = { @@ -4948,7 +4972,94 @@ static void write_sm4_dcl_global_flags(const struct tpf_writer *tpf, uint32_t fl write_sm4_instruction(tpf, &instr); } -static void write_sm4_ret(const struct tpf_writer *tpf) +static void tpf_write_hs_decls(const struct tpf_compiler *tpf) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_HS_DECLS, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, + .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_dcl_output_control_point_count(const struct tpf_compiler *tpf, const uint32_t 
count) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, + .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_dcl_tessellator_domain(const struct tpf_compiler *tpf, enum vkd3d_tessellator_domain domain) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, + .extra_bits = domain << VKD3D_SM5_TESSELLATOR_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_dcl_tessellator_partitioning(const struct tpf_compiler *tpf, + enum vkd3d_shader_tessellator_partitioning partitioning) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, + .extra_bits = partitioning << VKD3D_SM5_TESSELLATOR_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler *tpf, + enum vkd3d_shader_tessellator_output_primitive output_primitive) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, + .extra_bits = output_primitive << VKD3D_SM5_TESSELLATOR_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); +} + +static void write_sm4_ret(const struct tpf_compiler *tpf) { struct sm4_instruction instr = { @@ -4958,7 +5069,7 @@ static void write_sm4_ret(const struct tpf_writer *tpf) write_sm4_instruction(tpf, &instr); } -static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +static void write_sm4_unary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod) { struct sm4_instruction instr; @@ -4976,7 +5087,7 @@ static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opco write_sm4_instruction(tpf, &instr); } -static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer 
*tpf, enum vkd3d_sm4_opcode opcode, +static void write_sm4_unary_op_with_two_destinations(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src) { struct sm4_instruction instr; @@ -4997,7 +5108,7 @@ static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tp write_sm4_instruction(tpf, &instr); } -static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +static void write_sm4_binary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { struct sm4_instruction instr; @@ -5016,7 +5127,7 @@ static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opc } /* dp# instructions don't map the swizzle. */ -static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +static void write_sm4_binary_op_dot(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { struct sm4_instruction instr; @@ -5034,7 +5145,7 @@ static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4 write_sm4_instruction(tpf, &instr); } -static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, +static void write_sm4_binary_op_with_two_destinations(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) { @@ -5057,7 +5168,7 @@ static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *t write_sm4_instruction(tpf, &instr); } -static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +static void write_sm4_ternary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode 
opcode, const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, const struct hlsl_ir_node *src3) { @@ -5077,7 +5188,7 @@ static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_op write_sm4_instruction(tpf, &instr); } -static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, +static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) @@ -5086,6 +5197,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); + const struct vkd3d_shader_version *version = &tpf->program->shader_version; unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; struct sm4_instruction instr; @@ -5140,7 +5252,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node reg->dimension = VSIR_DIMENSION_SCALAR; reg->u.immconst_u32[0] = index->value.u[0].u; } - else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) + else if (version->major == 4 && version->minor == 0) { hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); } @@ -5155,7 +5267,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node write_sm4_instruction(tpf, &instr); } -static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) { const struct hlsl_ir_node 
*texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node; @@ -5235,7 +5347,7 @@ static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_ write_sm4_instruction(tpf, &instr); } -static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) { const struct hlsl_deref *resource = &load->resource; const struct hlsl_ir_node *dst = &load->node; @@ -5257,7 +5369,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl write_sm4_instruction(tpf, &instr); } -static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) { const struct hlsl_deref *resource = &load->resource; const struct hlsl_ir_node *dst = &load->node; @@ -5292,7 +5404,7 @@ static bool type_is_float(const struct hlsl_type *type) return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; } -static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, +static void write_sm4_cast_from_bool(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr, const struct hlsl_ir_node *arg, uint32_t mask) { struct sm4_instruction instr; @@ -5312,7 +5424,7 @@ static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct write_sm4_instruction(tpf, &instr); } -static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) +static void write_sm4_cast(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) { static const union { @@ -5421,7 +5533,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex } } -static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct 
hlsl_deref *dst, +static void write_sm4_store_uav_typed(const struct tpf_compiler *tpf, const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) { struct sm4_instruction instr; @@ -5429,7 +5541,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; - sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); + sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); instr.dst_count = 1; sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); @@ -5439,7 +5551,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct write_sm4_instruction(tpf, &instr); } -static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) +static void write_sm4_rasterizer_sample_count(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst) { struct sm4_instruction instr; @@ -5458,8 +5570,9 @@ static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, cons write_sm4_instruction(tpf, &instr); } -static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) +static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) { + const struct vkd3d_shader_version *version = &tpf->program->shader_version; const struct hlsl_ir_node *arg1 = expr->operands[0].node; const struct hlsl_ir_node *arg2 = expr->operands[1].node; const struct hlsl_ir_node *arg3 = expr->operands[2].node; @@ -5474,7 +5587,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex switch (expr->op) { case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: - if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) + if (version->type == VKD3D_SHADER_TYPE_PIXEL && vkd3d_shader_ver_ge(version, 4, 1)) 
write_sm4_rasterizer_sample_count(tpf, &expr->node); else hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, @@ -5595,7 +5708,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex { case HLSL_TYPE_FLOAT: /* SM5 comes with a RCP opcode */ - if (tpf->ctx->profile->major_version >= 5) + if (vkd3d_shader_ver_ge(version, 5, 0)) { write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); } @@ -5973,7 +6086,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex hlsl_release_string_buffer(tpf->ctx, dst_type_string); } -static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) +static void write_sm4_if(const struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) { struct sm4_instruction instr = { @@ -6002,7 +6115,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if * write_sm4_instruction(tpf, &instr); } -static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) +static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) { struct sm4_instruction instr = {0}; @@ -6041,16 +6154,17 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju /* Does this variable's data come directly from the API user, rather than being * temporary or from a previous shader stage? * I.e. is it a uniform or VS input? 
*/ -static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) +static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) { if (var->is_uniform) return true; - return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; + return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; } -static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) +static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load) { + const struct vkd3d_shader_version *version = &tpf->program->shader_version; const struct hlsl_type *type = load->node.data_type; struct sm4_instruction instr; @@ -6060,7 +6174,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo instr.dst_count = 1; VKD3D_ASSERT(hlsl_is_numeric_type(type)); - if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) + if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) { struct hlsl_constant_value value; @@ -6088,7 +6202,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo write_sm4_instruction(tpf, &instr); } -static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) +static void write_sm4_loop(const struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) { struct sm4_instruction instr = { @@ -6103,10 +6217,11 @@ static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_lo write_sm4_instruction(tpf, &instr); } -static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, +static void write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset) { + const struct 
vkd3d_shader_version *version = &tpf->program->shader_version; struct vkd3d_shader_src_param *src; struct sm4_instruction instr; @@ -6123,7 +6238,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ { if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) { - if (tpf->ctx->profile->major_version < 5) + if (!vkd3d_shader_ver_ge(version, 5, 0)) { hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); @@ -6144,7 +6259,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ write_sm4_instruction(tpf, &instr); } -static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) { const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *sample_index = load->sample_index.node; @@ -6213,7 +6328,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h } } -static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) +static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) { struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); @@ -6232,7 +6347,7 @@ static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); } -static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) +static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) { const struct hlsl_ir_node *rhs = store->rhs.node; struct sm4_instruction instr; @@ -6241,7 +6356,7 @@ static void write_sm4_store(const struct 
tpf_writer *tpf, const struct hlsl_ir_s memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_MOV; - sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); + sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); instr.dst_count = 1; @@ -6251,7 +6366,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s write_sm4_instruction(tpf, &instr); } -static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_switch *s) +static void write_sm4_switch(const struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) { const struct hlsl_ir_node *selector = s->selector.node; struct hlsl_ir_switch_case *c; @@ -6291,7 +6406,7 @@ static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_ write_sm4_instruction(tpf, &instr); } -static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) +static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle) { unsigned int hlsl_swizzle; struct sm4_instruction instr; @@ -6312,7 +6427,7 @@ static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir write_sm4_instruction(tpf, &instr); } -static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) +static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block) { const struct hlsl_ir_node *instr; @@ -6386,18 +6501,65 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc } } -static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func, - struct sm4_stat *stat, struct dxbc_writer *dxbc) +static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) { - const struct hlsl_profile_info *profile = ctx->profile; + struct hlsl_ctx 
*ctx = tpf->ctx; + const struct hlsl_scope *scope; + const struct hlsl_ir_var *var; + uint32_t temp_count; + + compute_liveness(ctx, func); + mark_indexable_vars(ctx, func); + temp_count = allocate_temp_registers(ctx, func); + if (ctx->result) + return; + + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) + || (var->is_output_semantic && var->first_write)) + tpf_write_dcl_semantic(tpf, var, func == ctx->patch_constant_func); + } + + if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) + write_sm4_dcl_thread_group(tpf, ctx->thread_count); + + if (temp_count) + write_sm4_dcl_temps(tpf, temp_count); + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) + continue; + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + continue; + + if (var->indexable) + { + unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; + unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; + + write_sm4_dcl_indexable_temp(tpf, id, size, 4); + } + } + } + + write_sm4_block(tpf, &func->body); + + write_sm4_ret(tpf); +} + +static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func) +{ + const struct vkd3d_shader_version *version = &tpf->program->shader_version; struct vkd3d_bytecode_buffer buffer = {0}; struct extern_resource *extern_resources; unsigned int extern_resources_count, i; const struct hlsl_buffer *cbuffer; - const struct hlsl_scope *scope; - const struct hlsl_ir_var *var; + struct hlsl_ctx *ctx = tpf->ctx; size_t token_count_position; - struct tpf_writer tpf; static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = { @@ -6412,17 +6574,28 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d VKD3D_SM4_LIB, }; - 
tpf_writer_init(&tpf, ctx, stat, &buffer); + tpf->buffer = &buffer; extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); + put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); token_count_position = put_u32(&buffer, 0); + if (version->type == VKD3D_SHADER_TYPE_HULL) + { + tpf_write_hs_decls(tpf); + + tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */ + tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count); + tpf_write_dcl_tessellator_domain(tpf, ctx->domain); + tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); + tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); + } + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) - write_sm4_dcl_constant_buffer(&tpf, cbuffer); + write_sm4_dcl_constant_buffer(tpf, cbuffer); } for (i = 0; i < extern_resources_count; ++i) @@ -6430,62 +6603,40 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d const struct extern_resource *resource = &extern_resources[i]; if (resource->regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(&tpf, resource); + write_sm4_dcl_samplers(tpf, resource); else if (resource->regset == HLSL_REGSET_TEXTURES) - write_sm4_dcl_textures(&tpf, resource, false); + write_sm4_dcl_textures(tpf, resource, false); else if (resource->regset == HLSL_REGSET_UAVS) - write_sm4_dcl_textures(&tpf, resource, true); + write_sm4_dcl_textures(tpf, resource, true); } - if (entry_func->early_depth_test && profile->major_version >= 5) - write_sm4_dcl_global_flags(&tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && 
var->first_write)) - write_sm4_dcl_semantic(&tpf, var); - } + if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) + write_sm4_dcl_global_flags(tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); - if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) - write_sm4_dcl_thread_group(&tpf, ctx->thread_count); + if (version->type == VKD3D_SHADER_TYPE_HULL) + tpf_write_hs_control_point_phase(tpf); - if (ctx->temp_count) - write_sm4_dcl_temps(&tpf, ctx->temp_count); + tpf_write_shader_function(tpf, entry_func); - LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + if (version->type == VKD3D_SHADER_TYPE_HULL) { - LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) - { - if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) - continue; - if (!var->regs[HLSL_REGSET_NUMERIC].allocated) - continue; - - if (var->indexable) - { - unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; - unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; - - write_sm4_dcl_indexable_temp(&tpf, id, size, 4); - } - } + tpf_write_hs_fork_phase(tpf); + tpf_write_shader_function(tpf, ctx->patch_constant_func); } - write_sm4_block(&tpf, &entry_func->body); - - write_sm4_ret(&tpf); - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - add_section(ctx, dxbc, TAG_SHDR, &buffer); + add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer); + tpf->buffer = NULL; sm4_free_extern_resources(extern_resources, extern_resources_count); } -static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +static void tpf_write_sfi0(struct tpf_compiler *tpf) { struct extern_resource *extern_resources; unsigned int extern_resources_count; + struct hlsl_ctx *ctx = tpf->ctx; uint64_t *flags; flags = vkd3d_calloc(1, sizeof(*flags)); @@ -6502,14 +6653,16 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. 
*/ if (*flags) - dxbc_writer_add_section(dxbc, TAG_SFI0, flags, sizeof(*flags)); + dxbc_writer_add_section(&tpf->dxbc, TAG_SFI0, flags, sizeof(*flags)); else vkd3d_free(flags); } -static void write_sm4_stat(struct hlsl_ctx *ctx, const struct sm4_stat *stat, struct dxbc_writer *dxbc) +static void tpf_write_stat(struct tpf_compiler *tpf) { struct vkd3d_bytecode_buffer buffer = {0}; + const struct sm4_stat *stat = tpf->stat; + struct hlsl_ctx *ctx = tpf->ctx; put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); @@ -6553,28 +6706,40 @@ static void write_sm4_stat(struct hlsl_ctx *ctx, const struct sm4_stat *stat, st put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); } - add_section(ctx, dxbc, TAG_STAT, &buffer); + add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer); } -int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving + * data from the other parameters instead, so they can be removed from the + * arguments and this function can be independent of HLSL structs. 
*/ +int tpf_compile(struct vsir_program *program, uint64_t config_flags, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { + struct tpf_compiler tpf = {0}; struct sm4_stat stat = {0}; - struct dxbc_writer dxbc; size_t i; int ret; - dxbc_writer_init(&dxbc); - - write_sm4_signature(ctx, &dxbc, false); - write_sm4_signature(ctx, &dxbc, true); - write_sm4_rdef(ctx, &dxbc); - write_sm4_shdr(ctx, entry_func, &stat, &dxbc); - write_sm4_sfi0(ctx, &dxbc); - write_sm4_stat(ctx, &stat, &dxbc); + tpf.ctx = ctx; + tpf.program = program; + tpf.buffer = NULL; + tpf.stat = &stat; + init_sm4_lookup_tables(&tpf.lookup); + dxbc_writer_init(&tpf.dxbc); + + tpf_write_signature(&tpf, &program->input_signature, TAG_ISGN); + tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); + if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL) + tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); + write_sm4_rdef(ctx, &tpf.dxbc); + tpf_write_shdr(&tpf, entry_func); + tpf_write_sfi0(&tpf); + tpf_write_stat(&tpf); if (!(ret = ctx->result)) - ret = dxbc_writer_write(&dxbc, out); - for (i = 0; i < dxbc.section_count; ++i) - vkd3d_shader_free_shader_code(&dxbc.sections[i].data); + ret = dxbc_writer_write(&tpf.dxbc, out); + for (i = 0; i < tpf.dxbc.section_count; ++i) + vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data); return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 54b87373ed1..d63bfd96121 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1602,6 +1602,16 @@ void vkd3d_shader_trace_text_(const char *text, size_t size, const char *functio #define vkd3d_shader_trace_text(text, size) \ vkd3d_shader_trace_text_(text, size, __FUNCTION__) +bool sm1_register_from_semantic_name(const struct 
vkd3d_shader_version *version, const char *semantic_name, + unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); +bool sm1_usage_from_semantic_name(const char *semantic_name, + uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); +bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx); +bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, + const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, + const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func); + int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, struct vkd3d_shader_message_context *message_context, struct vsir_program *program); int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, @@ -1914,7 +1924,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) #define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) -#define DXBC_MAX_SECTION_COUNT 6 +#define DXBC_MAX_SECTION_COUNT 7 struct dxbc_writer { -- 2.45.2