From 05cf5fe0eb241db9e3c9a9ad0495e482ddafe178 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 21 Feb 2025 09:15:01 +1100 Subject: [PATCH] Updated vkd3d to 2feb3a3bbade41b8d7374e0ced625342b35cd50b. --- libs/vkd3d/include/private/vkd3d_common.h | 2 +- libs/vkd3d/include/vkd3d_shader.h | 3 +- libs/vkd3d/libs/vkd3d-common/blob.c | 1 + libs/vkd3d/libs/vkd3d-shader/dxbc.c | 6 + libs/vkd3d/libs/vkd3d-shader/dxil.c | 68 +++--- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 40 +++- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 8 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 46 +--- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 186 +++++++-------- libs/vkd3d/libs/vkd3d-shader/ir.c | 219 ++++++++++-------- libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + libs/vkd3d/libs/vkd3d-shader/preproc.y | 10 + libs/vkd3d/libs/vkd3d-shader/tpf.c | 3 +- .../libs/vkd3d-shader/vkd3d_shader_main.c | 75 +++--- libs/vkd3d/libs/vkd3d/command.c | 50 +++- libs/vkd3d/libs/vkd3d/device.c | 2 +- libs/vkd3d/libs/vkd3d/resource.c | 14 +- libs/vkd3d/libs/vkd3d/state.c | 116 ++++++++-- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 1 - 19 files changed, 506 insertions(+), 345 deletions(-) diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index ec1dd70c9b2..fd62730f948 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -275,7 +275,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) { #ifdef _MSC_VER return __popcnt(v); -#elif defined(__MINGW32__) +#elif defined(HAVE_BUILTIN_POPCOUNT) return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index 058166aa2f9..7a5d4eecbe5 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -2991,7 +2991,8 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( * signature. To retrieve signatures from other shader types, or other signature * types, use vkd3d_shader_scan() and struct vkd3d_shader_scan_signature_info. * This function returns the same input signature that is returned in - * struct vkd3d_shader_scan_signature_info. + * struct vkd3d_shader_scan_signature_info for dxbc-tpf shaders, but may return + * different information for dxbc-dxil shaders. * * \param dxbc Compiled byte code, in DXBC format. 
* diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c index f60ef7db769..c2c6ad67804 100644 --- a/libs/vkd3d/libs/vkd3d-common/blob.c +++ b/libs/vkd3d/libs/vkd3d-common/blob.c @@ -20,6 +20,7 @@ #define WIDL_C_INLINE_WRAPPERS #endif #define COBJMACROS + #define CONST_VTABLE #include "vkd3d.h" #include "vkd3d_blob.h" diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 81af62f7810..270d607bc0e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -542,6 +542,8 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, { case TAG_ISGN: case TAG_ISG1: + if (desc->is_dxil) + break; if (desc->input_signature.elements) { FIXME("Multiple input signatures.\n"); @@ -554,6 +556,8 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, case TAG_OSGN: case TAG_OSG5: case TAG_OSG1: + if (desc->is_dxil) + break; if (desc->output_signature.elements) { FIXME("Multiple output signatures.\n"); @@ -565,6 +569,8 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, case TAG_PCSG: case TAG_PSG1: + if (desc->is_dxil) + break; if (desc->patch_constant_signature.elements) { FIXME("Multiple patch constant signatures.\n"); diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index a10de68008a..d13c2791fa6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -3911,23 +3911,51 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade } } -static void sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) +static int sm6_parser_init_output_signature(struct sm6_parser *sm6, const struct shader_signature *output_signature) { + if (!(sm6->output_params = vsir_program_get_dst_params(sm6->p.program, output_signature->element_count))) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Failed to allocate output parameters."); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + sm6_parser_init_signature(sm6, output_signature, false, VKD3DSPR_OUTPUT, sm6->output_params); + + return VKD3D_OK; } -static void sm6_parser_init_input_signature(struct sm6_parser *sm6, const struct shader_signature *input_signature) +static int sm6_parser_init_input_signature(struct sm6_parser *sm6, const struct shader_signature *input_signature) { + if (!(sm6->input_params = vsir_program_get_dst_params(sm6->p.program, input_signature->element_count))) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Failed to allocate input parameters."); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + sm6_parser_init_signature(sm6, input_signature, true, VKD3DSPR_INPUT, sm6->input_params); + + return VKD3D_OK; } -static void sm6_parser_init_patch_constant_signature(struct sm6_parser *sm6, +static int sm6_parser_init_patch_constant_signature(struct sm6_parser *sm6, const struct shader_signature *patch_constant_signature) { bool is_input = sm6->p.program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; + if (!(sm6->patch_constant_params = vsir_program_get_dst_params(sm6->p.program, + patch_constant_signature->element_count))) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Failed to allocate patch constant parameters."); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + sm6_parser_init_signature(sm6, patch_constant_signature, is_input, VKD3DSPR_PATCHCONST, 
sm6->patch_constant_params); + + return VKD3D_OK; } static const struct sm6_value *sm6_parser_next_function_definition(struct sm6_parser *sm6) @@ -9631,23 +9659,24 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], &program->input_signature, tessellator_domain, true)) < 0) - { return ret; - } + if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], &program->output_signature, tessellator_domain, false)) < 0) - { return ret; - } + if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], &program->patch_constant_signature, tessellator_domain, false)) < 0) - { return ret; - } - sm6_parser_init_input_signature(sm6, &program->input_signature); - sm6_parser_init_output_signature(sm6, &program->output_signature); - sm6_parser_init_patch_constant_signature(sm6, &program->patch_constant_signature); + if ((ret = sm6_parser_init_input_signature(sm6, &program->input_signature)) < 0) + return ret; + + if ((ret = sm6_parser_init_output_signature(sm6, &program->output_signature) < 0)) + return ret; + + if ((ret = sm6_parser_init_patch_constant_signature(sm6, &program->patch_constant_signature)) < 0) + return ret; return VKD3D_OK; } @@ -10432,9 +10461,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro input_signature = &program->input_signature; output_signature = &program->output_signature; patch_constant_signature = &program->patch_constant_signature; - *input_signature = dxbc_desc->input_signature; - *output_signature = dxbc_desc->output_signature; - *patch_constant_signature = dxbc_desc->patch_constant_signature; program->features = dxbc_desc->features; memset(dxbc_desc, 0, sizeof(*dxbc_desc)); @@ -10498,18 +10524,6 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro goto fail; } - if (!(sm6->output_params = vsir_program_get_dst_params(program, output_signature->element_count)) - || !(sm6->input_params = vsir_program_get_dst_params(program, input_signature->element_count)) - || !(sm6->patch_constant_params = vsir_program_get_dst_params(program, - patch_constant_signature->element_count))) - { - ERR("Failed to allocate input/output parameters.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, - "Out of memory allocating input/output parameters."); - ret = VKD3D_ERROR_OUT_OF_MEMORY; - goto fail; - } - function_count = dxil_block_compute_function_count(&sm6->root_block); if (!(sm6->functions = vkd3d_calloc(function_count, sizeof(*sm6->functions)))) { diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 41586550203..db216b5df30 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -845,13 +845,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl { unsigned int next_index = traverse_path_from_component_index(ctx, &path_type, &path_index); - if (!(c = hlsl_new_uint_constant(ctx, next_index, loc))) - { - hlsl_block_cleanup(block); - return false; - } - hlsl_block_add_instr(block, c); - + c = hlsl_block_add_uint_constant(ctx, block, next_index, loc); hlsl_src_from_node(&deref->path[deref_path_len++], c); } @@ -1324,6 +1318,18 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type) return true; } +static struct hlsl_ir_node *append_new_instr(struct hlsl_ctx *ctx, struct 
hlsl_block *block, struct hlsl_ir_node *instr) +{ + if (!instr) + { + block->value = ctx->error_instr; + return ctx->error_instr; + } + + hlsl_block_add_instr(block, instr); + return instr; +} + struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, const struct vkd3d_shader_location *loc) { @@ -1584,7 +1590,8 @@ struct hlsl_ir_node *hlsl_new_float_constant(struct hlsl_ctx *ctx, float f, return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_FLOAT), &value, loc); } -struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc) +static struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, + const struct vkd3d_shader_location *loc) { struct hlsl_constant_value value; @@ -1592,6 +1599,12 @@ struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, cons return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), &value, loc); } +struct hlsl_ir_node *hlsl_block_add_int_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, + int32_t n, const struct vkd3d_shader_location *loc) +{ + return append_new_instr(ctx, block, hlsl_new_int_constant(ctx, n, loc)); +} + struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, const struct vkd3d_shader_location *loc) { @@ -1601,6 +1614,12 @@ struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc); } +struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, + unsigned int n, const struct vkd3d_shader_location *loc) +{ + return append_new_instr(ctx, block, hlsl_new_uint_constant(ctx, n, loc)); +} + struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, const struct vkd3d_shader_location *loc) { @@ -1662,8 +1681,6 @@ struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; - VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); - VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } @@ -1867,6 +1884,9 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned struct hlsl_ir_swizzle *swizzle; struct hlsl_type *type; + if (val->data_type->class == HLSL_CLASS_ERROR) + return val; + VKD3D_ASSERT(val->data_type->class <= HLSL_CLASS_VECTOR); if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index f614e12036e..49e8b0460fb 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -482,6 +482,9 @@ struct hlsl_ir_var union hlsl_constant_value_component number; } *default_values; + /* Pointer to the temp copy of the variable, in case it is uniform. */ + struct hlsl_ir_var *temp_copy; + /* A dynamic array containing the state block on the variable's declaration, if any. * An array variable may contain multiple state blocks. * A technique pass will always contain one. 
@@ -1502,6 +1505,10 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); void hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl); +struct hlsl_ir_node *hlsl_block_add_int_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, + int32_t n, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, + unsigned int n, const struct vkd3d_shader_location *loc); void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); @@ -1583,7 +1590,6 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); -struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 7afc9274c2e..cc09eecffad 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -925,9 +925,7 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, st VKD3D_ASSERT(idx < record->data_type->e.record.field_count); - if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) - return false; - hlsl_block_add_instr(block, c); + c = hlsl_block_add_uint_constant(ctx, block, idx, loc); if (!(index = hlsl_new_index(ctx, record, c, loc))) return false; @@ -2275,9 +2273,7 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc if (!(writemask & (1 << i))) continue; - if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) - return false; - hlsl_block_add_instr(block, c); + c = hlsl_block_add_uint_constant(ctx, block, i, &lhs->loc); if (!(cell = hlsl_new_index(ctx, &row->node, c, &lhs->loc))) return false; @@ -2332,9 +2328,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? "de" : "in"); - if (!(one = hlsl_new_int_constant(ctx, 1, loc))) - return false; - hlsl_block_add_instr(block, one); + one = hlsl_block_add_int_constant(ctx, block, 1, loc); if (!add_assignment(ctx, block, lhs, decrement ? 
ASSIGN_OP_SUB : ASSIGN_OP_ADD, one, false)) return false; @@ -2869,12 +2863,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var continue; } - if (!(zero = hlsl_new_uint_constant(ctx, 0, &var->loc))) - { - free_parse_variable_def(v); - continue; - } - hlsl_block_add_instr(&ctx->static_initializers, zero); + zero = hlsl_block_add_uint_constant(ctx, &ctx->static_initializers, 0, &var->loc); if (!(cast = add_cast(ctx, &ctx->static_initializers, zero, var->data_type, &var->loc))) { @@ -6377,8 +6366,8 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc bool uint_resinfo, has_uint_arg, has_float_arg; struct hlsl_resource_load_params load_params; struct hlsl_ir_node *sample_info, *res_info; - struct hlsl_ir_node *zero = NULL, *void_ret; struct hlsl_type *uint_type, *float_type; + struct hlsl_ir_node *void_ret; unsigned int i, j; enum func_argument { @@ -6478,12 +6467,7 @@ static bool add_getdimensions_method_call(struct hlsl_ctx *ctx, struct hlsl_bloc } if (!args[ARG_MIP_LEVEL]) - { - if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) - return false; - hlsl_block_add_instr(block, zero); - args[ARG_MIP_LEVEL] = zero; - } + args[ARG_MIP_LEVEL] = hlsl_block_add_uint_constant(ctx, block, 0, loc); memset(&load_params, 0, sizeof(load_params)); load_params.type = HLSL_RESOURCE_RESINFO; @@ -9177,9 +9161,7 @@ jump_statement: if (!($$ = make_empty_block(ctx))) YYABORT; - if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) - return false; - hlsl_block_add_instr($$, c); + c = hlsl_block_add_uint_constant(ctx, $$, ~0u, &@1); if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) return false; @@ -9392,21 +9374,15 @@ primary_expr: } | C_INTEGER { - struct hlsl_ir_node *c; - - if (!(c = hlsl_new_int_constant(ctx, $1, &@1))) - YYABORT; - if (!($$ = make_block(ctx, c))) + if (!($$ = make_empty_block(ctx))) YYABORT; + hlsl_block_add_int_constant(ctx, $$, $1, &@1); } | C_UNSIGNED { - struct hlsl_ir_node *c; - - if (!(c = hlsl_new_uint_constant(ctx, $1, &@1))) - YYABORT; - if (!($$ = make_block(ctx, c))) + if (!($$ = make_empty_block(ctx))) YYABORT; + hlsl_block_add_uint_constant(ctx, $$, $1, &@1); } | boolean { diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 2afd3e1e1e5..d5e53c58618 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -62,9 +62,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str size /= 4; } - if (!(c = hlsl_new_uint_constant(ctx, size, loc))) - return NULL; - hlsl_block_add_instr(block, c); + c = hlsl_block_add_uint_constant(ctx, block, size, loc); if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) return NULL; @@ -86,12 +84,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str field_offset /= 4; } - if (!(c = hlsl_new_uint_constant(ctx, field_offset, loc))) - return NULL; - hlsl_block_add_instr(block, c); - - idx_offset = c; - + idx_offset = hlsl_block_add_uint_constant(ctx, block, field_offset, loc); break; } @@ -122,9 +115,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st hlsl_block_init(block); - if (!(offset = hlsl_new_uint_constant(ctx, 0, loc))) - return NULL; - hlsl_block_add_instr(block, offset); + offset = hlsl_block_add_uint_constant(ctx, block, 0, loc); VKD3D_ASSERT(deref->var); type = deref->var->data_type; @@ -203,41 +194,34 @@ static bool 
clean_constant_deref_offset_srcs(struct hlsl_ctx *ctx, struct hlsl_d } -/* Split uniforms into two variables representing the constant and temp - * registers, and copy the former to the latter, so that writes to uniforms - * work. */ -static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *temp) +/* For a uniform variable, create a temp copy of it so, in case a value is + * stored to the uniform at some point the shader, all derefs can be diverted + * to this temp copy instead. + * Also, promote the uniform to an extern var. */ +static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *uniform) { - struct hlsl_ir_var *uniform; struct hlsl_ir_node *store; struct hlsl_ir_load *load; + struct hlsl_ir_var *temp; char *new_name; - /* Use the synthetic name for the temp, rather than the uniform, so that we - * can write the uniform name into the shader reflection data. */ + uniform->is_uniform = 1; + list_add_tail(&ctx->extern_vars, &uniform->extern_entry); - if (!(uniform = hlsl_new_var(ctx, temp->name, temp->data_type, - &temp->loc, NULL, temp->storage_modifiers, &temp->reg_reservation))) + if (!(new_name = hlsl_sprintf_alloc(ctx, "", uniform->name))) return; - list_add_before(&temp->scope_entry, &uniform->scope_entry); - list_add_tail(&ctx->extern_vars, &uniform->extern_entry); - uniform->is_uniform = 1; - uniform->is_param = temp->is_param; - uniform->buffer = temp->buffer; - if (temp->default_values) + + if (!(temp = hlsl_new_var(ctx, new_name, uniform->data_type, + &uniform->loc, NULL, uniform->storage_modifiers, NULL))) { - /* Transfer default values from the temp to the uniform. */ - VKD3D_ASSERT(!uniform->default_values); - VKD3D_ASSERT(hlsl_type_component_count(temp->data_type) == hlsl_type_component_count(uniform->data_type)); - uniform->default_values = temp->default_values; - temp->default_values = NULL; + vkd3d_free(new_name); + return; } + list_add_before(&uniform->scope_entry, &temp->scope_entry); - if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) - return; - temp->name = new_name; + uniform->temp_copy = temp; - if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) + if (!(load = hlsl_new_var_load(ctx, uniform, &uniform->loc))) return; list_add_head(&block->instrs, &load->node.entry); @@ -246,6 +230,25 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, list_add_after(&load->node.entry, &store->entry); } +/* If a uniform is written to at some point in the shader, all dereferences + * must point to the temp copy instead, which is what this pass does. */ +static bool divert_written_uniform_derefs_to_temp(struct hlsl_ctx *ctx, struct hlsl_deref *deref, + struct hlsl_ir_node *instr) +{ + if (!deref->var->is_uniform || !deref->var->first_write) + return false; + + /* Skip derefs from instructions before first write so copies from the + * uniform to the temp are unaffected. 
*/ + if (instr->index < deref->var->first_write) + return false; + + VKD3D_ASSERT(deref->var->temp_copy); + + deref->var = deref->var->temp_copy; + return true; +} + static void validate_field_semantic(struct hlsl_ctx *ctx, struct hlsl_struct_field *field) { if (!field->semantic.name && hlsl_is_numeric_type(hlsl_get_multiarray_element_type(field->type)) @@ -436,9 +439,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec return; hlsl_init_simple_deref_from_var(&patch_deref, input); - if (!(idx = hlsl_new_uint_constant(ctx, patch_index, &var->loc))) - return; - hlsl_block_add_instr(block, idx); + idx = hlsl_block_add_uint_constant(ctx, block, patch_index, &var->loc); if (!(load = hlsl_new_load_index(ctx, &patch_deref, idx, loc))) return; @@ -461,9 +462,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec if (type->class == HLSL_CLASS_MATRIX) { - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; - hlsl_block_add_instr(block, c); + c = hlsl_block_add_uint_constant(ctx, block, i, &var->loc); if (!(store = hlsl_new_store_index(ctx, &lhs->src, c, cast, 0, &var->loc))) return; @@ -526,9 +525,7 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func force_align = (i == 0); } - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; - hlsl_block_add_instr(block, c); + c = hlsl_block_add_uint_constant(ctx, block, i, &var->loc); /* This redundant load is expected to be deleted later by DCE. */ if (!(element_load = hlsl_new_load_index(ctx, &lhs->src, c, loc))) @@ -603,9 +600,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec if (type->class == HLSL_CLASS_MATRIX) { - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; - hlsl_block_add_instr(&func->body, c); + c = hlsl_block_add_uint_constant(ctx, &func->body, i, &var->loc); if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) return; @@ -666,9 +661,7 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, force_align = (i == 0); } - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) - return; - hlsl_block_add_instr(&func->body, c); + c = hlsl_block_add_uint_constant(ctx, &func->body, i, &var->loc); if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) return; @@ -705,6 +698,9 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, *next; bool progress = false; + if (ctx->result) + return false; + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) { if (instr->type == HLSL_IR_IF) @@ -1112,9 +1108,7 @@ static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct h return NULL; hlsl_block_add_instr(block, store); - if (!(zero = hlsl_new_uint_constant(ctx, 0, loc))) - return NULL; - hlsl_block_add_instr(block, zero); + zero = hlsl_block_add_uint_constant(ctx, block, 0, loc); if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, zero, 1u << dim_count, loc))) return NULL; @@ -1326,9 +1320,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, { struct hlsl_ir_node *c; - if (!(c = hlsl_new_uint_constant(ctx, i, &instr->loc))) - return false; - hlsl_block_add_instr(block, c); + c = hlsl_block_add_uint_constant(ctx, block, i, &instr->loc); if (!(load = hlsl_new_load_index(ctx, &var_deref, c, &instr->loc))) return false; @@ -1398,7 +1390,7 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, s /* 
Allocate a unique, ordered index to each instruction, which will be used for * copy propagation and computing liveness ranges. - * Index 0 means unused; index 1 means function entry, so start at 2. */ + * Index 0 means unused, so start at 1. */ static unsigned int index_instructions(struct hlsl_block *block, unsigned int index) { struct hlsl_ir_node *instr; @@ -2210,7 +2202,10 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc struct copy_propagation_state state; bool progress; - index_instructions(block, 2); + if (ctx->result) + return false; + + index_instructions(block, 1); copy_propagation_state_init(&state, ctx); @@ -2959,9 +2954,7 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n struct hlsl_ir_load *var_load, *specific_load; struct hlsl_deref deref_copy = {0}; - if (!(const_i = hlsl_new_uint_constant(ctx, i, &cut_index->loc))) - return false; - hlsl_block_add_instr(block, const_i); + const_i = hlsl_block_add_uint_constant(ctx, block, i, &cut_index->loc); operands[0] = cut_index; operands[1] = const_i; @@ -4634,6 +4627,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) struct hlsl_ir_store *store = hlsl_ir_store(instr); struct hlsl_ir_var *var = store->lhs.var; + if (var->is_output_semantic) + break; + if (var->last_read < instr->index) { list_remove(&instr->entry); @@ -4938,20 +4934,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop } } -static void init_var_liveness(struct hlsl_ir_var *var) -{ - if (var->is_uniform || var->is_input_semantic) - var->first_write = 1; - else if (var->is_output_semantic) - var->last_read = UINT_MAX; -} - static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct hlsl_scope *scope; struct hlsl_ir_var *var; - index_instructions(&entry_func->body, 2); + if (ctx->result) + return; + + index_instructions(&entry_func->body, 1); LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) { @@ -4959,12 +4950,6 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl var->first_write = var->last_read = 0; } - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - init_var_liveness(var); - - LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) - init_var_liveness(var); - compute_liveness_recurse(&entry_func->body, 0, 0); } @@ -5001,7 +4986,7 @@ struct register_allocator /* Indexable temps are allocated separately and always keep their index regardless of their * lifetime. */ - size_t indexable_count; + uint32_t indexable_count; /* Total number of registers allocated so far. Used to declare sm4 temp count. 
*/ uint32_t reg_count; @@ -5773,7 +5758,7 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun if (var->is_output_semantic) { record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, - var->first_write, var->last_read, 0, false); + var->first_write, UINT_MAX, 0, false); break; } } @@ -5782,6 +5767,13 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); vkd3d_free(allocator.allocations); + if (allocator.indexable_count) + TRACE("Declaration of function \"%s\" required %u temp registers, and %u indexable temps.\n", + entry_func->func->name, allocator.reg_count, allocator.indexable_count); + else + TRACE("Declaration of function \"%s\" required %u temp registers.\n", + entry_func->func->name, allocator.reg_count); + return allocator.reg_count; } @@ -7016,6 +7008,24 @@ void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body) lower_ir(ctx, lower_index_loads, body); } +static void hlsl_run_folding_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) +{ + bool progress; + + hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); + do + { + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); + progress |= hlsl_copy_propagation_execute(ctx, body); + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); + } while (progress); +} + void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) { bool progress; @@ -7040,19 +7050,8 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) lower_ir(ctx, lower_int_abs, body); lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_float_modulus, body); - hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - do - { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, body, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); - progress |= hlsl_copy_propagation_execute(ctx, body); - progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); - } while (progress); + hlsl_run_folding_passes(ctx, body); } static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, @@ -12513,6 +12512,9 @@ static void process_entry_function(struct hlsl_ctx *ctx, lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body); + compute_liveness(ctx, entry_func); + transform_derefs(ctx, divert_written_uniform_derefs_to_temp, &entry_func->body); + if (hlsl_version_lt(ctx, 4, 0)) hlsl_transform_ir(ctx, lower_separate_samples, body, NULL); @@ -12566,6 +12568,8 @@ static void process_entry_function(struct hlsl_ctx *ctx, lower_ir(ctx, validate_nonconstant_vector_store_derefs, body); + hlsl_run_folding_passes(ctx, body); + do compute_liveness(ctx, 
entry_func); while (hlsl_transform_ir(ctx, dce, body, NULL)); diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index b608fae21ac..d145617ec36 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -1732,8 +1732,20 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i return VKD3D_OK; } +struct io_normaliser_register_data +{ + struct + { + uint8_t register_count; + uint32_t mask; + uint32_t used_mask; + } component[VKD3D_VEC4_SIZE]; +}; + + struct io_normaliser { + struct vkd3d_shader_message_context *message_context; struct vkd3d_shader_instruction_array instructions; enum vkd3d_shader_type shader_type; uint8_t major; @@ -1751,9 +1763,9 @@ struct io_normaliser struct vkd3d_shader_dst_param *input_dcl_params[MAX_REG_OUTPUT]; struct vkd3d_shader_dst_param *output_dcl_params[MAX_REG_OUTPUT]; struct vkd3d_shader_dst_param *pc_dcl_params[MAX_REG_OUTPUT]; - uint8_t input_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; - uint8_t output_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; - uint8_t pc_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE]; + struct io_normaliser_register_data input_range_map[MAX_REG_OUTPUT]; + struct io_normaliser_register_data output_range_map[MAX_REG_OUTPUT]; + struct io_normaliser_register_data pc_range_map[MAX_REG_OUTPUT]; bool use_vocp; }; @@ -1794,36 +1806,44 @@ struct signature_element *vsir_signature_find_element_for_reg(const struct shade return NULL; } -static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE], +static unsigned int range_map_get_register_count(struct io_normaliser_register_data range_map[], unsigned int register_idx, uint32_t write_mask) { - return range_map[register_idx][vsir_write_mask_get_component_idx(write_mask)]; + return range_map[register_idx].component[vsir_write_mask_get_component_idx(write_mask)].register_count; } -static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], unsigned int register_idx, - unsigned int register_count, uint32_t write_mask, bool is_dcl_indexrange) +static enum vkd3d_result range_map_set_register_range(struct io_normaliser *normaliser, + struct io_normaliser_register_data range_map[], unsigned int register_idx, + unsigned int register_count, uint32_t mask, uint32_t used_mask, bool is_dcl_indexrange) { unsigned int i, j, r, c, component_idx, component_count; - VKD3D_ASSERT(write_mask <= VKD3DSP_WRITEMASK_ALL); - component_idx = vsir_write_mask_get_component_idx(write_mask); - component_count = vsir_write_mask_component_count(write_mask); + VKD3D_ASSERT(mask <= VKD3DSP_WRITEMASK_ALL); + component_idx = vsir_write_mask_get_component_idx(mask); + component_count = vsir_write_mask_component_count(mask); VKD3D_ASSERT(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); - if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange) + if (range_map[register_idx].component[component_idx].register_count > register_count && is_dcl_indexrange) { - /* Validated in the TPF reader. 
*/ - VKD3D_ASSERT(range_map[register_idx][component_idx] != UINT8_MAX); - return; + if (range_map[register_idx].component[component_idx].register_count == UINT8_MAX) + { + WARN("Conflicting index ranges.\n"); + vkd3d_shader_error(normaliser->message_context, NULL, + VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "Conflicting index ranges."); + return VKD3D_ERROR_INVALID_SHADER; + } + return VKD3D_OK; } - if (range_map[register_idx][component_idx] == register_count) + if (range_map[register_idx].component[component_idx].register_count == register_count) { /* Already done. This happens when fxc splits a register declaration by * component(s). The dcl_indexrange instructions are split too. */ - return; + return VKD3D_OK; } - range_map[register_idx][component_idx] = register_count; + range_map[register_idx].component[component_idx].register_count = register_count; + range_map[register_idx].component[component_idx].mask = mask; + range_map[register_idx].component[component_idx].used_mask = used_mask; for (i = 0; i < register_count; ++i) { @@ -1834,21 +1854,31 @@ static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], u /* A synthetic patch constant range which overlaps an existing range can start upstream of it * for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap. * The latter is validated in the TPF reader. */ - VKD3D_ASSERT(!range_map[r][c] || !is_dcl_indexrange); - range_map[r][c] = UINT8_MAX; + if (range_map[r].component[c].register_count && is_dcl_indexrange) + { + WARN("Conflicting index ranges.\n"); + vkd3d_shader_error(normaliser->message_context, NULL, + VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, "Conflicting index ranges."); + return VKD3D_ERROR_INVALID_SHADER; + } + range_map[r].component[c].register_count = UINT8_MAX; + range_map[r].component[c].mask = mask; + range_map[r].component[c].used_mask = used_mask; } } + + return VKD3D_OK; } -static void io_normaliser_add_index_range(struct io_normaliser *normaliser, +static enum vkd3d_result io_normaliser_add_index_range(struct io_normaliser *normaliser, const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; const struct vkd3d_shader_register *reg = &range->dst.reg; + struct io_normaliser_register_data *range_map; const struct shader_signature *signature; - uint8_t (*range_map)[VKD3D_VEC4_SIZE]; - struct signature_element *element; - unsigned int reg_idx, write_mask; + uint32_t mask, used_mask; + unsigned int reg_idx, i; switch (reg->type) { @@ -1879,9 +1909,21 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, } reg_idx = reg->idx[reg->idx_count - 1].offset; - write_mask = range->dst.write_mask; - element = vsir_signature_find_element_for_reg(signature, reg_idx, write_mask); - range_map_set_register_range(range_map, reg_idx, range->register_count, element->mask, true); + mask = range->dst.write_mask; + used_mask = 0; + + for (i = 0; i < range->register_count; ++i) + { + struct signature_element *element; + + if ((element = vsir_signature_find_element_for_reg(signature, reg_idx + i, mask))) + { + mask |= element->mask; + used_mask |= element->used_mask; + } + } + + return range_map_set_register_range(normaliser, range_map, reg_idx, range->register_count, mask, used_mask, true); } static int signature_element_mask_compare(const void *a, const void *b) @@ -1908,11 +1950,12 @@ static bool sysval_semantics_should_merge(const struct signature_element *e, con } /* Merge tess factor sysvals 
because they are an array in SPIR-V. */ -static void shader_signature_map_patch_constant_index_ranges(struct shader_signature *s, - uint8_t range_map[][VKD3D_VEC4_SIZE]) +static enum vkd3d_result shader_signature_map_patch_constant_index_ranges(struct io_normaliser *normaliser, + struct shader_signature *s, struct io_normaliser_register_data range_map[]) { - struct signature_element *e, *f; unsigned int i, j, register_count; + struct signature_element *e, *f; + enum vkd3d_result ret; qsort(s->elements, s->element_count, sizeof(s->elements[0]), signature_element_mask_compare); @@ -1933,8 +1976,12 @@ static void shader_signature_map_patch_constant_index_ranges(struct shader_signa if (register_count < 2) continue; - range_map_set_register_range(range_map, e->register_index, register_count, e->mask, false); + if ((ret = range_map_set_register_range(normaliser, range_map, + e->register_index, register_count, e->mask, e->used_mask, false) < 0)) + return ret; } + + return VKD3D_OK; } static int signature_element_register_compare(const void *a, const void *b) @@ -1977,62 +2024,19 @@ static int signature_element_index_compare(const void *a, const void *b) return vkd3d_u32_compare(e->sort_index, f->sort_index); } -static unsigned int signature_element_range_expand_mask(struct signature_element *e, unsigned int register_count, - uint8_t range_map[][VKD3D_VEC4_SIZE]) -{ - unsigned int i, j, component_idx, component_count, merged_write_mask = e->mask; - - /* dcl_indexrange instructions can declare a subset of the full mask, and the masks of - * the elements within the range may differ. TPF's handling of arrayed inputs with - * dcl_indexrange is really just a hack. Here we create a mask which covers all element - * masks, and check for collisions with other ranges. */ - - for (i = 1; i < register_count; ++i) - merged_write_mask |= e[i].mask; - - if (merged_write_mask == e->mask) - return merged_write_mask; - - /* Reaching this point is very rare to begin with, and collisions are even rarer or - * impossible. If the latter shows up, the fallback in shader_signature_find_element_for_reg() - * may be sufficient. 
*/ - - component_idx = vsir_write_mask_get_component_idx(e->mask); - component_count = vsir_write_mask_component_count(e->mask); - - for (i = e->register_index; i < e->register_index + register_count; ++i) - { - for (j = 0; j < component_idx; ++j) - if (range_map[i][j]) - break; - for (j = component_idx + component_count; j < VKD3D_VEC4_SIZE; ++j) - if (range_map[i][j]) - break; - } - - if (i == register_count) - { - WARN("Expanding mask %#x to %#x for %s, base reg %u, count %u.\n", e->mask, merged_write_mask, - e->semantic_name, e->register_index, register_count); - return merged_write_mask; - } - - WARN("Cannot expand mask %#x to %#x for %s, base reg %u, count %u.\n", e->mask, merged_write_mask, - e->semantic_name, e->register_index, register_count); - return e->mask; -} - -static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map[][VKD3D_VEC4_SIZE], +static enum vkd3d_result shader_signature_merge(struct io_normaliser *normaliser, + struct shader_signature *s, struct io_normaliser_register_data range_map[], bool is_patch_constant) { unsigned int i, j, element_count, new_count, register_count; struct signature_element *elements; + enum vkd3d_result ret = VKD3D_OK; struct signature_element *e, *f; bool used; element_count = s->element_count; if (!(elements = vkd3d_malloc(element_count * sizeof(*elements)))) - return false; + return VKD3D_ERROR_OUT_OF_MEMORY; if (element_count) memcpy(elements, s->elements, element_count * sizeof(*elements)); @@ -2091,42 +2095,49 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map s->elements = elements; s->element_count = element_count; - if (is_patch_constant) - shader_signature_map_patch_constant_index_ranges(s, range_map); + if (is_patch_constant + && (ret = shader_signature_map_patch_constant_index_ranges(normaliser, s, range_map)) < 0) + goto out; - for (i = 0, new_count = 0; i < element_count; i += register_count, elements[new_count++] = *e) + for (i = 0, new_count = 0; i < element_count; ++i) { e = &elements[i]; register_count = 1; if (e->register_index >= MAX_REG_OUTPUT) + { + elements[new_count++] = *e; continue; + } register_count = range_map_get_register_count(range_map, e->register_index, e->mask); - VKD3D_ASSERT(register_count != UINT8_MAX); - register_count += !register_count; - if (register_count > 1) + if (register_count == UINT8_MAX) { - TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count); - e->register_count = register_count; - e->mask = signature_element_range_expand_mask(e, register_count, range_map); + TRACE("Register %u mask %#x semantic %s%u has already been merged, dropping it.\n", + e->register_index, e->mask, e->semantic_name, e->semantic_index); + vkd3d_free((void *)e->semantic_name); + continue; + } - for (j = 1; j < register_count; ++j) - { - f = &elements[i + j]; - vkd3d_free((void *)f->semantic_name); - } + if (register_count > 0) + { + TRACE("Register %u mask %#x semantic %s%u is used as merge destination.\n", + e->register_index, e->mask, e->semantic_name, e->semantic_index); + e->register_count = register_count; + e->mask = range_map[e->register_index].component[vsir_write_mask_get_component_idx(e->mask)].mask; + e->used_mask = range_map[e->register_index].component[vsir_write_mask_get_component_idx(e->mask)].used_mask; } + + elements[new_count++] = *e; } - element_count = new_count; + s->element_count = new_count; +out: /* Restoring the original order is required for sensible trace output. 
*/ - qsort(elements, element_count, sizeof(elements[0]), signature_element_index_compare); - - s->element_count = element_count; + qsort(s->elements, s->element_count, sizeof(elements[0]), signature_element_index_compare); - return true; + return ret; } static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg, @@ -2342,8 +2353,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program, struct vsir_transformation_context *ctx) { - struct io_normaliser normaliser = {program->instructions}; + struct io_normaliser normaliser = {ctx->message_context, program->instructions}; struct vkd3d_shader_instruction *ins; + enum vkd3d_result ret; unsigned int i; VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO); @@ -2365,7 +2377,8 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program normaliser.output_control_point_count = ins->declaration.count; break; case VKD3DSIH_DCL_INDEX_RANGE: - io_normaliser_add_index_range(&normaliser, ins); + if ((ret = io_normaliser_add_index_range(&normaliser, ins)) < 0) + return ret; vkd3d_shader_instruction_make_nop(ins); break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: @@ -2378,12 +2391,14 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program } } - if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) - || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) - || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) + if ((ret = shader_signature_merge(&normaliser, &program->input_signature, normaliser.input_range_map, false)) < 0 + || (ret = shader_signature_merge(&normaliser, &program->output_signature, + normaliser.output_range_map, false)) < 0 + || (ret = shader_signature_merge(&normaliser, &program->patch_constant_signature, + normaliser.pc_range_map, true)) < 0) { program->instructions = normaliser.instructions; - return VKD3D_ERROR_OUT_OF_MEMORY; + return ret; } normaliser.phase = VKD3DSIH_INVALID; diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index 4a8d0fddae1..d167415c356 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -20,6 +20,7 @@ %{ +#include "preproc.h" #include "preproc.tab.h" #undef ERROR /* defined in wingdi.h */ diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y index c6be17bd230..95987831faa 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.y +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y @@ -178,6 +178,16 @@ static int default_open_include(const char *filename, bool local, if (S_ISREG(st.st_mode)) size = st.st_size; + if (!size) + { + fclose(f); + + out->code = NULL; + out->size = 0; + + return VKD3D_OK; + } + if (!(data = vkd3d_malloc(size))) { fclose(f); diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index 82302aac666..d41f1c65fa7 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -1051,7 +1051,8 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins register_idx, register_count, write_mask, e->sysval_semantic); return; } - if ((io_masks[register_idx + i] & write_mask) != write_mask) + if ((io_masks[register_idx + i] & write_mask) != write_mask + && 
(io_masks[register_idx + i] & write_mask) != 0) { WARN("No matching declaration for index range base %u, count %u, mask %#x.\n", register_idx, register_count, write_mask); diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index 021691bb3a1..d3e4d9cea5a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ #include #include +/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ + static inline int char_to_int(char c) { if ('0' <= c && c <= '9') @@ -454,8 +456,15 @@ struct shader_dump_data const char *target_suffix; }; +enum shader_dump_type +{ + SHADER_DUMP_TYPE_SOURCE, + SHADER_DUMP_TYPE_PREPROC, + SHADER_DUMP_TYPE_TARGET, +}; + static void vkd3d_shader_dump_shader(const struct shader_dump_data *dump_data, - const void *data, size_t size, bool source) + const void *data, size_t size, enum shader_dump_type type) { static const char hexadecimal_digits[] = "0123456789abcdef"; const uint8_t *checksum = dump_data->checksum; @@ -480,8 +489,10 @@ static void vkd3d_shader_dump_shader(const struct shader_dump_data *dump_data, if (dump_data->profile) pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-%s", dump_data->profile); - if (source) + if (type == SHADER_DUMP_TYPE_SOURCE) pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-source.%s", dump_data->source_suffix); + else if (type == SHADER_DUMP_TYPE_PREPROC) + pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-preproc.%s", dump_data->source_suffix); else pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-target.%s", dump_data->target_suffix); @@ -737,12 +748,20 @@ void vkd3d_shader_free_messages(char *messages) static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, const struct shader_signature *src) { - unsigned int i; + struct vkd3d_shader_signature_element *d; + const struct signature_element *e; + size_t count, i, j; - signature->element_count = src->element_count; + for (i = 0, count = 0; i < src->element_count; ++i) + { + e = &src->elements[i]; + count += e->register_count; + } + + signature->element_count = count; if (!src->elements) { - VKD3D_ASSERT(!signature->element_count); + VKD3D_ASSERT(!count); signature->elements = NULL; return true; } @@ -750,30 +769,25 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) return false; - for (i = 0; i < signature->element_count; ++i) + for (i = 0, d = signature->elements; i < src->element_count; ++i) { - struct vkd3d_shader_signature_element *d = &signature->elements[i]; - struct signature_element *e = &src->elements[i]; - - if (!(d->semantic_name = vkd3d_strdup(e->semantic_name))) + for (j = 0, e = &src->elements[i]; j < e->register_count; ++j) { - for (unsigned int j = 0; j < i; ++j) + if (!(d->semantic_name = vkd3d_strdup(e->semantic_name))) { - vkd3d_free((void *)signature->elements[j].semantic_name); + vkd3d_shader_free_shader_signature(signature); + return false; } - vkd3d_free(signature->elements); - return false; + d->semantic_index = e->semantic_index + j; + d->stream_index = e->stream_index; + d->sysval_semantic = e->sysval_semantic; + d->component_type = e->component_type; + d->register_index = e->register_index + j; + d->mask = e->mask; + d->used_mask = e->used_mask; + d->min_precision = e->min_precision; + ++d; } - d->semantic_index = 
e->semantic_index;
-        d->stream_index = e->stream_index;
-        d->sysval_semantic = e->sysval_semantic;
-        d->component_type = e->component_type;
-        d->register_index = e->register_index;
-        if (e->register_count > 1)
-            FIXME("Arrayed elements are not supported yet.\n");
-        d->mask = e->mask;
-        d->used_mask = e->used_mask;
-        d->min_precision = e->min_precision;
     }
 
     return true;
@@ -1631,7 +1645,7 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char
     vkd3d_shader_message_context_init(&message_context, compile_info->log_level);
 
     fill_shader_dump_data(compile_info, &dump_data);
-    vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true);
+    vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, SHADER_DUMP_TYPE_SOURCE);
 
     if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL)
     {
@@ -1711,7 +1725,8 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags,
 }
 
 static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info,
-        struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context)
+        const struct shader_dump_data *dump_data, struct vkd3d_shader_code *out,
+        struct vkd3d_shader_message_context *message_context)
 {
     struct vkd3d_shader_code preprocessed;
     int ret;
@@ -1719,6 +1734,8 @@ static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info,
     if ((ret = preproc_lexer_parse(compile_info, &preprocessed, message_context)))
         return ret;
 
+    vkd3d_shader_dump_shader(dump_data, preprocessed.code, preprocessed.size, SHADER_DUMP_TYPE_PREPROC);
+
     ret = hlsl_compile_shader(&preprocessed, compile_info, out, message_context);
 
     vkd3d_shader_free_shader_code(&preprocessed);
@@ -1745,11 +1762,11 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info,
     vkd3d_shader_message_context_init(&message_context, compile_info->log_level);
 
     fill_shader_dump_data(compile_info, &dump_data);
-    vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true);
+    vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, SHADER_DUMP_TYPE_SOURCE);
 
     if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL)
     {
-        ret = compile_hlsl(compile_info, out, &message_context);
+        ret = compile_hlsl(compile_info, &dump_data, out, &message_context);
     }
     else if (compile_info->source_type == VKD3D_SHADER_SOURCE_FX)
     {
@@ -1768,7 +1785,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info,
     }
 
     if (ret >= 0)
-        vkd3d_shader_dump_shader(&dump_data, out->code, out->size, false);
+        vkd3d_shader_dump_shader(&dump_data, out->code, out->size, SHADER_DUMP_TYPE_TARGET);
 
     vkd3d_shader_message_context_trace_messages(&message_context);
     if (!vkd3d_shader_message_context_copy_messages(&message_context, messages))
diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c
index ce0c3b9128f..1ff58f97565 100644
--- a/libs/vkd3d/libs/vkd3d/command.c
+++ b/libs/vkd3d/libs/vkd3d/command.c
@@ -1499,7 +1499,7 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool(
     const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
     struct VkDescriptorPoolCreateInfo pool_desc;
     VkDevice vk_device = device->vk_device;
-    VkDescriptorPoolSize vk_pool_sizes[2];
+    VkDescriptorPoolSize vk_pool_sizes[4];
     unsigned int pool_size, pool_limit;
     VkDescriptorPool vk_pool;
     VkResult vr;
@@ -1530,21 +1530,43 @@ static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool(
     }
     descriptor_count = pool_size;
 
-    vk_pool_sizes[0].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, true);
-    vk_pool_sizes[0].descriptorCount = descriptor_count;
-
-    vk_pool_sizes[1].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, false);
-    vk_pool_sizes[1].descriptorCount = descriptor_count;
-
     pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
     pool_desc.pNext = NULL;
     pool_desc.flags = 0;
     pool_desc.maxSets = 512;
-    pool_desc.poolSizeCount = 1;
-    if (vk_pool_sizes[1].type != vk_pool_sizes[0].type)
-        ++pool_desc.poolSizeCount;
     pool_desc.pPoolSizes = vk_pool_sizes;
+    if (allocator->device->use_vk_heaps)
+    {
+        /* SRV root descriptors. */
+        vk_pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
+        vk_pool_sizes[0].descriptorCount = descriptor_count;
+
+        /* UAV root descriptors and UAV counters. */
+        vk_pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
+        vk_pool_sizes[1].descriptorCount = descriptor_count;
+
+        /* CBV root descriptors. */
+        vk_pool_sizes[2].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+        vk_pool_sizes[2].descriptorCount = descriptor_count;
+
+        /* Static samplers. */
+        vk_pool_sizes[3].type = VK_DESCRIPTOR_TYPE_SAMPLER;
+        vk_pool_sizes[3].descriptorCount = descriptor_count;
+
+        pool_desc.poolSizeCount = 4;
+    }
+    else
+    {
+        vk_pool_sizes[0].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, true);
+        vk_pool_sizes[0].descriptorCount = descriptor_count;
+
+        vk_pool_sizes[1].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, false);
+        vk_pool_sizes[1].descriptorCount = descriptor_count;
+
+        pool_desc.poolSizeCount = 1 + (vk_pool_sizes[0].type != vk_pool_sizes[1].type);
+    }
+
     if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0)
     {
         ERR("Failed to create descriptor pool, vr %d.\n", vr);
@@ -1578,6 +1600,10 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set(struct d3
     VkDescriptorSet vk_descriptor_set;
     VkResult vr;
 
+    /* With Vulkan heaps we use just one descriptor pool. */
+    if (device->use_vk_heaps)
+        descriptor_type = 0;
+
     if (!allocator->vk_descriptor_pools[descriptor_type])
         allocator->vk_descriptor_pools[descriptor_type] = d3d12_command_allocator_allocate_descriptor_pool(allocator,
                 descriptor_type, descriptor_count, unbounded);
@@ -2222,7 +2248,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state,
             if (!stencil_state || (stencil_state & D3D12_RESOURCE_STATE_DEPTH_WRITE))
                 *image_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
             else
-                *image_layout = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL;
+                *image_layout = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL_KHR;
         }
 
         return true;
@@ -2256,7 +2282,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state,
         {
             if (stencil_state & D3D12_RESOURCE_STATE_DEPTH_WRITE)
             {
-                *image_layout = VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL;
+                *image_layout = VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR;
                 *access_mask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
             }
             else
diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c
index b51e2963efa..0575b492d64 100644
--- a/libs/vkd3d/libs/vkd3d/device.c
+++ b/libs/vkd3d/libs/vkd3d/device.c
@@ -73,6 +73,7 @@ static const struct vkd3d_optional_extension_info optional_instance_extensions[]
 static const char * const required_device_extensions[] =
 {
     VK_KHR_MAINTENANCE1_EXTENSION_NAME,
+    VK_KHR_MAINTENANCE2_EXTENSION_NAME,
     VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
 };
 
@@ -91,7 +92,6 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
     VK_EXTENSION(KHR_DRAW_INDIRECT_COUNT, KHR_draw_indirect_count),
     VK_EXTENSION(KHR_GET_MEMORY_REQUIREMENTS_2, KHR_get_memory_requirements2),
     VK_EXTENSION(KHR_IMAGE_FORMAT_LIST, KHR_image_format_list),
-    VK_EXTENSION(KHR_MAINTENANCE2, KHR_maintenance2),
     VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3),
     VK_EXTENSION(KHR_PORTABILITY_SUBSET, KHR_portability_subset),
     VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor),
diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c
index eab97715944..cb184986f2a 100644
--- a/libs/vkd3d/libs/vkd3d/resource.c
+++ b/libs/vkd3d/libs/vkd3d/resource.c
@@ -3094,7 +3094,7 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkIm
     if (vk_image)
     {
         view_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
-        view_desc.pNext = NULL;
+        view_desc.pNext = &usage_desc;
         view_desc.flags = 0;
         view_desc.image = vk_image;
         view_desc.viewType = desc->view_type;
@@ -3107,13 +3107,11 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, uint32_t magic, VkIm
     view_desc.subresourceRange.levelCount = desc->miplevel_count;
     view_desc.subresourceRange.baseArrayLayer = desc->layer_idx;
     view_desc.subresourceRange.layerCount = desc->layer_count;
-    if (device->vk_info.KHR_maintenance2)
-    {
-        usage_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO;
-        usage_desc.pNext = NULL;
-        usage_desc.usage = desc->usage;
-        view_desc.pNext = &usage_desc;
-    }
+
+    usage_desc.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO;
+    usage_desc.pNext = NULL;
+    usage_desc.usage = desc->usage;
+
     if ((vr = VK_CALL(vkCreateImageView(device->vk_device, &view_desc, NULL, &vk_view))) < 0)
     {
         WARN("Failed to create Vulkan image view, vr %d.\n", vr);
diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c
index aa08dc985bd..b5a8d1331fb 100644
--- a/libs/vkd3d/libs/vkd3d/state.c
+++ b/libs/vkd3d/libs/vkd3d/state.c
@@ -754,8 +754,11 @@ struct vkd3d_descriptor_set_context
     unsigned int uav_counter_index;
     unsigned int push_constant_index;
 
-    struct vk_binding_array *push_descriptor_set;
+    struct vk_binding_array *root_descriptor_set;
+    struct vk_binding_array *static_samplers_descriptor_set;
     bool push_descriptor;
+    bool static_samplers;
+    bool use_vk_heaps;
 };
 
 static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context)
@@ -806,13 +809,59 @@ static struct vk_binding_array *d3d12_root_signature_vk_binding_array_for_type(
 {
     struct vk_binding_array *array, **current;
 
+    /* There are a few different ways we can reach this point:
+     *  * If we are using virtual heaps we want to allocate descriptors to sets
+     *    depending on their descriptor type, in order to minimize waste when
+     *    recycling descriptor pools.
+     *    + With the exception of root descriptors when we are using push
+     *      descriptors: the push descriptors must be in a separate set, so we
+     *      keep one specifically for them.
+     *  * If we are using Vulkan heaps then all the root table descriptors don't
+     *    even reach here, because they are managed by the D3D12 descriptor
+     *    heap. Thus we only have to deal with root descriptors and static
+     *    samplers.
+     *    + If we're using push descriptors then again we have to dedicate a set
+     *      for them, so static samplers will end up in their own set too.
+     *    + If we're not using push descriptors then we can use the same set and
+     *      save one. In this case we don't care too much about minimizing
+     *      wasted descriptors, because few descriptors can end up here anyway.
+     */
+
     if (context->push_descriptor)
     {
-        if (!context->push_descriptor_set)
-            context->push_descriptor_set = d3d12_root_signature_append_vk_binding_array(root_signature,
-                    descriptor_type, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, context);
+        /* The descriptor type is irrelevant here, it will never be used. */
+        if (!context->root_descriptor_set)
+            context->root_descriptor_set = d3d12_root_signature_append_vk_binding_array(root_signature,
+                    0, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, context);
+
+        return context->root_descriptor_set;
+    }
+
+    if (context->use_vk_heaps)
+    {
+        if (context->static_samplers)
+        {
+            if (!context->static_samplers_descriptor_set)
+            {
+                if (!context->push_descriptor && context->root_descriptor_set)
+                    context->static_samplers_descriptor_set = context->root_descriptor_set;
+                else
+                    /* The descriptor type is irrelevant here, it will never be used. */
+                    context->static_samplers_descriptor_set = d3d12_root_signature_append_vk_binding_array(
+                            root_signature, 0, 0, context);
+            }
+
+            return context->static_samplers_descriptor_set;
+        }
+        else
+        {
+            /* The descriptor type is irrelevant here, it will never be used. */
+            if (!context->root_descriptor_set)
+                context->root_descriptor_set = d3d12_root_signature_append_vk_binding_array(
+                        root_signature, 0, 0, context);
 
-        return context->push_descriptor_set;
+            return context->root_descriptor_set;
+        }
     }
 
     current = context->current_binding_array;
@@ -1638,17 +1687,22 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa
             sizeof(*root_signature->static_samplers))))
         goto fail;
 
+    context.use_vk_heaps = use_vk_heaps;
     context.push_descriptor = vk_info->KHR_push_descriptor;
     if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context)))
         goto fail;
-    root_signature->main_set = !!context.push_descriptor_set;
+    root_signature->main_set = context.root_descriptor_set && context.push_descriptor;
     context.push_descriptor = false;
 
     if (FAILED(hr = d3d12_root_signature_init_push_constants(root_signature, desc,
             root_signature->push_constant_ranges, &root_signature->push_constant_range_count)))
        goto fail;
+
+    context.static_samplers = true;
     if (FAILED(hr = d3d12_root_signature_init_static_samplers(root_signature, device, desc, &context)))
         goto fail;
+    context.static_samplers = false;
+
     context.push_constant_index = 0;
     if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &info, &context)))
         goto fail;
@@ -3146,13 +3200,13 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
     struct vkd3d_shader_spirv_target_info *stage_target_info;
     uint32_t aligned_offsets[D3D12_VS_INPUT_REGISTER_COUNT];
     struct vkd3d_shader_descriptor_offset_info offset_info;
+    struct vkd3d_shader_scan_signature_info signature_info;
     struct vkd3d_shader_parameter ps_shader_parameters[1];
     struct vkd3d_shader_transform_feedback_info xfb_info;
     struct vkd3d_shader_spirv_target_info ps_target_info;
     struct vkd3d_shader_interface_info shader_interface;
     struct vkd3d_shader_spirv_target_info target_info;
-    const struct d3d12_root_signature *root_signature;
-    struct vkd3d_shader_signature input_signature;
+    struct d3d12_root_signature *root_signature;
     bool have_attachment, is_dsv_format_unknown;
     VkShaderStageFlagBits xfb_stage = 0;
     VkSampleCountFlagBits sample_count;
@@ -3163,7 +3217,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
     size_t rt_count;
     uint32_t mask;
     HRESULT hr;
-    int ret;
 
     static const DWORD default_ps_code[] =
     {
@@ -3196,7 +3249,8 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
 
     memset(&state->uav_counters, 0, sizeof(state->uav_counters));
     graphics->stage_count = 0;
-    memset(&input_signature, 0, sizeof(input_signature));
+    memset(&signature_info, 0, sizeof(signature_info));
+    signature_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO;
 
     for (i = desc->rtv_formats.NumRenderTargets; i < ARRAY_SIZE(desc->rtv_formats.RTFormats); ++i)
     {
@@ -3207,10 +3261,25 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
         }
     }
 
+    state->implicit_root_signature = NULL;
     if (!(root_signature = unsafe_impl_from_ID3D12RootSignature(desc->root_signature)))
     {
-        WARN("Root signature is NULL.\n");
-        return E_INVALIDARG;
+        TRACE("Root signature is NULL, looking for an embedded signature in the vertex shader.\n");
+        if (FAILED(hr = d3d12_root_signature_create(device,
+                desc->vs.pShaderBytecode, desc->vs.BytecodeLength, &root_signature))
+                && FAILED(hr = d3d12_root_signature_create(device,
+                desc->ps.pShaderBytecode, desc->ps.BytecodeLength, &root_signature))
+                && FAILED(hr = d3d12_root_signature_create(device,
+                desc->ds.pShaderBytecode, desc->ds.BytecodeLength, &root_signature))
+                && FAILED(hr = d3d12_root_signature_create(device,
+                desc->hs.pShaderBytecode, desc->hs.BytecodeLength, &root_signature))
+                && FAILED(hr = d3d12_root_signature_create(device,
+                desc->gs.pShaderBytecode, desc->gs.BytecodeLength, &root_signature)))
+        {
+            WARN("Failed to find an embedded root signature, hr %s.\n", debugstr_hresult(hr));
+            goto fail;
+        }
+        state->implicit_root_signature = &root_signature->ID3D12RootSignature_iface;
     }
 
     sample_count = vk_samples_from_dxgi_sample_desc(&desc->sample_desc);
@@ -3425,7 +3494,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
     for (i = 0; i < ARRAY_SIZE(shader_stages); ++i)
     {
         const D3D12_SHADER_BYTECODE *b = (const void *)((uintptr_t)desc + shader_stages[i].offset);
-        const struct vkd3d_shader_code dxbc = {b->pShaderBytecode, b->BytecodeLength};
 
         if (!b->pShaderBytecode)
             continue;
@@ -3439,14 +3507,6 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
         stage_target_info = &target_info;
         switch (shader_stages[i].stage)
         {
-            case VK_SHADER_STAGE_VERTEX_BIT:
-                if ((ret = vkd3d_shader_parse_input_signature(&dxbc, &input_signature, NULL)) < 0)
-                {
-                    hr = hresult_from_vkd3d_result(ret);
-                    goto fail;
-                }
-                break;
-
             case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
             case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
                 if (desc->primitive_topology_type != D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH)
@@ -3457,6 +3517,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
                 }
                 break;
 
+            case VK_SHADER_STAGE_VERTEX_BIT:
             case VK_SHADER_STAGE_GEOMETRY_BIT:
                 break;
 
@@ -3478,11 +3539,14 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
         ps_target_info.next = NULL;
         target_info.next = NULL;
         offset_info.next = NULL;
+        signature_info.next = NULL;
         if (shader_stages[i].stage == xfb_stage)
             vkd3d_prepend_struct(&shader_interface, &xfb_info);
         vkd3d_prepend_struct(&shader_interface, stage_target_info);
         if (root_signature->descriptor_offsets)
             vkd3d_prepend_struct(&shader_interface, &offset_info);
+        if (shader_stages[i].stage == VK_SHADER_STAGE_VERTEX_BIT)
+            vkd3d_prepend_struct(&shader_interface, &signature_info);
 
         if (FAILED(hr = create_shader_stage(device, &graphics->stages[graphics->stage_count],
                 shader_stages[i].stage, b, &shader_interface)))
@@ -3533,7 +3597,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
             goto fail;
         }
 
-        if (!(signature_element = vkd3d_shader_find_signature_element(&input_signature,
+        if (!(signature_element = vkd3d_shader_find_signature_element(&signature_info.input,
                 e->SemanticName, e->SemanticIndex, 0)))
         {
             WARN("Unused input element %u.\n", i);
@@ -3660,19 +3724,21 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
     if (FAILED(hr = vkd3d_private_store_init(&state->private_store)))
         goto fail;
 
-    vkd3d_shader_free_shader_signature(&input_signature);
+    vkd3d_shader_free_scan_signature_info(&signature_info);
 
     state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
-    state->implicit_root_signature = NULL;
     d3d12_device_add_ref(state->device = device);
 
     return S_OK;
 
 fail:
+    if (state->implicit_root_signature)
+        ID3D12RootSignature_Release(state->implicit_root_signature);
+
     for (i = 0; i < graphics->stage_count; ++i)
     {
         VK_CALL(vkDestroyShaderModule(device->vk_device, state->u.graphics.stages[i].module, NULL));
     }
-    vkd3d_shader_free_shader_signature(&input_signature);
+    vkd3d_shader_free_scan_signature_info(&signature_info);
 
     d3d12_pipeline_uav_counter_state_cleanup(&state->uav_counters, device);
diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
index fd1fbb1679a..7015508e384 100644
--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
@@ -127,7 +127,6 @@ struct vkd3d_vulkan_info
     bool KHR_draw_indirect_count;
     bool KHR_get_memory_requirements2;
     bool KHR_image_format_list;
-    bool KHR_maintenance2;
     bool KHR_maintenance3;
     bool KHR_portability_subset;
     bool KHR_push_descriptor;
-- 
2.47.2
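
Illustrative sketch (not part of the patch): the pipeline-state changes above replace vkd3d_shader_parse_input_signature() with a struct vkd3d_shader_scan_signature_info chained through the vertex stage's compile info. The C sketch below shows how that same public vkd3d-shader interface can be driven directly; the helper name print_input_signature(), the choice of VKD3D_SHADER_TARGET_NONE and VKD3D_SHADER_LOG_INFO, and the printed output format are invented for the example and are not prescribed by the patch.

#include <stdio.h>
#include <vkd3d_shader.h>

/* Hypothetical helper: scan a DXBC-TPF blob and print its input signature,
 * mirroring how d3d12_pipeline_state_init_graphics() now obtains it. */
static int print_input_signature(const void *dxbc, size_t dxbc_size)
{
    struct vkd3d_shader_scan_signature_info signatures = {0};
    struct vkd3d_shader_compile_info info = {0};
    char *messages = NULL;
    unsigned int i;
    int ret;

    signatures.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO;

    info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO;
    info.next = &signatures;        /* Chain the signature request, as the patch does for the vertex stage. */
    info.source.code = dxbc;
    info.source.size = dxbc_size;
    info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF;
    info.target_type = VKD3D_SHADER_TARGET_NONE;    /* Assumption: scanning only, no code generation. */
    info.log_level = VKD3D_SHADER_LOG_INFO;

    ret = vkd3d_shader_scan(&info, &messages);
    if (messages)
        fputs(messages, stderr);
    vkd3d_shader_free_messages(messages);
    if (ret < 0)
        return ret;

    for (i = 0; i < signatures.input.element_count; ++i)
    {
        const struct vkd3d_shader_signature_element *e = &signatures.input.elements[i];

        printf("%s%u -> register %u, mask %#x\n",
                e->semantic_name, e->semantic_index, e->register_index, e->mask);
    }

    /* The scan allocates the element arrays; release them the same way the
     * patch does in d3d12_pipeline_state_init_graphics(). */
    vkd3d_shader_free_scan_signature_info(&signatures);
    return 0;
}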