From 0c5dc53fd20665e8406264b016b4e747b5cbb8ce Mon Sep 17 00:00:00 2001 From: Shaun Ren Date: Fri, 4 Oct 2024 21:18:53 -0400 Subject: [PATCH] vkd3d-shader/hlsl: Allocate temporary registers separately for each entry function. --- libs/vkd3d-shader/hlsl.h | 5 ++-- libs/vkd3d-shader/hlsl_codegen.c | 41 +++++++++++++++++++++++++------- libs/vkd3d-shader/tpf.c | 12 +++++++--- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 6bec38d0..28175098 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1089,9 +1089,6 @@ struct hlsl_ctx } constant_defs; /* 'c' registers where the constants expected by SM2 sincos are stored. */ struct hlsl_reg d3dsincosconst1, d3dsincosconst2; - /* Number of temp. registers required for the shader to run, i.e. the largest temp register - * index that will be used in the output bytecode (+1). */ - uint32_t temp_count; /* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in * compute shader profiles. It is set using the numthreads() attribute in the entry point. */ @@ -1430,6 +1427,8 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body); void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); +uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); +void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 653afdc1..01770efc 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -4114,7 +4114,7 @@ static void dump_function(struct rb_entry *entry, void *context) } } -static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, +static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { if (!deref->rel_offset.node) @@ -4127,6 +4127,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, return true; } +void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + var->indexable = false; + } + + transform_derefs(ctx, mark_indexable_var, &entry_func->body); +} + static char get_regset_name(enum hlsl_regset regset) { switch (regset) @@ -5116,9 +5130,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi * index to all (simultaneously live) variables or intermediate values. Agnostic * as to how many registers are actually available for the current backend, and * does not handle constants. */ -static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct register_allocator allocator = {0}; + struct hlsl_scope *scope; + struct hlsl_ir_var *var; + + /* Reset variable temp register allocations. */ + LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform)) + memset(var->regs, 0, sizeof(var->regs)); + } + } /* ps_1_* outputs are special and go in temp register 0. */ if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) @@ -5127,8 +5153,7 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio for (i = 0; i < entry_func->parameters.count; ++i) { - const struct hlsl_ir_var *var = entry_func->parameters.vars[i]; - + var = entry_func->parameters.vars[i]; if (var->is_output_semantic) { record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); @@ -5138,8 +5163,9 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio } allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); - ctx->temp_count = allocator.reg_count; vkd3d_free(allocator.allocations); + + return allocator.reg_count; } static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) @@ -7743,13 +7769,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct hlsl_ir_function compute_liveness(ctx, entry_func); mark_vars_usage(ctx); - transform_derefs(ctx, mark_indexable_vars, body); - calculate_resource_register_counts(ctx); allocate_register_reservations(ctx, &ctx->extern_vars); allocate_register_reservations(ctx, &entry_func->extern_vars); - allocate_temp_registers(ctx, entry_func); allocate_semantic_registers(ctx, entry_func); } @@ -7774,6 +7797,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry if (profile->major_version < 4) { + mark_indexable_vars(ctx, entry_func); + allocate_temp_registers(ctx, entry_func); allocate_const_registers(ctx, entry_func); } else diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index d0ca6e8d..1957dd25 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -6387,7 +6387,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc } } -static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func, +static void write_sm4_shdr(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct sm4_stat *stat, struct dxbc_writer *dxbc) { const struct hlsl_profile_info *profile = ctx->profile; @@ -6399,6 +6399,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d const struct hlsl_ir_var *var; size_t token_count_position; struct tpf_writer tpf; + uint32_t temp_count; static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = { @@ -6413,6 +6414,11 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d VKD3D_SM4_LIB, }; + mark_indexable_vars(ctx, entry_func); + temp_count = allocate_temp_registers(ctx, entry_func); + if (ctx->result) + return; + tpf_writer_init(&tpf, ctx, stat, &buffer); extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); @@ -6450,8 +6456,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) write_sm4_dcl_thread_group(&tpf, ctx->thread_count); - if (ctx->temp_count) - write_sm4_dcl_temps(&tpf, ctx->temp_count); + if (temp_count) + write_sm4_dcl_temps(&tpf, temp_count); LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) {