vkd3d-shader/hlsl: Allocate temporary registers separately for each entry function.

This commit is contained in:
Shaun Ren 2024-10-04 21:18:53 -04:00 committed by Henri Verbeet
parent 5f8570b933
commit 0c5dc53fd2
Notes: Henri Verbeet 2024-10-15 17:03:41 +02:00
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1154
3 changed files with 44 additions and 14 deletions

View File

@ -1089,9 +1089,6 @@ struct hlsl_ctx
} constant_defs;
/* 'c' registers where the constants expected by SM2 sincos are stored. */
struct hlsl_reg d3dsincosconst1, d3dsincosconst2;
/* Number of temp. registers required for the shader to run, i.e. the largest temp register
* index that will be used in the output bytecode (+1). */
uint32_t temp_count;
/* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in
* compute shader profiles. It is set using the numthreads() attribute in the entry point. */
@ -1430,6 +1427,8 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx,
void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body);
void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body);
uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func);
void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func);
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out);
int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out);

View File

@ -4114,7 +4114,7 @@ static void dump_function(struct rb_entry *entry, void *context)
}
}
static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
struct hlsl_ir_node *instr)
{
if (!deref->rel_offset.node)
@ -4127,6 +4127,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
return true;
}
/* Recompute the "indexable" flag of variables for a single entry function.
 *
 * Every variable in every scope known to the context first has its flag
 * cleared; mark_indexable_var is then handed to transform_derefs together
 * with the body of entry_func. Any flag that ends up set after this call was
 * therefore set during that pass, not carried over from an earlier call.
 *
 * ctx:        compilation context whose scopes are walked.
 * entry_func: entry function whose body is passed to transform_derefs. */
void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
{
    struct hlsl_ir_var *cur_var;
    struct hlsl_scope *cur_scope;

    /* Start from a clean slate: drop the flag on all variables. */
    LIST_FOR_EACH_ENTRY(cur_scope, &ctx->scopes, struct hlsl_scope, entry)
    {
        LIST_FOR_EACH_ENTRY(cur_var, &cur_scope->vars, struct hlsl_ir_var, scope_entry)
        {
            cur_var->indexable = false;
        }
    }

    /* NOTE(review): presumably visits each deref in the body and lets
     * mark_indexable_var set the flag where needed -- confirm against
     * transform_derefs. */
    transform_derefs(ctx, mark_indexable_var, &entry_func->body);
}
static char get_regset_name(enum hlsl_regset regset)
{
switch (regset)
@ -5116,9 +5130,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
* index to all (simultaneously live) variables or intermediate values. Agnostic
* as to how many registers are actually available for the current backend, and
* does not handle constants. */
static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
{
struct register_allocator allocator = {0};
struct hlsl_scope *scope;
struct hlsl_ir_var *var;
/* Reset variable temp register allocations. */
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
{
LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
{
if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform))
memset(var->regs, 0, sizeof(var->regs));
}
}
/* ps_1_* outputs are special and go in temp register 0. */
if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
@ -5127,8 +5153,7 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio
for (i = 0; i < entry_func->parameters.count; ++i)
{
const struct hlsl_ir_var *var = entry_func->parameters.vars[i];
var = entry_func->parameters.vars[i];
if (var->is_output_semantic)
{
record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read);
@ -5138,8 +5163,9 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio
}
allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator);
ctx->temp_count = allocator.reg_count;
vkd3d_free(allocator.allocations);
return allocator.reg_count;
}
static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output)
@ -7743,13 +7769,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct hlsl_ir_function
compute_liveness(ctx, entry_func);
mark_vars_usage(ctx);
transform_derefs(ctx, mark_indexable_vars, body);
calculate_resource_register_counts(ctx);
allocate_register_reservations(ctx, &ctx->extern_vars);
allocate_register_reservations(ctx, &entry_func->extern_vars);
allocate_temp_registers(ctx, entry_func);
allocate_semantic_registers(ctx, entry_func);
}
@ -7774,6 +7797,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
if (profile->major_version < 4)
{
mark_indexable_vars(ctx, entry_func);
allocate_temp_registers(ctx, entry_func);
allocate_const_registers(ctx, entry_func);
}
else

View File

@ -6387,7 +6387,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc
}
}
static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func,
static void write_sm4_shdr(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
struct sm4_stat *stat, struct dxbc_writer *dxbc)
{
const struct hlsl_profile_info *profile = ctx->profile;
@ -6399,6 +6399,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d
const struct hlsl_ir_var *var;
size_t token_count_position;
struct tpf_writer tpf;
uint32_t temp_count;
static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] =
{
@ -6413,6 +6414,11 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d
VKD3D_SM4_LIB,
};
mark_indexable_vars(ctx, entry_func);
temp_count = allocate_temp_registers(ctx, entry_func);
if (ctx->result)
return;
tpf_writer_init(&tpf, ctx, stat, &buffer);
extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
@ -6450,8 +6456,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d
if (profile->type == VKD3D_SHADER_TYPE_COMPUTE)
write_sm4_dcl_thread_group(&tpf, ctx->thread_count);
if (ctx->temp_count)
write_sm4_dcl_temps(&tpf, ctx->temp_count);
if (temp_count)
write_sm4_dcl_temps(&tpf, temp_count);
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
{