mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2025-01-28 13:05:02 -08:00
vkd3d-shader/hlsl: Allocate temporary registers separately for each entry function.
This commit is contained in:
parent
5f8570b933
commit
0c5dc53fd2
Notes:
Henri Verbeet
2024-10-15 17:03:41 +02:00
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1154
@ -1089,9 +1089,6 @@ struct hlsl_ctx
|
||||
} constant_defs;
|
||||
/* 'c' registers where the constants expected by SM2 sincos are stored. */
|
||||
struct hlsl_reg d3dsincosconst1, d3dsincosconst2;
|
||||
/* Number of temp. registers required for the shader to run, i.e. the largest temp register
|
||||
* index that will be used in the output bytecode (+1). */
|
||||
uint32_t temp_count;
|
||||
|
||||
/* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in
|
||||
* compute shader profiles. It is set using the numthreads() attribute in the entry point. */
|
||||
@ -1430,6 +1427,8 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx,
|
||||
|
||||
void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body);
|
||||
void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body);
|
||||
uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func);
|
||||
void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func);
|
||||
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
|
||||
enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out);
|
||||
int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out);
|
||||
|
@ -4114,7 +4114,7 @@ static void dump_function(struct rb_entry *entry, void *context)
|
||||
}
|
||||
}
|
||||
|
||||
static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
|
||||
static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
|
||||
struct hlsl_ir_node *instr)
|
||||
{
|
||||
if (!deref->rel_offset.node)
|
||||
@ -4127,6 +4127,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Recompute the 'indexable' flag of the variables in "ctx" for one entry function.
 *
 * The flag is first cleared on every variable of every scope listed in
 * ctx->scopes; mark_indexable_var() is then passed as the callback to
 * transform_derefs() for entry_func->body.
 *
 * NOTE(review): only part of mark_indexable_var() is visible here. It checks
 * deref->rel_offset.node, so presumably it sets the flag again for variables
 * that are accessed through a relative offset -- confirm against its body. */
void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
|
||||
{
|
||||
struct hlsl_scope *scope;
|
||||
struct hlsl_ir_var *var;
|
||||
|
||||
/* Start from a clean state: any flag set by an earlier call is discarded. */
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
|
||||
{
|
||||
LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
|
||||
var->indexable = false;
|
||||
}
|
||||
|
||||
/* Only the body of the given entry function is passed to the marking callback. */
transform_derefs(ctx, mark_indexable_var, &entry_func->body);
|
||||
}
|
||||
|
||||
static char get_regset_name(enum hlsl_regset regset)
|
||||
{
|
||||
switch (regset)
|
||||
@ -5116,9 +5130,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
|
||||
* index to all (simultaneously live) variables or intermediate values. Agnostic
|
||||
* as to how many registers are actually available for the current backend, and
|
||||
* does not handle constants. */
|
||||
static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
|
||||
uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
|
||||
{
|
||||
struct register_allocator allocator = {0};
|
||||
struct hlsl_scope *scope;
|
||||
struct hlsl_ir_var *var;
|
||||
|
||||
/* Reset variable temp register allocations. */
|
||||
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
|
||||
{
|
||||
LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
|
||||
{
|
||||
if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform))
|
||||
memset(var->regs, 0, sizeof(var->regs));
|
||||
}
|
||||
}
|
||||
|
||||
/* ps_1_* outputs are special and go in temp register 0. */
|
||||
if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
|
||||
@ -5127,8 +5153,7 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio
|
||||
|
||||
for (i = 0; i < entry_func->parameters.count; ++i)
|
||||
{
|
||||
const struct hlsl_ir_var *var = entry_func->parameters.vars[i];
|
||||
|
||||
var = entry_func->parameters.vars[i];
|
||||
if (var->is_output_semantic)
|
||||
{
|
||||
record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read);
|
||||
@ -5138,8 +5163,9 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio
|
||||
}
|
||||
|
||||
allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator);
|
||||
ctx->temp_count = allocator.reg_count;
|
||||
vkd3d_free(allocator.allocations);
|
||||
|
||||
return allocator.reg_count;
|
||||
}
|
||||
|
||||
static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output)
|
||||
@ -7743,13 +7769,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct hlsl_ir_function
|
||||
compute_liveness(ctx, entry_func);
|
||||
mark_vars_usage(ctx);
|
||||
|
||||
transform_derefs(ctx, mark_indexable_vars, body);
|
||||
|
||||
calculate_resource_register_counts(ctx);
|
||||
|
||||
allocate_register_reservations(ctx, &ctx->extern_vars);
|
||||
allocate_register_reservations(ctx, &entry_func->extern_vars);
|
||||
allocate_temp_registers(ctx, entry_func);
|
||||
allocate_semantic_registers(ctx, entry_func);
|
||||
}
|
||||
|
||||
@ -7774,6 +7797,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
|
||||
|
||||
if (profile->major_version < 4)
|
||||
{
|
||||
mark_indexable_vars(ctx, entry_func);
|
||||
allocate_temp_registers(ctx, entry_func);
|
||||
allocate_const_registers(ctx, entry_func);
|
||||
}
|
||||
else
|
||||
|
@ -6387,7 +6387,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc
|
||||
}
|
||||
}
|
||||
|
||||
static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func,
|
||||
static void write_sm4_shdr(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
|
||||
struct sm4_stat *stat, struct dxbc_writer *dxbc)
|
||||
{
|
||||
const struct hlsl_profile_info *profile = ctx->profile;
|
||||
@ -6399,6 +6399,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d
|
||||
const struct hlsl_ir_var *var;
|
||||
size_t token_count_position;
|
||||
struct tpf_writer tpf;
|
||||
uint32_t temp_count;
|
||||
|
||||
static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] =
|
||||
{
|
||||
@ -6413,6 +6414,11 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d
|
||||
VKD3D_SM4_LIB,
|
||||
};
|
||||
|
||||
mark_indexable_vars(ctx, entry_func);
|
||||
temp_count = allocate_temp_registers(ctx, entry_func);
|
||||
if (ctx->result)
|
||||
return;
|
||||
|
||||
tpf_writer_init(&tpf, ctx, stat, &buffer);
|
||||
|
||||
extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
|
||||
@ -6450,8 +6456,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d
|
||||
if (profile->type == VKD3D_SHADER_TYPE_COMPUTE)
|
||||
write_sm4_dcl_thread_group(&tpf, ctx->thread_count);
|
||||
|
||||
if (ctx->temp_count)
|
||||
write_sm4_dcl_temps(&tpf, ctx->temp_count);
|
||||
if (temp_count)
|
||||
write_sm4_dcl_temps(&tpf, temp_count);
|
||||
|
||||
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user