mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2025-01-28 13:05:02 -08:00
vkd3d-shader/hlsl: Allocate temporary registers separately for each entry function.
This commit is contained in:
parent
5f8570b933
commit
0c5dc53fd2
Notes:
Henri Verbeet
2024-10-15 17:03:41 +02:00
Approved-by: Elizabeth Figura (@zfigura) Approved-by: Henri Verbeet (@hverbeet) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1154
@ -1089,9 +1089,6 @@ struct hlsl_ctx
|
|||||||
} constant_defs;
|
} constant_defs;
|
||||||
/* 'c' registers where the constants expected by SM2 sincos are stored. */
|
/* 'c' registers where the constants expected by SM2 sincos are stored. */
|
||||||
struct hlsl_reg d3dsincosconst1, d3dsincosconst2;
|
struct hlsl_reg d3dsincosconst1, d3dsincosconst2;
|
||||||
/* Number of temp. registers required for the shader to run, i.e. the largest temp register
|
|
||||||
* index that will be used in the output bytecode (+1). */
|
|
||||||
uint32_t temp_count;
|
|
||||||
|
|
||||||
/* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in
|
/* Number of threads to be executed (on the X, Y, and Z dimensions) in a single thread group in
|
||||||
* compute shader profiles. It is set using the numthreads() attribute in the entry point. */
|
* compute shader profiles. It is set using the numthreads() attribute in the entry point. */
|
||||||
@ -1430,6 +1427,8 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx,
|
|||||||
|
|
||||||
void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body);
|
void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body);
|
||||||
void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body);
|
void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body);
|
||||||
|
uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func);
|
||||||
|
void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func);
|
||||||
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
|
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
|
||||||
enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out);
|
enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out);
|
||||||
int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out);
|
int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out);
|
||||||
|
@ -4114,7 +4114,7 @@ static void dump_function(struct rb_entry *entry, void *context)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
|
static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
|
||||||
struct hlsl_ir_node *instr)
|
struct hlsl_ir_node *instr)
|
||||||
{
|
{
|
||||||
if (!deref->rel_offset.node)
|
if (!deref->rel_offset.node)
|
||||||
@ -4127,6 +4127,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
|
||||||
|
{
|
||||||
|
struct hlsl_scope *scope;
|
||||||
|
struct hlsl_ir_var *var;
|
||||||
|
|
||||||
|
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
|
||||||
|
{
|
||||||
|
LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
|
||||||
|
var->indexable = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
transform_derefs(ctx, mark_indexable_var, &entry_func->body);
|
||||||
|
}
|
||||||
|
|
||||||
static char get_regset_name(enum hlsl_regset regset)
|
static char get_regset_name(enum hlsl_regset regset)
|
||||||
{
|
{
|
||||||
switch (regset)
|
switch (regset)
|
||||||
@ -5116,9 +5130,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
|
|||||||
* index to all (simultaneously live) variables or intermediate values. Agnostic
|
* index to all (simultaneously live) variables or intermediate values. Agnostic
|
||||||
* as to how many registers are actually available for the current backend, and
|
* as to how many registers are actually available for the current backend, and
|
||||||
* does not handle constants. */
|
* does not handle constants. */
|
||||||
static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
|
uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
|
||||||
{
|
{
|
||||||
struct register_allocator allocator = {0};
|
struct register_allocator allocator = {0};
|
||||||
|
struct hlsl_scope *scope;
|
||||||
|
struct hlsl_ir_var *var;
|
||||||
|
|
||||||
|
/* Reset variable temp register allocations. */
|
||||||
|
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
|
||||||
|
{
|
||||||
|
LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
|
||||||
|
{
|
||||||
|
if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform))
|
||||||
|
memset(var->regs, 0, sizeof(var->regs));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* ps_1_* outputs are special and go in temp register 0. */
|
/* ps_1_* outputs are special and go in temp register 0. */
|
||||||
if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
|
if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
|
||||||
@ -5127,8 +5153,7 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio
|
|||||||
|
|
||||||
for (i = 0; i < entry_func->parameters.count; ++i)
|
for (i = 0; i < entry_func->parameters.count; ++i)
|
||||||
{
|
{
|
||||||
const struct hlsl_ir_var *var = entry_func->parameters.vars[i];
|
var = entry_func->parameters.vars[i];
|
||||||
|
|
||||||
if (var->is_output_semantic)
|
if (var->is_output_semantic)
|
||||||
{
|
{
|
||||||
record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read);
|
record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read);
|
||||||
@ -5138,8 +5163,9 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio
|
|||||||
}
|
}
|
||||||
|
|
||||||
allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator);
|
allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator);
|
||||||
ctx->temp_count = allocator.reg_count;
|
|
||||||
vkd3d_free(allocator.allocations);
|
vkd3d_free(allocator.allocations);
|
||||||
|
|
||||||
|
return allocator.reg_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output)
|
static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output)
|
||||||
@ -7743,13 +7769,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct hlsl_ir_function
|
|||||||
compute_liveness(ctx, entry_func);
|
compute_liveness(ctx, entry_func);
|
||||||
mark_vars_usage(ctx);
|
mark_vars_usage(ctx);
|
||||||
|
|
||||||
transform_derefs(ctx, mark_indexable_vars, body);
|
|
||||||
|
|
||||||
calculate_resource_register_counts(ctx);
|
calculate_resource_register_counts(ctx);
|
||||||
|
|
||||||
allocate_register_reservations(ctx, &ctx->extern_vars);
|
allocate_register_reservations(ctx, &ctx->extern_vars);
|
||||||
allocate_register_reservations(ctx, &entry_func->extern_vars);
|
allocate_register_reservations(ctx, &entry_func->extern_vars);
|
||||||
allocate_temp_registers(ctx, entry_func);
|
|
||||||
allocate_semantic_registers(ctx, entry_func);
|
allocate_semantic_registers(ctx, entry_func);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -7774,6 +7797,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
|
|||||||
|
|
||||||
if (profile->major_version < 4)
|
if (profile->major_version < 4)
|
||||||
{
|
{
|
||||||
|
mark_indexable_vars(ctx, entry_func);
|
||||||
|
allocate_temp_registers(ctx, entry_func);
|
||||||
allocate_const_registers(ctx, entry_func);
|
allocate_const_registers(ctx, entry_func);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -6387,7 +6387,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func,
|
static void write_sm4_shdr(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
|
||||||
struct sm4_stat *stat, struct dxbc_writer *dxbc)
|
struct sm4_stat *stat, struct dxbc_writer *dxbc)
|
||||||
{
|
{
|
||||||
const struct hlsl_profile_info *profile = ctx->profile;
|
const struct hlsl_profile_info *profile = ctx->profile;
|
||||||
@ -6399,6 +6399,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d
|
|||||||
const struct hlsl_ir_var *var;
|
const struct hlsl_ir_var *var;
|
||||||
size_t token_count_position;
|
size_t token_count_position;
|
||||||
struct tpf_writer tpf;
|
struct tpf_writer tpf;
|
||||||
|
uint32_t temp_count;
|
||||||
|
|
||||||
static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] =
|
static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] =
|
||||||
{
|
{
|
||||||
@ -6413,6 +6414,11 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d
|
|||||||
VKD3D_SM4_LIB,
|
VKD3D_SM4_LIB,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
mark_indexable_vars(ctx, entry_func);
|
||||||
|
temp_count = allocate_temp_registers(ctx, entry_func);
|
||||||
|
if (ctx->result)
|
||||||
|
return;
|
||||||
|
|
||||||
tpf_writer_init(&tpf, ctx, stat, &buffer);
|
tpf_writer_init(&tpf, ctx, stat, &buffer);
|
||||||
|
|
||||||
extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
|
extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
|
||||||
@ -6450,8 +6456,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_d
|
|||||||
if (profile->type == VKD3D_SHADER_TYPE_COMPUTE)
|
if (profile->type == VKD3D_SHADER_TYPE_COMPUTE)
|
||||||
write_sm4_dcl_thread_group(&tpf, ctx->thread_count);
|
write_sm4_dcl_thread_group(&tpf, ctx->thread_count);
|
||||||
|
|
||||||
if (ctx->temp_count)
|
if (temp_count)
|
||||||
write_sm4_dcl_temps(&tpf, ctx->temp_count);
|
write_sm4_dcl_temps(&tpf, temp_count);
|
||||||
|
|
||||||
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
|
LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user