mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2024-11-21 16:46:41 -08:00
vkd3d-shader/ir: Convert SSAs to temps only if the block of origin does not dominate all uses.
This commit is contained in:
parent
1c61776c18
commit
cfcd57209b
Notes:
Alexandre Julliard
2024-04-09 15:44:53 -05:00
Approved-by: Giovanni Mascellani (@giomasce) Approved-by: Henri Verbeet (@hverbeet) Approved-by: Alexandre Julliard (@julliard) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/749
@ -2686,36 +2686,43 @@ fail:
|
||||
return VKD3D_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program,
|
||||
struct vkd3d_shader_src_param *src);
|
||||
struct ssas_to_temps_alloc
|
||||
{
|
||||
unsigned int *table;
|
||||
unsigned int next_temp_idx;
|
||||
};
|
||||
|
||||
static bool ssas_to_temps_alloc_init(struct ssas_to_temps_alloc *alloc, unsigned int ssa_count, unsigned int temp_count)
|
||||
{
|
||||
size_t i = ssa_count * sizeof(*alloc->table);
|
||||
|
||||
if (!(alloc->table = vkd3d_malloc(i)))
|
||||
{
|
||||
ERR("Failed to allocate SSA table.\n");
|
||||
return false;
|
||||
}
|
||||
memset(alloc->table, 0xff, i);
|
||||
|
||||
alloc->next_temp_idx = temp_count;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* This is idempotent: it can be safely applied more than once on the
|
||||
* same register. */
|
||||
static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg)
|
||||
static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc,
|
||||
struct vkd3d_shader_register *reg)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (reg->type == VKD3DSPR_SSA)
|
||||
if (reg->type == VKD3DSPR_SSA && alloc->table[reg->idx[0].offset] != UINT_MAX)
|
||||
{
|
||||
reg->type = VKD3DSPR_TEMP;
|
||||
reg->idx[0].offset += program->temp_count;
|
||||
reg->idx[0].offset = alloc->table[reg->idx[0].offset];
|
||||
}
|
||||
|
||||
for (i = 0; i < reg->idx_count; ++i)
|
||||
if (reg->idx[i].rel_addr)
|
||||
materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr);
|
||||
}
|
||||
|
||||
static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program,
|
||||
struct vkd3d_shader_dst_param *dst)
|
||||
{
|
||||
materialize_ssas_to_temps_process_reg(program, &dst->reg);
|
||||
}
|
||||
|
||||
static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program,
|
||||
struct vkd3d_shader_src_param *src)
|
||||
{
|
||||
materialize_ssas_to_temps_process_reg(program, &src->reg);
|
||||
materialize_ssas_to_temps_process_reg(program, alloc, ®->idx[i].rel_addr->reg);
|
||||
}
|
||||
|
||||
struct ssas_to_temps_block_info
|
||||
@ -2740,11 +2747,12 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl
|
||||
vkd3d_free(block_info);
|
||||
}
|
||||
|
||||
static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program)
|
||||
static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program)
|
||||
{
|
||||
size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i;
|
||||
struct ssas_to_temps_block_info *info, *block_info = NULL;
|
||||
struct vkd3d_shader_instruction *instructions = NULL;
|
||||
struct ssas_to_temps_alloc alloc = {0};
|
||||
unsigned int current_label = 0;
|
||||
|
||||
if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info))))
|
||||
@ -2753,15 +2761,22 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count))
|
||||
goto fail;
|
||||
|
||||
for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i)
|
||||
{
|
||||
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
||||
unsigned int j;
|
||||
unsigned int j, temp_idx;
|
||||
|
||||
/* Only phi src/dst SSA values need be converted here. Structurisation may
|
||||
* introduce new cases of undominated SSA use, which will be handled later. */
|
||||
if (ins->handler_idx != VKD3DSIH_PHI)
|
||||
continue;
|
||||
++phi_count;
|
||||
|
||||
temp_idx = alloc.next_temp_idx++;
|
||||
|
||||
for (j = 0; j < ins->src_count; j += 2)
|
||||
{
|
||||
struct phi_incoming_to_temp *incoming;
|
||||
@ -2780,12 +2795,17 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog
|
||||
incoming->src = &ins->src[j];
|
||||
incoming->dst = ins->dst;
|
||||
|
||||
alloc.table[ins->dst->reg.idx[0].offset] = temp_idx;
|
||||
|
||||
++incoming_count;
|
||||
}
|
||||
|
||||
materialize_ssas_to_temps_process_dst_param(program, ins->dst);
|
||||
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg);
|
||||
}
|
||||
|
||||
if (!phi_count)
|
||||
goto done;
|
||||
|
||||
if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count))
|
||||
goto fail;
|
||||
|
||||
@ -2795,10 +2815,10 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog
|
||||
size_t j;
|
||||
|
||||
for (j = 0; j < ins->dst_count; ++j)
|
||||
materialize_ssas_to_temps_process_dst_param(program, &ins->dst[j]);
|
||||
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg);
|
||||
|
||||
for (j = 0; j < ins->src_count; ++j)
|
||||
materialize_ssas_to_temps_process_src_param(program, &ins->src[j]);
|
||||
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg);
|
||||
|
||||
switch (ins->handler_idx)
|
||||
{
|
||||
@ -2836,16 +2856,17 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog
|
||||
program->instructions.elements = instructions;
|
||||
program->instructions.capacity = ins_capacity;
|
||||
program->instructions.count = ins_count;
|
||||
program->temp_count += program->ssa_count;
|
||||
program->ssa_count = 0;
|
||||
|
||||
program->temp_count = alloc.next_temp_idx;
|
||||
done:
|
||||
ssas_to_temps_block_info_cleanup(block_info, program->block_count);
|
||||
vkd3d_free(alloc.table);
|
||||
|
||||
return VKD3D_OK;
|
||||
|
||||
fail:
|
||||
vkd3d_free(instructions);
|
||||
ssas_to_temps_block_info_cleanup(block_info, program->block_count);
|
||||
vkd3d_free(alloc.table);
|
||||
|
||||
return VKD3D_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
@ -4514,6 +4535,98 @@ fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc,
|
||||
struct vsir_block *block, struct vsir_block **origin_blocks)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (!register_is_ssa(reg))
|
||||
return;
|
||||
|
||||
i = reg->idx[0].offset;
|
||||
if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block))
|
||||
alloc->table[i] = alloc->next_temp_idx++;
|
||||
|
||||
for (i = 0; i < reg->idx_count; ++i)
|
||||
if (reg->idx[i].rel_addr)
|
||||
register_map_undominated_use(®->idx[i].rel_addr->reg, alloc, block, origin_blocks);
|
||||
}
|
||||
|
||||
/* Drivers are not necessarily optimised to handle very large numbers of temps. For example,
|
||||
* using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV.
|
||||
* This can also result in the backend emitting less code because temps typically need an
|
||||
* access chain and a load/store. Conversion of phi SSA values to temps should eliminate all
|
||||
* undominated SSA use, but structurisation may create new occurrences. */
|
||||
static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg)
|
||||
{
|
||||
struct vsir_program *program = cfg->program;
|
||||
struct ssas_to_temps_alloc alloc = {0};
|
||||
struct vsir_block **origin_blocks;
|
||||
unsigned int j;
|
||||
size_t i;
|
||||
|
||||
if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks))))
|
||||
{
|
||||
ERR("Failed to allocate origin block array.\n");
|
||||
return VKD3D_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count))
|
||||
{
|
||||
vkd3d_free(origin_blocks);
|
||||
return VKD3D_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
for (i = 0; i < cfg->block_count; ++i)
|
||||
{
|
||||
struct vsir_block *block = &cfg->blocks[i];
|
||||
struct vkd3d_shader_instruction *ins;
|
||||
|
||||
for (ins = block->begin; ins <= block->end; ++ins)
|
||||
{
|
||||
for (j = 0; j < ins->dst_count; ++j)
|
||||
{
|
||||
if (register_is_ssa(&ins->dst[j].reg))
|
||||
origin_blocks[ins->dst[j].reg.idx[0].offset] = block;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < cfg->block_count; ++i)
|
||||
{
|
||||
struct vsir_block *block = &cfg->blocks[i];
|
||||
struct vkd3d_shader_instruction *ins;
|
||||
|
||||
for (ins = block->begin; ins <= block->end; ++ins)
|
||||
{
|
||||
for (j = 0; j < ins->src_count; ++j)
|
||||
register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks);
|
||||
}
|
||||
}
|
||||
|
||||
if (alloc.next_temp_idx == program->temp_count)
|
||||
goto done;
|
||||
|
||||
TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count);
|
||||
|
||||
for (i = 0; i < program->instructions.count; ++i)
|
||||
{
|
||||
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
||||
|
||||
for (j = 0; j < ins->dst_count; ++j)
|
||||
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg);
|
||||
|
||||
for (j = 0; j < ins->src_count; ++j)
|
||||
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg);
|
||||
}
|
||||
|
||||
program->temp_count = alloc.next_temp_idx;
|
||||
done:
|
||||
vkd3d_free(origin_blocks);
|
||||
vkd3d_free(alloc.table);
|
||||
|
||||
return VKD3D_OK;
|
||||
}
|
||||
|
||||
struct validation_context
|
||||
{
|
||||
struct vkd3d_shader_message_context *message_context;
|
||||
@ -5396,7 +5509,7 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
|
||||
if ((result = lower_switch_to_if_ladder(program)) < 0)
|
||||
return result;
|
||||
|
||||
if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0)
|
||||
if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0)
|
||||
return result;
|
||||
|
||||
if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0)
|
||||
@ -5441,6 +5554,20 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
|
||||
}
|
||||
|
||||
vsir_cfg_cleanup(&cfg);
|
||||
|
||||
if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
|
||||
return result;
|
||||
|
||||
if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0)
|
||||
return result;
|
||||
vsir_cfg_compute_dominators(&cfg);
|
||||
|
||||
result = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg);
|
||||
|
||||
vsir_cfg_cleanup(&cfg);
|
||||
|
||||
if (result < 0)
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -5470,10 +5597,10 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
|
||||
|
||||
if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0)
|
||||
return result;
|
||||
}
|
||||
|
||||
if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
|
||||
return result;
|
||||
if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
|
||||
return result;
|
||||
}
|
||||
|
||||
if (TRACE_ON())
|
||||
vkd3d_shader_trace(program);
|
||||
|
Loading…
Reference in New Issue
Block a user