mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2025-01-28 13:05:02 -08:00
vkd3d-shader/ir: Convert SSAs to temps only if the block of origin does not dominate all uses.
This commit is contained in:
parent
1c61776c18
commit
cfcd57209b
Notes:
Alexandre Julliard
2024-04-09 15:44:53 -05:00
Approved-by: Giovanni Mascellani (@giomasce) Approved-by: Henri Verbeet (@hverbeet) Approved-by: Alexandre Julliard (@julliard) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/749
@ -2686,36 +2686,43 @@ fail:
|
|||||||
return VKD3D_ERROR_OUT_OF_MEMORY;
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program,
|
struct ssas_to_temps_alloc
|
||||||
struct vkd3d_shader_src_param *src);
|
{
|
||||||
|
unsigned int *table;
|
||||||
|
unsigned int next_temp_idx;
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool ssas_to_temps_alloc_init(struct ssas_to_temps_alloc *alloc, unsigned int ssa_count, unsigned int temp_count)
|
||||||
|
{
|
||||||
|
size_t i = ssa_count * sizeof(*alloc->table);
|
||||||
|
|
||||||
|
if (!(alloc->table = vkd3d_malloc(i)))
|
||||||
|
{
|
||||||
|
ERR("Failed to allocate SSA table.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
memset(alloc->table, 0xff, i);
|
||||||
|
|
||||||
|
alloc->next_temp_idx = temp_count;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/* This is idempotent: it can be safely applied more than once on the
|
/* This is idempotent: it can be safely applied more than once on the
|
||||||
* same register. */
|
* same register. */
|
||||||
static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg)
|
static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc,
|
||||||
|
struct vkd3d_shader_register *reg)
|
||||||
{
|
{
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
if (reg->type == VKD3DSPR_SSA)
|
if (reg->type == VKD3DSPR_SSA && alloc->table[reg->idx[0].offset] != UINT_MAX)
|
||||||
{
|
{
|
||||||
reg->type = VKD3DSPR_TEMP;
|
reg->type = VKD3DSPR_TEMP;
|
||||||
reg->idx[0].offset += program->temp_count;
|
reg->idx[0].offset = alloc->table[reg->idx[0].offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < reg->idx_count; ++i)
|
for (i = 0; i < reg->idx_count; ++i)
|
||||||
if (reg->idx[i].rel_addr)
|
if (reg->idx[i].rel_addr)
|
||||||
materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr);
|
materialize_ssas_to_temps_process_reg(program, alloc, &reg->idx[i].rel_addr->reg);
|
||||||
}
|
|
||||||
|
|
||||||
static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program,
|
|
||||||
struct vkd3d_shader_dst_param *dst)
|
|
||||||
{
|
|
||||||
materialize_ssas_to_temps_process_reg(program, &dst->reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program,
|
|
||||||
struct vkd3d_shader_src_param *src)
|
|
||||||
{
|
|
||||||
materialize_ssas_to_temps_process_reg(program, &src->reg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ssas_to_temps_block_info
|
struct ssas_to_temps_block_info
|
||||||
@ -2740,11 +2747,12 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl
|
|||||||
vkd3d_free(block_info);
|
vkd3d_free(block_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program)
|
static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program)
|
||||||
{
|
{
|
||||||
size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i;
|
size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i;
|
||||||
struct ssas_to_temps_block_info *info, *block_info = NULL;
|
struct ssas_to_temps_block_info *info, *block_info = NULL;
|
||||||
struct vkd3d_shader_instruction *instructions = NULL;
|
struct vkd3d_shader_instruction *instructions = NULL;
|
||||||
|
struct ssas_to_temps_alloc alloc = {0};
|
||||||
unsigned int current_label = 0;
|
unsigned int current_label = 0;
|
||||||
|
|
||||||
if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info))))
|
if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info))))
|
||||||
@ -2753,15 +2761,22 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog
|
|||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count))
|
||||||
|
goto fail;
|
||||||
|
|
||||||
for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i)
|
for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i)
|
||||||
{
|
{
|
||||||
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
||||||
unsigned int j;
|
unsigned int j, temp_idx;
|
||||||
|
|
||||||
|
/* Only phi src/dst SSA values need be converted here. Structurisation may
|
||||||
|
* introduce new cases of undominated SSA use, which will be handled later. */
|
||||||
if (ins->handler_idx != VKD3DSIH_PHI)
|
if (ins->handler_idx != VKD3DSIH_PHI)
|
||||||
continue;
|
continue;
|
||||||
++phi_count;
|
++phi_count;
|
||||||
|
|
||||||
|
temp_idx = alloc.next_temp_idx++;
|
||||||
|
|
||||||
for (j = 0; j < ins->src_count; j += 2)
|
for (j = 0; j < ins->src_count; j += 2)
|
||||||
{
|
{
|
||||||
struct phi_incoming_to_temp *incoming;
|
struct phi_incoming_to_temp *incoming;
|
||||||
@ -2780,12 +2795,17 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog
|
|||||||
incoming->src = &ins->src[j];
|
incoming->src = &ins->src[j];
|
||||||
incoming->dst = ins->dst;
|
incoming->dst = ins->dst;
|
||||||
|
|
||||||
|
alloc.table[ins->dst->reg.idx[0].offset] = temp_idx;
|
||||||
|
|
||||||
++incoming_count;
|
++incoming_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
materialize_ssas_to_temps_process_dst_param(program, ins->dst);
|
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!phi_count)
|
||||||
|
goto done;
|
||||||
|
|
||||||
if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count))
|
if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
@ -2795,10 +2815,10 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog
|
|||||||
size_t j;
|
size_t j;
|
||||||
|
|
||||||
for (j = 0; j < ins->dst_count; ++j)
|
for (j = 0; j < ins->dst_count; ++j)
|
||||||
materialize_ssas_to_temps_process_dst_param(program, &ins->dst[j]);
|
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg);
|
||||||
|
|
||||||
for (j = 0; j < ins->src_count; ++j)
|
for (j = 0; j < ins->src_count; ++j)
|
||||||
materialize_ssas_to_temps_process_src_param(program, &ins->src[j]);
|
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg);
|
||||||
|
|
||||||
switch (ins->handler_idx)
|
switch (ins->handler_idx)
|
||||||
{
|
{
|
||||||
@ -2836,16 +2856,17 @@ static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_prog
|
|||||||
program->instructions.elements = instructions;
|
program->instructions.elements = instructions;
|
||||||
program->instructions.capacity = ins_capacity;
|
program->instructions.capacity = ins_capacity;
|
||||||
program->instructions.count = ins_count;
|
program->instructions.count = ins_count;
|
||||||
program->temp_count += program->ssa_count;
|
program->temp_count = alloc.next_temp_idx;
|
||||||
program->ssa_count = 0;
|
done:
|
||||||
|
|
||||||
ssas_to_temps_block_info_cleanup(block_info, program->block_count);
|
ssas_to_temps_block_info_cleanup(block_info, program->block_count);
|
||||||
|
vkd3d_free(alloc.table);
|
||||||
|
|
||||||
return VKD3D_OK;
|
return VKD3D_OK;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
vkd3d_free(instructions);
|
vkd3d_free(instructions);
|
||||||
ssas_to_temps_block_info_cleanup(block_info, program->block_count);
|
ssas_to_temps_block_info_cleanup(block_info, program->block_count);
|
||||||
|
vkd3d_free(alloc.table);
|
||||||
|
|
||||||
return VKD3D_ERROR_OUT_OF_MEMORY;
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
||||||
}
|
}
|
||||||
@ -4514,6 +4535,98 @@ fail:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc,
|
||||||
|
struct vsir_block *block, struct vsir_block **origin_blocks)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
|
if (!register_is_ssa(reg))
|
||||||
|
return;
|
||||||
|
|
||||||
|
i = reg->idx[0].offset;
|
||||||
|
if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block))
|
||||||
|
alloc->table[i] = alloc->next_temp_idx++;
|
||||||
|
|
||||||
|
for (i = 0; i < reg->idx_count; ++i)
|
||||||
|
if (reg->idx[i].rel_addr)
|
||||||
|
register_map_undominated_use(&reg->idx[i].rel_addr->reg, alloc, block, origin_blocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Drivers are not necessarily optimised to handle very large numbers of temps. For example,
|
||||||
|
* using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV.
|
||||||
|
* This can also result in the backend emitting less code because temps typically need an
|
||||||
|
* access chain and a load/store. Conversion of phi SSA values to temps should eliminate all
|
||||||
|
* undominated SSA use, but structurisation may create new occurrences. */
|
||||||
|
static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg)
|
||||||
|
{
|
||||||
|
struct vsir_program *program = cfg->program;
|
||||||
|
struct ssas_to_temps_alloc alloc = {0};
|
||||||
|
struct vsir_block **origin_blocks;
|
||||||
|
unsigned int j;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks))))
|
||||||
|
{
|
||||||
|
ERR("Failed to allocate origin block array.\n");
|
||||||
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
||||||
|
}
|
||||||
|
if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count))
|
||||||
|
{
|
||||||
|
vkd3d_free(origin_blocks);
|
||||||
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < cfg->block_count; ++i)
|
||||||
|
{
|
||||||
|
struct vsir_block *block = &cfg->blocks[i];
|
||||||
|
struct vkd3d_shader_instruction *ins;
|
||||||
|
|
||||||
|
for (ins = block->begin; ins <= block->end; ++ins)
|
||||||
|
{
|
||||||
|
for (j = 0; j < ins->dst_count; ++j)
|
||||||
|
{
|
||||||
|
if (register_is_ssa(&ins->dst[j].reg))
|
||||||
|
origin_blocks[ins->dst[j].reg.idx[0].offset] = block;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < cfg->block_count; ++i)
|
||||||
|
{
|
||||||
|
struct vsir_block *block = &cfg->blocks[i];
|
||||||
|
struct vkd3d_shader_instruction *ins;
|
||||||
|
|
||||||
|
for (ins = block->begin; ins <= block->end; ++ins)
|
||||||
|
{
|
||||||
|
for (j = 0; j < ins->src_count; ++j)
|
||||||
|
register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (alloc.next_temp_idx == program->temp_count)
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count);
|
||||||
|
|
||||||
|
for (i = 0; i < program->instructions.count; ++i)
|
||||||
|
{
|
||||||
|
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
||||||
|
|
||||||
|
for (j = 0; j < ins->dst_count; ++j)
|
||||||
|
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg);
|
||||||
|
|
||||||
|
for (j = 0; j < ins->src_count; ++j)
|
||||||
|
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
program->temp_count = alloc.next_temp_idx;
|
||||||
|
done:
|
||||||
|
vkd3d_free(origin_blocks);
|
||||||
|
vkd3d_free(alloc.table);
|
||||||
|
|
||||||
|
return VKD3D_OK;
|
||||||
|
}
|
||||||
|
|
||||||
struct validation_context
|
struct validation_context
|
||||||
{
|
{
|
||||||
struct vkd3d_shader_message_context *message_context;
|
struct vkd3d_shader_message_context *message_context;
|
||||||
@ -5396,7 +5509,7 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
|
|||||||
if ((result = lower_switch_to_if_ladder(program)) < 0)
|
if ((result = lower_switch_to_if_ladder(program)) < 0)
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0)
|
if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0)
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0)
|
if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0)
|
||||||
@ -5441,6 +5554,20 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
|
|||||||
}
|
}
|
||||||
|
|
||||||
vsir_cfg_cleanup(&cfg);
|
vsir_cfg_cleanup(&cfg);
|
||||||
|
|
||||||
|
if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0)
|
||||||
|
return result;
|
||||||
|
vsir_cfg_compute_dominators(&cfg);
|
||||||
|
|
||||||
|
result = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg);
|
||||||
|
|
||||||
|
vsir_cfg_cleanup(&cfg);
|
||||||
|
|
||||||
|
if (result < 0)
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -5470,10 +5597,10 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
|
|||||||
|
|
||||||
if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0)
|
if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0)
|
||||||
return result;
|
return result;
|
||||||
}
|
|
||||||
|
|
||||||
if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
|
if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
|
||||||
return result;
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
if (TRACE_ON())
|
if (TRACE_ON())
|
||||||
vkd3d_shader_trace(program);
|
vkd3d_shader_trace(program);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user