vkd3d-shader/hlsl: Do not clone the entire program for loop unrolling.

This commit is contained in:
Victor Chiletto 2024-10-14 10:14:13 -03:00 committed by Henri Verbeet
parent 0a8c4a6fa2
commit de3a365fea
Notes: Henri Verbeet 2024-12-12 17:48:02 +01:00
Approved-by: Francisco Casas (@fcasas)
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1053

View File

@ -10068,12 +10068,13 @@ static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *blo
*index = current_index; *index = current_index;
} }
static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop)
struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop)
{ {
unsigned int max_iterations, i, index; unsigned int max_iterations, i, index;
struct copy_propagation_state state; struct copy_propagation_state state;
struct hlsl_block draft;
hlsl_block_init(&draft);
copy_propagation_state_init(&state, ctx); copy_propagation_state_init(&state, ctx);
index = 2; index = 2;
@ -10116,7 +10117,7 @@ static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *
if (type == HLSL_IR_JUMP_BREAK) if (type == HLSL_IR_JUMP_BREAK)
{ {
list_move_before(&loop->node.entry, &tmp_dst.instrs); hlsl_block_add_block(&draft, &tmp_dst);
hlsl_block_cleanup(&tmp_dst); hlsl_block_cleanup(&tmp_dst);
break; break;
} }
@ -10127,7 +10128,7 @@ static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *
copy_propagation_pop_scope(&state); copy_propagation_pop_scope(&state);
copy_propagation_push_scope(&state, ctx); copy_propagation_push_scope(&state, ctx);
loop_unrolling_simplify(ctx, &tmp_dst, &state, &index); loop_unrolling_simplify(ctx, &tmp_dst, &state, &index);
list_move_before(&loop->node.entry, &tmp_dst.instrs); hlsl_block_add_block(&draft, &tmp_dst);
hlsl_block_cleanup(&tmp_dst); hlsl_block_cleanup(&tmp_dst);
} }
@ -10142,6 +10143,8 @@ static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *
goto fail; goto fail;
} }
list_move_before(&loop->node.entry, &draft.instrs);
hlsl_block_cleanup(&draft);
list_remove(&loop->node.entry); list_remove(&loop->node.entry);
hlsl_free_instr(&loop->node); hlsl_free_instr(&loop->node);
copy_propagation_state_destroy(&state); copy_propagation_state_destroy(&state);
@ -10150,6 +10153,7 @@ static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *
fail: fail:
copy_propagation_state_destroy(&state); copy_propagation_state_destroy(&state);
hlsl_block_cleanup(&draft);
return false; return false;
} }
@ -10173,8 +10177,7 @@ fail:
 * "copyprop from the beginning of the program up to the instruction we're
 * currently processing" from the callback]; we'd have to use a dedicated
 * recursive function instead. */
static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block)
struct hlsl_block **containing_block)
{ {
struct hlsl_ir_node *instr; struct hlsl_ir_node *instr;
@ -10187,14 +10190,11 @@ static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx
struct hlsl_ir_loop *nested_loop; struct hlsl_ir_loop *nested_loop;
struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body)))
return nested_loop; return nested_loop;
if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
{
*containing_block = block;
return loop; return loop;
}
break; break;
} }
@ -10203,9 +10203,9 @@ static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx
struct hlsl_ir_loop *loop; struct hlsl_ir_loop *loop;
struct hlsl_ir_if *iff = hlsl_ir_if(instr); struct hlsl_ir_if *iff = hlsl_ir_if(instr);
if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block)))
return loop; return loop;
if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block)))
return loop; return loop;
break; break;
@ -10218,7 +10218,7 @@ static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
{ {
if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body)))
return loop; return loop;
} }
@ -10247,28 +10247,13 @@ static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *bloc
while (true) while (true)
{ {
struct hlsl_block clone, *containing_block; struct hlsl_ir_loop *loop = NULL;
struct hlsl_ir_loop *loop, *cloned_loop;
if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block)))
return; return;
if (!hlsl_clone_block(ctx, &clone, block)) if (!loop_unrolling_unroll_loop(ctx, block, loop))
return;
cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block);
VKD3D_ASSERT(cloned_loop);
if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop))
{
hlsl_block_cleanup(&clone);
loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP;
continue;
}
hlsl_block_cleanup(block);
hlsl_block_init(block);
hlsl_block_add_block(block, &clone);
} }
} }