vkd3d-shader/hlsl: Unroll loops with conditional jumps.

This commit is contained in:
Victor Chiletto 2024-10-21 17:17:23 -03:00 committed by Henri Verbeet
parent 351d58a95b
commit a1d995e740
Notes: Henri Verbeet 2024-12-12 17:48:02 +01:00
Approved-by: Francisco Casas (@fcasas)
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1053
4 changed files with 205 additions and 87 deletions

View File

@ -9991,35 +9991,129 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
generate_vsir_scan_global_flags(ctx, program, func);
}
static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_block **found_block)
static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc)
{
struct hlsl_ir_node *node;
struct hlsl_ir_node *const_node, *store;
LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry)
if (!(const_node = hlsl_new_bool_constant(ctx, val, loc)))
return false;
hlsl_block_add_instr(block, const_node);
if (!(store = hlsl_new_simple_store(ctx, var, const_node)))
return false;
hlsl_block_add_instr(block, store);
return true;
}
static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued);
static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ir_node *node,
struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
{
struct hlsl_ir_jump *jump;
struct hlsl_ir_var *var;
struct hlsl_block draft;
struct hlsl_ir_if *iff;
if (node->type == HLSL_IR_IF)
{
struct hlsl_ir_if *iff = hlsl_ir_if(node);
struct hlsl_ir_jump *jump = NULL;
if ((jump = loop_unrolling_find_jump(&iff->then_block, found_block)))
return jump;
if ((jump = loop_unrolling_find_jump(&iff->else_block, found_block)))
return jump;
iff = hlsl_ir_if(node);
if (loop_unrolling_remove_jumps_recurse(ctx, &iff->then_block, loop_broken, loop_continued))
return true;
if (loop_unrolling_remove_jumps_recurse(ctx, &iff->else_block, loop_broken, loop_continued))
return true;
return false;
}
else if (node->type == HLSL_IR_JUMP)
if (node->type == HLSL_IR_JUMP)
{
struct hlsl_ir_jump *jump = hlsl_ir_jump(node);
jump = hlsl_ir_jump(node);
if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK)
return false;
if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE)
hlsl_block_init(&draft);
if (jump->type == HLSL_IR_JUMP_UNRESOLVED_CONTINUE)
var = loop_continued;
else
var = loop_broken;
if (!loop_unrolling_generate_const_bool_store(ctx, var, true, &draft, &jump->node.loc))
return false;
list_move_before(&jump->node.entry, &draft.instrs);
list_remove(&jump->node.entry);
hlsl_free_instr(&jump->node);
return true;
}
return false;
}
static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx,
struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc)
{
*found_block = block;
return jump;
}
}
}
struct hlsl_ir_node *cond, *iff;
struct hlsl_block then_block;
struct hlsl_ir_load *load;
hlsl_block_init(&then_block);
if (!(load = hlsl_new_var_load(ctx, var, loc)))
return NULL;
hlsl_block_add_instr(dst, &load->node);
if (!(cond = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, loc)))
return NULL;
hlsl_block_add_instr(dst, cond);
if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc)))
return NULL;
hlsl_block_add_instr(dst, iff);
return hlsl_ir_if(iff);
}
/* Remove the first removable loop jump found in "block" and guard whatever
 * followed it.
 *
 * Every instruction of "block" is handed to loop_unrolling_remove_jumps_visit().
 * As soon as that reports that a jump was removed (either the instruction
 * itself, or one nested in the branches of an "if"), all instructions that
 * follow the visited one in "block" are moved into
 *
 *     if (!loop_broken) { if (!loop_continued) { <following instructions> } }
 *
 * which is inserted in their place. If the visited instruction was the last
 * one in the block, there is nothing to guard and the block is left as is.
 *
 * Only one jump is handled per call.
 *
 * Returns true if a jump was removed, in which case the caller should call
 * again to look for further jumps; returns false once "block" contains no
 * more removable jumps. */
static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
        struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
{
    struct hlsl_ir_node *node, *next;

    LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry)
    {
        struct hlsl_ir_if *broken_check, *continued_check;
        struct hlsl_block draft;

        if (!loop_unrolling_remove_jumps_visit(ctx, node, loop_broken, loop_continued))
            continue;

        /* "next" is the list head, i.e. the visited instruction was the last
         * one in the block: no instructions are left to be guarded. */
        if (&next->entry == &block->instrs)
            return true;

        hlsl_block_init(&draft);

        /* loop_unrolling_generate_var_check() returns NULL when it fails to
         * create one of its nodes; it must not be dereferenced in that case.
         * Anything it already emitted lives in "draft" (the inner check is
         * emitted into the outer check's then-block), so release it from there.
         * The jump itself is already gone, hence progress is still reported.
         * NOTE(review): presumably the failing constructor has recorded the
         * error in "ctx", so that compilation fails later on; please confirm. */
        if (!(broken_check = loop_unrolling_generate_var_check(ctx, &draft, loop_broken, &next->loc))
                || !(continued_check = loop_unrolling_generate_var_check(ctx,
                        &broken_check->then_block, loop_continued, &next->loc)))
        {
            hlsl_block_cleanup(&draft);
            return true;
        }

        /* Insert the checks right before the first instruction to be guarded... */
        list_move_before(&next->entry, &draft.instrs);

        /* ...and move that instruction, together with everything up to the
         * end of the block, into the innermost then-block. */
        list_move_slice_tail(&continued_check->then_block.instrs, &next->entry, list_tail(&block->instrs));

        return true;
    }

    return false;
}
/* Keep calling loop_unrolling_remove_jumps_recurse() on "block" for as long as
 * it reports having removed a jump. Each call handles a single jump, so this
 * runs until the block contains no more jumps that can be removed. */
static void loop_unrolling_remove_jumps(struct hlsl_ctx *ctx, struct hlsl_block *block,
        struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
{
    bool progress;

    do
    {
        progress = loop_unrolling_remove_jumps_recurse(ctx, block, loop_broken, loop_continued);
    } while (progress);
}
static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop)
@ -10069,68 +10163,89 @@ static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *blo
*index = current_index;
}
/* Ask the copy propagation state whether "var" is currently known to be true.
 *
 * Returns true only if copy_propagation_get_value() yields a value for "var",
 * that value's node is an HLSL_IR_CONSTANT, and the first component of the
 * constant is non-zero. Returns false in every other case, including when
 * nothing is known about "var". */
static bool loop_unrolling_check_val(struct copy_propagation_state *state, struct hlsl_ir_var *var)
{
    struct copy_propagation_value *value = copy_propagation_get_value(state, var, 0, UINT_MAX);

    /* Nothing recorded for this variable. */
    if (!value)
        return false;

    /* The recorded value is not a compile-time constant. */
    if (value->node->type != HLSL_IR_CONSTANT)
        return false;

    return hlsl_ir_constant(value->node)->value.u[0].u;
}
static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop)
{
struct hlsl_block draft, tmp_dst, loop_body;
struct hlsl_ir_var *broken, *continued;
unsigned int max_iterations, i, index;
struct copy_propagation_state state;
struct hlsl_block draft;
struct hlsl_ir_if *target_if;
if (!(broken = hlsl_new_synthetic_var(ctx, "broken",
hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
goto fail;
if (!(continued = hlsl_new_synthetic_var(ctx, "continued",
hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
goto fail;
hlsl_block_init(&draft);
copy_propagation_state_init(&state, ctx);
hlsl_block_init(&tmp_dst);
max_iterations = loop_unrolling_get_max_iterations(ctx, loop);
copy_propagation_state_init(&state, ctx);
index = 2;
state.stop = &loop->node;
loop_unrolling_simplify(ctx, block, &state, &index);
state.stopped = false;
index = loop->node.index;
max_iterations = loop_unrolling_get_max_iterations(ctx, loop);
if (!loop_unrolling_generate_const_bool_store(ctx, broken, false, &tmp_dst, &loop->node.loc))
goto fail;
hlsl_block_add_block(&draft, &tmp_dst);
if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc))
goto fail;
hlsl_block_add_block(&draft, &tmp_dst);
if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc)))
goto fail;
state.stop = LIST_ENTRY(list_head(&tmp_dst.instrs), struct hlsl_ir_node, entry);
hlsl_block_add_block(&draft, &tmp_dst);
copy_propagation_push_scope(&state, ctx);
loop_unrolling_simplify(ctx, &draft, &state, &index);
/* As an optimization, we only remove jumps from the loop's body once. */
if (!hlsl_clone_block(ctx, &loop_body, &loop->body))
goto fail;
loop_unrolling_remove_jumps(ctx, &loop_body, broken, continued);
for (i = 0; i < max_iterations; ++i)
{
struct hlsl_block tmp_dst, *jump_block;
struct hlsl_ir_jump *jump = NULL;
copy_propagation_push_scope(&state, ctx);
if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body))
if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc))
goto fail;
hlsl_block_add_block(&target_if->then_block, &tmp_dst);
loop_unrolling_simplify(ctx, &tmp_dst, &state, &index);
if ((jump = loop_unrolling_find_jump(&tmp_dst, &jump_block)))
{
enum hlsl_ir_jump_type type = jump->type;
struct hlsl_block dump;
if (jump_block != &tmp_dst)
{
if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL,
"Unable to unroll loop, unrolling loops with conditional jumps is currently not supported.");
hlsl_block_cleanup(&tmp_dst);
if (!hlsl_clone_block(ctx, &tmp_dst, &loop_body))
goto fail;
}
hlsl_block_add_block(&target_if->then_block, &tmp_dst);
hlsl_block_init(&dump);
list_move_slice_tail(&dump.instrs, &jump->node.entry, list_tail(&tmp_dst.instrs));
hlsl_block_cleanup(&dump);
loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index);
if (type == HLSL_IR_JUMP_BREAK)
{
hlsl_block_add_block(&draft, &tmp_dst);
hlsl_block_cleanup(&tmp_dst);
if (loop_unrolling_check_val(&state, broken))
break;
}
}
/* We have to run copy propagation again as the state might have
* references to nodes that were deleted above. */
copy_propagation_pop_scope(&state);
copy_propagation_push_scope(&state, ctx);
loop_unrolling_simplify(ctx, &tmp_dst, &state, &index);
if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc)))
goto fail;
hlsl_block_add_block(&draft, &tmp_dst);
hlsl_block_cleanup(&tmp_dst);
if (!hlsl_clone_block(ctx, &tmp_dst, &loop->iter))
goto fail;
hlsl_block_add_block(&target_if->then_block, &tmp_dst);
}
/* Native will not emit an error if max_iterations has been reached with an
@ -10144,15 +10259,18 @@ static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *
goto fail;
}
hlsl_block_cleanup(&loop_body);
copy_propagation_state_destroy(&state);
list_move_before(&loop->node.entry, &draft.instrs);
hlsl_block_cleanup(&draft);
list_remove(&loop->node.entry);
hlsl_free_instr(&loop->node);
copy_propagation_state_destroy(&state);
return true;
fail:
hlsl_block_cleanup(&loop_body);
copy_propagation_state_destroy(&state);
hlsl_block_cleanup(&draft);
@ -10286,9 +10404,9 @@ static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc
} while (progress);
hlsl_transform_ir(ctx, split_matrix_copies, block, NULL);
hlsl_transform_ir(ctx, unroll_loops, block, block);
resolve_continues(ctx, block, NULL);
hlsl_transform_ir(ctx, resolve_loops, block, NULL);
hlsl_transform_ir(ctx, unroll_loops, block, block);
}
static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)

View File

@ -5,7 +5,7 @@ void main(float4 pos : position, out float tex : texcoord, out float4 out_pos :
out_pos = pos;
}
[pixel shader todo(sm<6)]
[pixel shader todo(sm<4)]
float4 main(float tex : texcoord) : sv_target
{
int i;
@ -23,7 +23,7 @@ float4 main(float tex : texcoord) : sv_target
}
[test]
todo(sm<6) draw quad
todo(sm<4) draw quad
probe ( 0, 0, 159, 480) rgba (10.0, 35.0, 0.0, 0.0)
probe (161, 0, 479, 480) rgba (10.0, 38.0, 0.0, 0.0)
probe (481, 0, 640, 480) rgba ( 5.0, 10.0, 0.0, 0.0)

View File

@ -235,23 +235,23 @@ float4 main() : sv_target
[test]
uniform 0 float 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.3, 0.2, 0.3, 0.3) 1
uniform 0 float 0.1
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.3, 0.3, 0.3, 0.3) 1
uniform 0 float 0.3
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.3, 0.5, 0.3, 0.3) 1
uniform 0 float 0.7
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.3, 0.9, 0.7, 0.6) 1
uniform 0 float 0.9
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.4, 0.1, 0.7, 0.6) 1
[pixel shader todo(sm<4)]
@ -291,21 +291,21 @@ float4 main() : sv_target
uniform 0 float4 0.3 0.0 0.0 0.0
uniform 4 float4 0.0 0.0 0.0 0.0
uniform 8 float4 0.1 0.0 0.0 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.3, 0.2, 0.6, 0.6) 1
uniform 4 float4 0.35 0.0 0.0 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.3, 0.3, 0.6, 0.6) 1
uniform 8 float4 0.5 0.0 0.0 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.3, 0.5, 0.6, 0.6) 1
uniform 0 float4 1.0 0.0 0.0 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.3, 0.5, 0.6, 0.6) 1
uniform 4 float4 2.0 0.0 0.0 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.4, 0.1, 0.6, 0.6) 1

View File

@ -160,23 +160,23 @@ void main(out float4 ret : sv_target)
[test]
uniform 0 float 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.1, 0.1, 0.1, 0.1) 1
uniform 0 float 0.1
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.2, 0.2, 0.2, 0.2) 1
uniform 0 float 0.3
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 0 float 0.7
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.8, 0.8, 0.8, 0.8) 1
uniform 0 float 0.9
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.9, 0.9, 0.9, 0.9) 1
[pixel shader todo(sm<4)]
@ -236,21 +236,21 @@ void main(out float4 ret : sv_target)
uniform 0 float4 0.3 0.0 0.0 0.0
uniform 4 float4 0.0 0.0 0.0 0.0
uniform 8 float4 0.1 0.0 0.0 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.1, 0.1, 0.1, 0.1) 1
uniform 4 float4 0.35 0.0 0.0 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.2, 0.2, 0.2, 0.2) 1
uniform 8 float4 0.5 0.0 0.0 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 0 float4 1.0 0.0 0.0 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 4 float4 2.0 0.0 0.0 0.0
todo(sm<4 | msl) draw quad
todo(sm<4) draw quad
probe (0, 0) rgba (0.9, 0.9, 0.9, 0.9) 1