mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2025-01-28 13:05:02 -08:00
vkd3d-shader/hlsl: Unroll loops with conditional jumps.
This commit is contained in:
parent
351d58a95b
commit
a1d995e740
Notes:
Henri Verbeet
2024-12-12 17:48:02 +01:00
Approved-by: Francisco Casas (@fcasas)
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1053
@ -9991,35 +9991,129 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
|
||||
generate_vsir_scan_global_flags(ctx, program, func);
|
||||
}
|
||||
|
||||
static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_block **found_block)
|
||||
static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
|
||||
bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc)
|
||||
{
|
||||
struct hlsl_ir_node *node;
|
||||
struct hlsl_ir_node *const_node, *store;
|
||||
|
||||
LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry)
|
||||
if (!(const_node = hlsl_new_bool_constant(ctx, val, loc)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, const_node);
|
||||
|
||||
if (!(store = hlsl_new_simple_store(ctx, var, const_node)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, store);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
|
||||
struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued);
|
||||
|
||||
static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ir_node *node,
|
||||
struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
|
||||
{
|
||||
struct hlsl_ir_jump *jump;
|
||||
struct hlsl_ir_var *var;
|
||||
struct hlsl_block draft;
|
||||
struct hlsl_ir_if *iff;
|
||||
|
||||
if (node->type == HLSL_IR_IF)
|
||||
{
|
||||
if (node->type == HLSL_IR_IF)
|
||||
{
|
||||
struct hlsl_ir_if *iff = hlsl_ir_if(node);
|
||||
struct hlsl_ir_jump *jump = NULL;
|
||||
|
||||
if ((jump = loop_unrolling_find_jump(&iff->then_block, found_block)))
|
||||
return jump;
|
||||
if ((jump = loop_unrolling_find_jump(&iff->else_block, found_block)))
|
||||
return jump;
|
||||
}
|
||||
else if (node->type == HLSL_IR_JUMP)
|
||||
{
|
||||
struct hlsl_ir_jump *jump = hlsl_ir_jump(node);
|
||||
|
||||
if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE)
|
||||
{
|
||||
*found_block = block;
|
||||
return jump;
|
||||
}
|
||||
}
|
||||
iff = hlsl_ir_if(node);
|
||||
if (loop_unrolling_remove_jumps_recurse(ctx, &iff->then_block, loop_broken, loop_continued))
|
||||
return true;
|
||||
if (loop_unrolling_remove_jumps_recurse(ctx, &iff->else_block, loop_broken, loop_continued))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
if (node->type == HLSL_IR_JUMP)
|
||||
{
|
||||
jump = hlsl_ir_jump(node);
|
||||
if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK)
|
||||
return false;
|
||||
|
||||
hlsl_block_init(&draft);
|
||||
|
||||
if (jump->type == HLSL_IR_JUMP_UNRESOLVED_CONTINUE)
|
||||
var = loop_continued;
|
||||
else
|
||||
var = loop_broken;
|
||||
|
||||
if (!loop_unrolling_generate_const_bool_store(ctx, var, true, &draft, &jump->node.loc))
|
||||
return false;
|
||||
|
||||
list_move_before(&jump->node.entry, &draft.instrs);
|
||||
list_remove(&jump->node.entry);
|
||||
hlsl_free_instr(&jump->node);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx,
|
||||
struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc)
|
||||
{
|
||||
struct hlsl_ir_node *cond, *iff;
|
||||
struct hlsl_block then_block;
|
||||
struct hlsl_ir_load *load;
|
||||
|
||||
hlsl_block_init(&then_block);
|
||||
|
||||
if (!(load = hlsl_new_var_load(ctx, var, loc)))
|
||||
return NULL;
|
||||
hlsl_block_add_instr(dst, &load->node);
|
||||
|
||||
if (!(cond = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, loc)))
|
||||
return NULL;
|
||||
hlsl_block_add_instr(dst, cond);
|
||||
|
||||
if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc)))
|
||||
return NULL;
|
||||
hlsl_block_add_instr(dst, iff);
|
||||
|
||||
return hlsl_ir_if(iff);
|
||||
}
|
||||
|
||||
static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
|
||||
struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
|
||||
{
|
||||
struct hlsl_ir_node *node, *next;
|
||||
|
||||
LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry)
|
||||
{
|
||||
struct hlsl_ir_if *broken_check, *continued_check;
|
||||
struct hlsl_block draft;
|
||||
|
||||
if (!loop_unrolling_remove_jumps_visit(ctx, node, loop_broken, loop_continued))
|
||||
continue;
|
||||
|
||||
if (&next->entry == &block->instrs)
|
||||
return true;
|
||||
|
||||
hlsl_block_init(&draft);
|
||||
|
||||
broken_check = loop_unrolling_generate_var_check(ctx, &draft, loop_broken, &next->loc);
|
||||
continued_check = loop_unrolling_generate_var_check(ctx,
|
||||
&broken_check->then_block, loop_continued, &next->loc);
|
||||
|
||||
list_move_before(&next->entry, &draft.instrs);
|
||||
|
||||
list_move_slice_tail(&continued_check->then_block.instrs, &next->entry, list_tail(&block->instrs));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Keep rewriting "block" with loop_unrolling_remove_jumps_recurse() until it
 * reports that it made no further change. "loop_broken" and "loop_continued"
 * are passed through unchanged on every call. */
static void loop_unrolling_remove_jumps(struct hlsl_ctx *ctx, struct hlsl_block *block,
        struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
{
    bool progress;

    do
    {
        progress = loop_unrolling_remove_jumps_recurse(ctx, block, loop_broken, loop_continued);
    } while (progress);
}
|
||||
|
||||
static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop)
|
||||
@ -10069,69 +10163,90 @@ static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *blo
|
||||
*index = current_index;
|
||||
}
|
||||
|
||||
static bool loop_unrolling_check_val(struct copy_propagation_state *state, struct hlsl_ir_var *var)
|
||||
{
|
||||
struct copy_propagation_value *v;
|
||||
|
||||
if (!(v = copy_propagation_get_value(state, var, 0, UINT_MAX))
|
||||
|| v->node->type != HLSL_IR_CONSTANT)
|
||||
return false;
|
||||
|
||||
return hlsl_ir_constant(v->node)->value.u[0].u;
|
||||
}
|
||||
|
||||
static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop)
|
||||
{
|
||||
struct hlsl_block draft, tmp_dst, loop_body;
|
||||
struct hlsl_ir_var *broken, *continued;
|
||||
unsigned int max_iterations, i, index;
|
||||
struct copy_propagation_state state;
|
||||
struct hlsl_block draft;
|
||||
struct hlsl_ir_if *target_if;
|
||||
|
||||
if (!(broken = hlsl_new_synthetic_var(ctx, "broken",
|
||||
hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
|
||||
goto fail;
|
||||
|
||||
if (!(continued = hlsl_new_synthetic_var(ctx, "continued",
|
||||
hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
|
||||
goto fail;
|
||||
|
||||
hlsl_block_init(&draft);
|
||||
copy_propagation_state_init(&state, ctx);
|
||||
hlsl_block_init(&tmp_dst);
|
||||
|
||||
max_iterations = loop_unrolling_get_max_iterations(ctx, loop);
|
||||
copy_propagation_state_init(&state, ctx);
|
||||
index = 2;
|
||||
state.stop = &loop->node;
|
||||
loop_unrolling_simplify(ctx, block, &state, &index);
|
||||
state.stopped = false;
|
||||
index = loop->node.index;
|
||||
|
||||
max_iterations = loop_unrolling_get_max_iterations(ctx, loop);
|
||||
if (!loop_unrolling_generate_const_bool_store(ctx, broken, false, &tmp_dst, &loop->node.loc))
|
||||
goto fail;
|
||||
hlsl_block_add_block(&draft, &tmp_dst);
|
||||
|
||||
if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc))
|
||||
goto fail;
|
||||
hlsl_block_add_block(&draft, &tmp_dst);
|
||||
|
||||
if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc)))
|
||||
goto fail;
|
||||
state.stop = LIST_ENTRY(list_head(&tmp_dst.instrs), struct hlsl_ir_node, entry);
|
||||
hlsl_block_add_block(&draft, &tmp_dst);
|
||||
|
||||
copy_propagation_push_scope(&state, ctx);
|
||||
loop_unrolling_simplify(ctx, &draft, &state, &index);
|
||||
|
||||
/* As an optimization, we only remove jumps from the loop's body once. */
|
||||
if (!hlsl_clone_block(ctx, &loop_body, &loop->body))
|
||||
goto fail;
|
||||
loop_unrolling_remove_jumps(ctx, &loop_body, broken, continued);
|
||||
|
||||
for (i = 0; i < max_iterations; ++i)
|
||||
{
|
||||
struct hlsl_block tmp_dst, *jump_block;
|
||||
struct hlsl_ir_jump *jump = NULL;
|
||||
|
||||
copy_propagation_push_scope(&state, ctx);
|
||||
|
||||
if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body))
|
||||
if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc))
|
||||
goto fail;
|
||||
hlsl_block_add_block(&target_if->then_block, &tmp_dst);
|
||||
|
||||
loop_unrolling_simplify(ctx, &tmp_dst, &state, &index);
|
||||
if (!hlsl_clone_block(ctx, &tmp_dst, &loop_body))
|
||||
goto fail;
|
||||
hlsl_block_add_block(&target_if->then_block, &tmp_dst);
|
||||
|
||||
if ((jump = loop_unrolling_find_jump(&tmp_dst, &jump_block)))
|
||||
{
|
||||
enum hlsl_ir_jump_type type = jump->type;
|
||||
struct hlsl_block dump;
|
||||
loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index);
|
||||
|
||||
if (jump_block != &tmp_dst)
|
||||
{
|
||||
if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
|
||||
hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL,
|
||||
"Unable to unroll loop, unrolling loops with conditional jumps is currently not supported.");
|
||||
hlsl_block_cleanup(&tmp_dst);
|
||||
goto fail;
|
||||
}
|
||||
if (loop_unrolling_check_val(&state, broken))
|
||||
break;
|
||||
|
||||
hlsl_block_init(&dump);
|
||||
list_move_slice_tail(&dump.instrs, &jump->node.entry, list_tail(&tmp_dst.instrs));
|
||||
hlsl_block_cleanup(&dump);
|
||||
|
||||
if (type == HLSL_IR_JUMP_BREAK)
|
||||
{
|
||||
hlsl_block_add_block(&draft, &tmp_dst);
|
||||
hlsl_block_cleanup(&tmp_dst);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* We have to run copy propagation again as the state might have
|
||||
* references to nodes that were deleted above. */
|
||||
copy_propagation_pop_scope(&state);
|
||||
copy_propagation_push_scope(&state, ctx);
|
||||
loop_unrolling_simplify(ctx, &tmp_dst, &state, &index);
|
||||
if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc)))
|
||||
goto fail;
|
||||
hlsl_block_add_block(&draft, &tmp_dst);
|
||||
hlsl_block_cleanup(&tmp_dst);
|
||||
}
|
||||
|
||||
if (!hlsl_clone_block(ctx, &tmp_dst, &loop->iter))
|
||||
goto fail;
|
||||
hlsl_block_add_block(&target_if->then_block, &tmp_dst);
|
||||
}
|
||||
|
||||
/* Native will not emit an error if max_iterations has been reached with an
|
||||
* explicit limit. It also will not insert a loop if there are iterations left
|
||||
@ -10144,15 +10259,18 @@ static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *
|
||||
goto fail;
|
||||
}
|
||||
|
||||
hlsl_block_cleanup(&loop_body);
|
||||
copy_propagation_state_destroy(&state);
|
||||
|
||||
list_move_before(&loop->node.entry, &draft.instrs);
|
||||
hlsl_block_cleanup(&draft);
|
||||
list_remove(&loop->node.entry);
|
||||
hlsl_free_instr(&loop->node);
|
||||
copy_propagation_state_destroy(&state);
|
||||
|
||||
return true;
|
||||
|
||||
fail:
|
||||
hlsl_block_cleanup(&loop_body);
|
||||
copy_propagation_state_destroy(&state);
|
||||
hlsl_block_cleanup(&draft);
|
||||
|
||||
@ -10286,9 +10404,9 @@ static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc
|
||||
} while (progress);
|
||||
hlsl_transform_ir(ctx, split_matrix_copies, block, NULL);
|
||||
|
||||
hlsl_transform_ir(ctx, unroll_loops, block, block);
|
||||
resolve_continues(ctx, block, NULL);
|
||||
hlsl_transform_ir(ctx, resolve_loops, block, NULL);
|
||||
hlsl_transform_ir(ctx, unroll_loops, block, block);
|
||||
}
|
||||
|
||||
static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
|
||||
|
@ -5,7 +5,7 @@ void main(float4 pos : position, out float tex : texcoord, out float4 out_pos :
|
||||
out_pos = pos;
|
||||
}
|
||||
|
||||
[pixel shader todo(sm<6)]
|
||||
[pixel shader todo(sm<4)]
|
||||
float4 main(float tex : texcoord) : sv_target
|
||||
{
|
||||
int i;
|
||||
@ -23,7 +23,7 @@ float4 main(float tex : texcoord) : sv_target
|
||||
}
|
||||
|
||||
[test]
|
||||
todo(sm<6) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe ( 0, 0, 159, 480) rgba (10.0, 35.0, 0.0, 0.0)
|
||||
probe (161, 0, 479, 480) rgba (10.0, 38.0, 0.0, 0.0)
|
||||
probe (481, 0, 640, 480) rgba ( 5.0, 10.0, 0.0, 0.0)
|
||||
|
@ -235,23 +235,23 @@ float4 main() : sv_target
|
||||
|
||||
[test]
|
||||
uniform 0 float 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.3, 0.2, 0.3, 0.3) 1
|
||||
|
||||
uniform 0 float 0.1
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.3, 0.3, 0.3, 0.3) 1
|
||||
|
||||
uniform 0 float 0.3
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.3, 0.5, 0.3, 0.3) 1
|
||||
|
||||
uniform 0 float 0.7
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.3, 0.9, 0.7, 0.6) 1
|
||||
|
||||
uniform 0 float 0.9
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.4, 0.1, 0.7, 0.6) 1
|
||||
|
||||
[pixel shader todo(sm<4)]
|
||||
@ -291,21 +291,21 @@ float4 main() : sv_target
|
||||
uniform 0 float4 0.3 0.0 0.0 0.0
|
||||
uniform 4 float4 0.0 0.0 0.0 0.0
|
||||
uniform 8 float4 0.1 0.0 0.0 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.3, 0.2, 0.6, 0.6) 1
|
||||
|
||||
uniform 4 float4 0.35 0.0 0.0 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.3, 0.3, 0.6, 0.6) 1
|
||||
|
||||
uniform 8 float4 0.5 0.0 0.0 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.3, 0.5, 0.6, 0.6) 1
|
||||
|
||||
uniform 0 float4 1.0 0.0 0.0 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.3, 0.5, 0.6, 0.6) 1
|
||||
|
||||
uniform 4 float4 2.0 0.0 0.0 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.4, 0.1, 0.6, 0.6) 1
|
||||
|
@ -160,23 +160,23 @@ void main(out float4 ret : sv_target)
|
||||
|
||||
[test]
|
||||
uniform 0 float 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.1, 0.1, 0.1, 0.1) 1
|
||||
|
||||
uniform 0 float 0.1
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.2, 0.2, 0.2, 0.2) 1
|
||||
|
||||
uniform 0 float 0.3
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.4, 0.4, 0.4, 0.4) 1
|
||||
|
||||
uniform 0 float 0.7
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.8, 0.8, 0.8, 0.8) 1
|
||||
|
||||
uniform 0 float 0.9
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.9, 0.9, 0.9, 0.9) 1
|
||||
|
||||
[pixel shader todo(sm<4)]
|
||||
@ -236,21 +236,21 @@ void main(out float4 ret : sv_target)
|
||||
uniform 0 float4 0.3 0.0 0.0 0.0
|
||||
uniform 4 float4 0.0 0.0 0.0 0.0
|
||||
uniform 8 float4 0.1 0.0 0.0 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.1, 0.1, 0.1, 0.1) 1
|
||||
|
||||
uniform 4 float4 0.35 0.0 0.0 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.2, 0.2, 0.2, 0.2) 1
|
||||
|
||||
uniform 8 float4 0.5 0.0 0.0 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.4, 0.4, 0.4, 0.4) 1
|
||||
|
||||
uniform 0 float4 1.0 0.0 0.0 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.4, 0.4, 0.4, 0.4) 1
|
||||
|
||||
uniform 4 float4 2.0 0.0 0.0 0.0
|
||||
todo(sm<4 | msl) draw quad
|
||||
todo(sm<4) draw quad
|
||||
probe (0, 0) rgba (0.9, 0.9, 0.9, 0.9) 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user