mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2025-01-28 13:05:02 -08:00
vkd3d-shader/hlsl: Implement loop unrolling.
Based on a patch by Nikolay Sivov. Co-authored-by: Nikolay Sivov <nsivov@codeweavers.com>
This commit is contained in:
parent
7edd7dcf79
commit
2034a8bab9
Notes:
Henri Verbeet
2024-07-11 00:40:50 +02:00
Approved-by: Elizabeth Figura (@zfigura) Approved-by: Henri Verbeet (@hverbeet) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/786
@ -5710,6 +5710,222 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
|
|||||||
sm1_generate_vsir_signature(ctx, program);
|
sm1_generate_vsir_signature(ctx, program);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point,
|
||||||
|
struct hlsl_block **found_block)
|
||||||
|
{
|
||||||
|
struct hlsl_ir_node *node;
|
||||||
|
|
||||||
|
LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry)
|
||||||
|
{
|
||||||
|
if (node == stop_point)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (node->type == HLSL_IR_IF)
|
||||||
|
{
|
||||||
|
struct hlsl_ir_if *iff = hlsl_ir_if(node);
|
||||||
|
struct hlsl_ir_jump *jump = NULL;
|
||||||
|
|
||||||
|
if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block)))
|
||||||
|
return jump;
|
||||||
|
if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block)))
|
||||||
|
return jump;
|
||||||
|
}
|
||||||
|
else if (node->type == HLSL_IR_JUMP)
|
||||||
|
{
|
||||||
|
struct hlsl_ir_jump *jump = hlsl_ir_jump(node);
|
||||||
|
|
||||||
|
if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE)
|
||||||
|
{
|
||||||
|
*found_block = block;
|
||||||
|
return jump;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop)
|
||||||
|
{
|
||||||
|
/* Always use the explicit limit if it has been passed. */
|
||||||
|
if (loop->unroll_limit)
|
||||||
|
return loop->unroll_limit;
|
||||||
|
|
||||||
|
/* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */
|
||||||
|
if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
|
||||||
|
return 1024;
|
||||||
|
|
||||||
|
/* SM4 limits implicit unrolling to 254 iterations. */
|
||||||
|
if (hlsl_version_ge(ctx, 4, 0))
|
||||||
|
return 254;
|
||||||
|
|
||||||
|
/* SM<3 implicitly unrolls up to 1024 iterations. */
|
||||||
|
return 1024;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block,
|
||||||
|
struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop)
|
||||||
|
{
|
||||||
|
unsigned int max_iterations, i;
|
||||||
|
|
||||||
|
max_iterations = loop_unrolling_get_max_iterations(ctx, loop);
|
||||||
|
|
||||||
|
for (i = 0; i < max_iterations; ++i)
|
||||||
|
{
|
||||||
|
struct hlsl_block tmp_dst, *jump_block;
|
||||||
|
struct hlsl_ir_jump *jump = NULL;
|
||||||
|
|
||||||
|
if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body))
|
||||||
|
return false;
|
||||||
|
list_move_before(&loop->node.entry, &tmp_dst.instrs);
|
||||||
|
hlsl_block_cleanup(&tmp_dst);
|
||||||
|
|
||||||
|
hlsl_run_const_passes(ctx, block);
|
||||||
|
|
||||||
|
if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block)))
|
||||||
|
{
|
||||||
|
enum hlsl_ir_jump_type type = jump->type;
|
||||||
|
|
||||||
|
if (jump_block != loop_parent)
|
||||||
|
{
|
||||||
|
if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
|
||||||
|
hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL,
|
||||||
|
"Unable to unroll loop, unrolling loops with conditional jumps is currently not supported.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry));
|
||||||
|
hlsl_block_cleanup(&tmp_dst);
|
||||||
|
|
||||||
|
if (type == HLSL_IR_JUMP_BREAK)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Native will not emit an error if max_iterations has been reached with an
|
||||||
|
* explicit limit. It also will not insert a loop if there are iterations left
|
||||||
|
* i.e [unroll(4)] for (i = 0; i < 8; ++i)) */
|
||||||
|
if (!loop->unroll_limit && i == max_iterations)
|
||||||
|
{
|
||||||
|
if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
|
||||||
|
hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL,
|
||||||
|
"Unable to unroll loop, maximum iterations reached (%u).", max_iterations);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
list_remove(&loop->node.entry);
|
||||||
|
hlsl_free_instr(&loop->node);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* loop_unrolling_find_unrollable_loop() is not the normal way to do things;
|
||||||
|
* normal passes simply iterate over the whole block and apply a transformation
|
||||||
|
* to every relevant instruction. However, loop unrolling can fail, and we want
|
||||||
|
* to leave the loop in its previous state in that case. That isn't a problem by
|
||||||
|
* itself, except that loop unrolling needs copy-prop in order to work properly,
|
||||||
|
* and copy-prop state at the time of the loop depends on the rest of the program
|
||||||
|
* up to that point. This means we need to clone the whole program, and at that
|
||||||
|
* point we have to search it again anyway to find the clone of the loop we were
|
||||||
|
* going to unroll.
|
||||||
|
*
|
||||||
|
* FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop
|
||||||
|
* up until the loop instruction, clone just that loop, then use copyprop again
|
||||||
|
* with the saved state after unrolling. However, copyprop currently isn't built
|
||||||
|
* for that yet [notably, it still relies on indices]. Note also this still doesn't
|
||||||
|
* really let us use transform_ir() anyway [since we don't have a good way to say
|
||||||
|
* "copyprop from the beginning of the program up to the instruction we're
|
||||||
|
* currently processing" from the callback]; we'd have to use a dedicated
|
||||||
|
* recursive function instead. */
|
||||||
|
static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block,
|
||||||
|
struct hlsl_block **containing_block)
|
||||||
|
{
|
||||||
|
struct hlsl_ir_node *instr;
|
||||||
|
|
||||||
|
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
|
||||||
|
{
|
||||||
|
switch (instr->type)
|
||||||
|
{
|
||||||
|
case HLSL_IR_LOOP:
|
||||||
|
{
|
||||||
|
struct hlsl_ir_loop *nested_loop;
|
||||||
|
struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
|
||||||
|
|
||||||
|
if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block)))
|
||||||
|
return nested_loop;
|
||||||
|
|
||||||
|
if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL)
|
||||||
|
{
|
||||||
|
*containing_block = block;
|
||||||
|
return loop;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case HLSL_IR_IF:
|
||||||
|
{
|
||||||
|
struct hlsl_ir_loop *loop;
|
||||||
|
struct hlsl_ir_if *iff = hlsl_ir_if(instr);
|
||||||
|
|
||||||
|
if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block)))
|
||||||
|
return loop;
|
||||||
|
if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block)))
|
||||||
|
return loop;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case HLSL_IR_SWITCH:
|
||||||
|
{
|
||||||
|
struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
|
||||||
|
struct hlsl_ir_switch_case *c;
|
||||||
|
struct hlsl_ir_loop *loop;
|
||||||
|
|
||||||
|
LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
|
||||||
|
{
|
||||||
|
if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block)))
|
||||||
|
return loop;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block)
|
||||||
|
{
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
struct hlsl_block clone, *containing_block;
|
||||||
|
struct hlsl_ir_loop *loop, *cloned_loop;
|
||||||
|
|
||||||
|
if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block)))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (!hlsl_clone_block(ctx, &clone, block))
|
||||||
|
return;
|
||||||
|
|
||||||
|
cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block);
|
||||||
|
assert(cloned_loop);
|
||||||
|
|
||||||
|
if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop))
|
||||||
|
{
|
||||||
|
hlsl_block_cleanup(&clone);
|
||||||
|
loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
hlsl_block_cleanup(block);
|
||||||
|
hlsl_block_init(block);
|
||||||
|
hlsl_block_add_block(block, &clone);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
|
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
|
||||||
enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out)
|
enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out)
|
||||||
{
|
{
|
||||||
@ -5796,6 +6012,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
|
|||||||
hlsl_transform_ir(ctx, lower_discard_neg, body, NULL);
|
hlsl_transform_ir(ctx, lower_discard_neg, body, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transform_unroll_loops(ctx, body);
|
||||||
hlsl_run_const_passes(ctx, body);
|
hlsl_run_const_passes(ctx, body);
|
||||||
|
|
||||||
remove_unreachable_code(ctx, body);
|
remove_unreachable_code(ctx, body);
|
||||||
|
@ -152,6 +152,7 @@ enum vkd3d_shader_error
|
|||||||
VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029,
|
VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029,
|
||||||
VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030,
|
VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030,
|
||||||
VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031,
|
VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031,
|
||||||
|
VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032,
|
||||||
|
|
||||||
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300,
|
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300,
|
||||||
VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
|
VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
|
||||||
|
@ -5,7 +5,7 @@ void main(float4 pos : position, out float tex : texcoord, out float4 out_pos :
|
|||||||
out_pos = pos;
|
out_pos = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
[pixel shader todo(sm<4)]
|
[pixel shader todo(sm<6)]
|
||||||
float4 main(float tex : texcoord) : sv_target
|
float4 main(float tex : texcoord) : sv_target
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@ -23,7 +23,7 @@ float4 main(float tex : texcoord) : sv_target
|
|||||||
}
|
}
|
||||||
|
|
||||||
[test]
|
[test]
|
||||||
todo(sm<4 | glsl) draw quad
|
todo(sm<6 | glsl) draw quad
|
||||||
probe ( 0, 0, 159, 480) rgba (10.0, 35.0, 0.0, 0.0)
|
probe ( 0, 0, 159, 480) rgba (10.0, 35.0, 0.0, 0.0)
|
||||||
probe (161, 0, 479, 480) rgba (10.0, 38.0, 0.0, 0.0)
|
probe (161, 0, 479, 480) rgba (10.0, 38.0, 0.0, 0.0)
|
||||||
probe (481, 0, 640, 480) rgba ( 5.0, 10.0, 0.0, 0.0)
|
probe (481, 0, 640, 480) rgba ( 5.0, 10.0, 0.0, 0.0)
|
||||||
@ -63,7 +63,7 @@ probe (481, 0, 640, 480) rgba ( 5.0, 10.0, 0.0, 0.0)
|
|||||||
[require]
|
[require]
|
||||||
% Reset requirements
|
% Reset requirements
|
||||||
|
|
||||||
[pixel shader todo(sm<4)]
|
[pixel shader]
|
||||||
float4 main(float tex : texcoord) : sv_target
|
float4 main(float tex : texcoord) : sv_target
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@ -76,7 +76,7 @@ float4 main(float tex : texcoord) : sv_target
|
|||||||
}
|
}
|
||||||
|
|
||||||
[test]
|
[test]
|
||||||
todo(sm<4 | glsl) draw quad
|
todo(glsl) draw quad
|
||||||
probe (0, 0) rgba (10.0, 45.0, 0.0, 0.0)
|
probe (0, 0) rgba (10.0, 45.0, 0.0, 0.0)
|
||||||
|
|
||||||
[pixel shader fail(sm<6)]
|
[pixel shader fail(sm<6)]
|
||||||
|
@ -143,7 +143,7 @@ uniform 0 float 0.9
|
|||||||
todo(sm<4 | glsl) draw quad
|
todo(sm<4 | glsl) draw quad
|
||||||
probe (0, 0) rgba (1.0, 0.9, 1.0, 0.6) 1
|
probe (0, 0) rgba (1.0, 0.9, 1.0, 0.6) 1
|
||||||
|
|
||||||
[pixel shader todo(sm<4)]
|
[pixel shader]
|
||||||
float func(out float o)
|
float func(out float o)
|
||||||
{
|
{
|
||||||
o = 0.1;
|
o = 0.1;
|
||||||
@ -181,7 +181,7 @@ float4 main() : sv_target
|
|||||||
}
|
}
|
||||||
|
|
||||||
[test]
|
[test]
|
||||||
todo(sm<4 | glsl) draw quad
|
todo(glsl) draw quad
|
||||||
probe (0, 0) rgba (0.4, 0.3, 0.3, 0.9) 1
|
probe (0, 0) rgba (0.4, 0.3, 0.3, 0.9) 1
|
||||||
|
|
||||||
[pixel shader todo(sm<4)]
|
[pixel shader todo(sm<4)]
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
% TODO: dxcompiler emits no loops for any of these test shaders.
|
% TODO: dxcompiler emits no loops for any of these test shaders.
|
||||||
|
|
||||||
[pixel shader todo(sm<4)]
|
[pixel shader]
|
||||||
float a;
|
float a;
|
||||||
|
|
||||||
float4 main() : sv_target
|
float4 main() : sv_target
|
||||||
@ -18,11 +18,11 @@ float4 main() : sv_target
|
|||||||
|
|
||||||
[test]
|
[test]
|
||||||
uniform 0 float 5.0
|
uniform 0 float 5.0
|
||||||
todo(sm<4 | glsl) draw quad
|
todo(glsl) draw quad
|
||||||
probe (0, 0) rgba (50.0, 50.0, 50.0, 50.0)
|
probe (0, 0) rgba (50.0, 50.0, 50.0, 50.0)
|
||||||
|
|
||||||
|
|
||||||
[pixel shader todo(sm<4)]
|
[pixel shader]
|
||||||
float a;
|
float a;
|
||||||
|
|
||||||
float4 main() : sv_target
|
float4 main() : sv_target
|
||||||
@ -41,10 +41,10 @@ float4 main() : sv_target
|
|||||||
|
|
||||||
[test]
|
[test]
|
||||||
uniform 0 float 4.0
|
uniform 0 float 4.0
|
||||||
todo(sm<4 | glsl) draw quad
|
todo(glsl) draw quad
|
||||||
probe (0, 0) rgba (20.0, 20.0, 20.0, 20.0)
|
probe (0, 0) rgba (20.0, 20.0, 20.0, 20.0)
|
||||||
|
|
||||||
[pixel shader todo(sm<4)]
|
[pixel shader]
|
||||||
float a;
|
float a;
|
||||||
|
|
||||||
float4 main() : sv_target
|
float4 main() : sv_target
|
||||||
@ -70,10 +70,10 @@ float4 main() : sv_target
|
|||||||
|
|
||||||
[test]
|
[test]
|
||||||
uniform 0 float 4.0
|
uniform 0 float 4.0
|
||||||
todo(sm<4 | glsl) draw quad
|
todo(glsl) draw quad
|
||||||
probe (0, 0) rgba (409.1, 409.1, 409.1, 409.1)
|
probe (0, 0) rgba (409.1, 409.1, 409.1, 409.1)
|
||||||
|
|
||||||
[pixel shader todo(sm<4)]
|
[pixel shader]
|
||||||
float a;
|
float a;
|
||||||
|
|
||||||
float4 main() : sv_target
|
float4 main() : sv_target
|
||||||
@ -100,7 +100,7 @@ float4 main() : sv_target
|
|||||||
|
|
||||||
[test]
|
[test]
|
||||||
uniform 0 float 4.0
|
uniform 0 float 4.0
|
||||||
todo(sm<4 | glsl) draw quad
|
todo(glsl) draw quad
|
||||||
probe (0, 0) rgba (410.1, 410.1, 410.1, 410.1)
|
probe (0, 0) rgba (410.1, 410.1, 410.1, 410.1)
|
||||||
|
|
||||||
% loop attribute by itself
|
% loop attribute by itself
|
||||||
|
@ -124,7 +124,7 @@ uniform 0 float 0.9
|
|||||||
todo(sm<4 | glsl) draw quad
|
todo(sm<4 | glsl) draw quad
|
||||||
probe (0, 0) rgba (0.4, 0.5, 0.6, 0.7) 1
|
probe (0, 0) rgba (0.4, 0.5, 0.6, 0.7) 1
|
||||||
|
|
||||||
[pixel shader todo(sm<4)]
|
[pixel shader]
|
||||||
void main(out float4 ret : sv_target)
|
void main(out float4 ret : sv_target)
|
||||||
{
|
{
|
||||||
ret = float4(0.1, 0.2, 0.3, 0.4);
|
ret = float4(0.1, 0.2, 0.3, 0.4);
|
||||||
@ -138,7 +138,7 @@ void main(out float4 ret : sv_target)
|
|||||||
}
|
}
|
||||||
|
|
||||||
[test]
|
[test]
|
||||||
todo(sm<4 | glsl) draw quad
|
todo(glsl) draw quad
|
||||||
probe (0, 0) rgba (0.2, 0.4, 0.6, 0.8)
|
probe (0, 0) rgba (0.2, 0.4, 0.6, 0.8)
|
||||||
|
|
||||||
[pixel shader todo(sm<4)]
|
[pixel shader todo(sm<4)]
|
||||||
|
@ -124,7 +124,7 @@ float4 main(float4 pos : sv_position) : sv_target
|
|||||||
shader model >= 4.0
|
shader model >= 4.0
|
||||||
shader model < 4.1
|
shader model < 4.1
|
||||||
|
|
||||||
[pixel shader todo]
|
[pixel shader]
|
||||||
Texture2DMS<float4, 1> t;
|
Texture2DMS<float4, 1> t;
|
||||||
|
|
||||||
float4 main(float4 pos : sv_position) : sv_target
|
float4 main(float4 pos : sv_position) : sv_target
|
||||||
@ -139,7 +139,7 @@ float4 main(float4 pos : sv_position) : sv_target
|
|||||||
}
|
}
|
||||||
|
|
||||||
[test]
|
[test]
|
||||||
todo draw quad
|
todo(glsl) draw quad
|
||||||
probe (0, 0) rgba (0.1, 0.2, 0.3, 0.4)
|
probe (0, 0) rgba (0.1, 0.2, 0.3, 0.4)
|
||||||
probe (1, 0) rgba (0.5, 0.7, 0.6, 0.8)
|
probe (1, 0) rgba (0.5, 0.7, 0.6, 0.8)
|
||||||
probe (0, 1) rgba (0.6, 0.5, 0.2, 0.1)
|
probe (0, 1) rgba (0.6, 0.5, 0.2, 0.1)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user