vkd3d-shader/ir: Introduce a simple control flow graph structurizer.

The structurizer is implemented along the lines of what is usually called
the "structured program theorem": the control flow is completely
virtualized by means of an additional TEMP register which stores the
index of the block which is currently running. The whole program is then
converted to a huge switch construction enclosed in a loop, executing
at each iteration the appropriate block and updating the register
depending on the block's jump instruction.

The algorithm's generality is also its major weakness: it accepts any
input program, even if its CFG is not reducible, but the output
program lacks any useful convergence information. It satisfies the
letter of the SPIR-V requirements, but it is expected that it will
be very inefficient to run on a GPU (unless a downstream compiler is
able to devirtualize the control flow and do a proper convergence
analysis pass). The algorithm is however very simple, and good enough
to at least pass tests, enabling further development. A better
alternative is expected to be upstreamed incrementally.

Side note: the structured program theorem is often called the
Böhm-Jacopini theorem; Böhm and Jacopini did indeed prove a variation
of it, but their algorithm is different from what is commonly attributed
to them and implemented here, so I opted for not using their name.
This commit is contained in:
Giovanni Mascellani 2024-01-16 23:56:43 +01:00 committed by Alexandre Julliard
parent 19aef21369
commit 51f13391e6
Notes: Alexandre Julliard 2024-02-06 23:42:19 +01:00
Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Conor McCarthy (@cmccarthy)
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Alexandre Julliard (@julliard)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/607
8 changed files with 209 additions and 68 deletions

View File

@ -470,6 +470,25 @@ static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned
dst->reg.idx[0].offset = idx;
}
/*
 * Initialise `dst` as a destination parameter referring to a TEMP register
 * with UINT data type.
 *
 * dst: destination parameter to set up.
 * idx: value stored in the offset of the register's first index.
 *
 * The write mask is set to VKD3DSP_WRITEMASK_0. The trailing `1` passed to
 * vsir_dst_param_init() is presumably the number of register indices
 * (TODO confirm against its definition).
 */
static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx)
{
    vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
    dst->write_mask = VKD3DSP_WRITEMASK_0;
    dst->reg.idx[0].offset = idx;
}
/*
 * Initialise `src` as a source parameter referring to a TEMP register with
 * UINT data type.
 *
 * src: source parameter to set up.
 * idx: value stored in the offset of the register's first index.
 */
static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx)
{
    vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
    /* First element of the register's index array. */
    src->reg.idx->offset = idx;
}
/*
 * Initialise `src` as an IMMCONST source parameter with UINT data type.
 *
 * src:   source parameter to set up.
 * value: stored in the first slot of the register's immconst_u32 storage.
 */
static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value)
{
    vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0);
    /* Only the first 32-bit slot is written. */
    *src->reg.u.immconst_u32 = value;
}
void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location,
enum vkd3d_shader_opcode handler_idx)
{
@ -2871,6 +2890,125 @@ fail:
return VKD3D_ERROR_OUT_OF_MEMORY;
}
static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *parser)
{
const unsigned int block_temp_idx = parser->program.temp_count;
struct vkd3d_shader_instruction *instructions = NULL;
const struct vkd3d_shader_location no_loc = {0};
size_t ins_capacity = 0, ins_count = 0, i;
bool first_label_found = false;
if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count))
goto fail;
for (i = 0; i < parser->program.instructions.count; ++i)
{
struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i];
switch (ins->handler_idx)
{
case VKD3DSIH_PHI:
case VKD3DSIH_SWITCH_MONOLITHIC:
vkd3d_unreachable();
case VKD3DSIH_LABEL:
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 4))
goto fail;
if (!first_label_found)
{
first_label_found = true;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1))
goto fail;
dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx);
src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0]));
ins_count++;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_LOOP, 0, 0))
goto fail;
ins_count++;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_SWITCH, 0, 1))
goto fail;
src_param_init_temp_uint(&instructions[ins_count].src[0], block_temp_idx);
ins_count++;
}
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_CASE, 0, 1))
goto fail;
src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0]));
ins_count++;
break;
case VKD3DSIH_BRANCH:
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 2))
goto fail;
if (vsir_register_is_label(&ins->src[0].reg))
{
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1))
goto fail;
dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx);
src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0]));
ins_count++;
}
else
{
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOVC, 1, 3))
goto fail;
dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx);
instructions[ins_count].src[0] = ins->src[0];
src_param_init_const_uint(&instructions[ins_count].src[1], label_from_src_param(&ins->src[1]));
src_param_init_const_uint(&instructions[ins_count].src[2], label_from_src_param(&ins->src[2]));
ins_count++;
}
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_BREAK, 0, 0))
goto fail;
ins_count++;
break;
case VKD3DSIH_RET:
default:
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1))
goto fail;
instructions[ins_count++] = *ins;
break;
}
}
assert(first_label_found);
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3))
goto fail;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDSWITCH, 0, 0))
goto fail;
ins_count++;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDLOOP, 0, 0))
goto fail;
ins_count++;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_RET, 0, 0))
goto fail;
ins_count++;
vkd3d_free(parser->program.instructions.elements);
parser->program.instructions.elements = instructions;
parser->program.instructions.capacity = ins_capacity;
parser->program.instructions.count = ins_count;
parser->program.temp_count += 1;
return VKD3D_OK;
fail:
vkd3d_free(instructions);
return VKD3D_ERROR_OUT_OF_MEMORY;
}
enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
const struct vkd3d_shader_compile_info *compile_info)
{
@ -2889,6 +3027,9 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
if ((result = materialize_ssas_to_temps(parser)) < 0)
return result;
if ((result = simple_structurizer_run(parser)) < 0)
return result;
}
else
{
@ -2916,13 +3057,13 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
remove_dead_code(&parser->program);
if ((result = flatten_control_flow_constructs(parser)) < 0)
return result;
if ((result = normalise_combined_samplers(parser)) < 0)
return result;
}
if ((result = flatten_control_flow_constructs(parser)) < 0)
return result;
if (TRACE_ON())
vkd3d_shader_trace(&parser->program);

View File

@ -88,7 +88,7 @@ float4 main() : sv_target
[test]
uniform 0 float4 0.0 0.0 0.0 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.9, 0.8, 0.7, 0.6)
[pixel shader]

View File

@ -54,7 +54,7 @@ float4 main(float tex : texcoord) : sv_target
[test]
uniform 0 uint4 10 0 0 0
todo(sm>=6) draw quad
draw quad
probe ( 0, 0, 159, 480) rgba (10.0, 35.0, 0.0, 0.0)
probe (161, 0, 479, 480) rgba (10.0, 38.0, 0.0, 0.0)
probe (481, 0, 640, 480) rgba ( 5.0, 10.0, 0.0, 0.0)

View File

@ -79,16 +79,16 @@ float4 main() : sv_target
[test]
uniform 0 float 0.1
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.3, 0.2, 0.6, 0.3) 1
uniform 0 float 0.4
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.6, 0.5, 0.6, 0.3) 1
uniform 0 float 0.6
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.6, 0.5, 0.4, 0.5) 1
uniform 0 float 0.8
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.8, 0.7, 0.4, 0.5) 1
[pixel shader todo(sm<4)]
@ -134,13 +134,13 @@ float4 main() : sv_target
[test]
uniform 0 float 0.1
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.2, 0.1, 0.2, 0.1) 1
uniform 0 float 0.5
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.5, 0.4, 1.0, 0.9) 1
uniform 0 float 0.9
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (1.0, 0.9, 1.0, 0.6) 1
[pixel shader todo(sm<4)]
@ -235,23 +235,23 @@ float4 main() : sv_target
[test]
uniform 0 float 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.3, 0.2, 0.3, 0.3) 1
uniform 0 float 0.1
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.3, 0.3, 0.3, 0.3) 1
uniform 0 float 0.3
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.3, 0.5, 0.3, 0.3) 1
uniform 0 float 0.7
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.3, 0.9, 0.7, 0.6) 1
uniform 0 float 0.9
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.4, 0.1, 0.7, 0.6) 1
[pixel shader todo(sm<4)]
@ -291,21 +291,21 @@ float4 main() : sv_target
uniform 0 float4 0.3 0.0 0.0 0.0
uniform 4 float4 0.0 0.0 0.0 0.0
uniform 8 float4 0.1 0.0 0.0 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm>=6) probe all rgba (0.3, 0.2, 0.6, 0.6) 1
todo(sm<4) draw quad
probe all rgba (0.3, 0.2, 0.6, 0.6) 1
uniform 4 float4 0.35 0.0 0.0 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm>=6) probe all rgba (0.3, 0.3, 0.6, 0.6) 1
todo(sm<4) draw quad
probe all rgba (0.3, 0.3, 0.6, 0.6) 1
uniform 8 float4 0.5 0.0 0.0 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm>=6) probe all rgba (0.3, 0.5, 0.6, 0.6) 1
todo(sm<4) draw quad
probe all rgba (0.3, 0.5, 0.6, 0.6) 1
uniform 0 float4 1.0 0.0 0.0 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm>=6) probe all rgba (0.3, 0.5, 0.6, 0.6) 1
todo(sm<4) draw quad
probe all rgba (0.3, 0.5, 0.6, 0.6) 1
uniform 4 float4 2.0 0.0 0.0 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm>=6) probe all rgba (0.4, 0.1, 0.6, 0.6) 1
todo(sm<4) draw quad
probe all rgba (0.4, 0.1, 0.6, 0.6) 1

View File

@ -118,7 +118,7 @@ float4 main() : sv_target
}
[test]
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (10.0, 10.0, 10.0, 10.0)
[pixel shader todo(sm<4)]
@ -137,7 +137,7 @@ float4 main() : sv_target
}
[test]
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (10.0, 10.0, 10.0, 10.0)
[pixel shader todo(sm<4)]
@ -156,7 +156,7 @@ float4 main() : sv_target
}
[test]
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (10.0, 10.0, 10.0, 10.0)
% unroll can't be used with fastopt or loop

View File

@ -89,13 +89,13 @@ void main(out float4 ret : sv_target)
[test]
uniform 0 float 0.1
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.1, 0.2, 0.3, 0.4) 1
uniform 0 float 0.5
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.2, 0.3, 0.4, 0.5) 1
uniform 0 float 0.9
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.5, 0.6, 0.7, 0.8) 1
[pixel shader todo(sm<4)]
@ -115,13 +115,13 @@ void main(out float4 ret : sv_target)
[test]
uniform 0 float 0.1
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.1, 0.2, 0.3, 0.4) 1
uniform 0 float 0.5
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.5, 0.6, 0.7, 0.8) 1
uniform 0 float 0.9
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.4, 0.5, 0.6, 0.7) 1
[pixel shader todo(sm<4)]
@ -160,23 +160,23 @@ void main(out float4 ret : sv_target)
[test]
uniform 0 float 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.1, 0.1, 0.1, 0.1) 1
uniform 0 float 0.1
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.2, 0.2, 0.2, 0.2) 1
uniform 0 float 0.3
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 0 float 0.7
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.8, 0.8, 0.8, 0.8) 1
uniform 0 float 0.9
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.9, 0.9, 0.9, 0.9) 1
[pixel shader todo(sm<4)]
@ -236,21 +236,21 @@ void main(out float4 ret : sv_target)
uniform 0 float4 0.3 0.0 0.0 0.0
uniform 4 float4 0.0 0.0 0.0 0.0
uniform 8 float4 0.1 0.0 0.0 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.1, 0.1, 0.1, 0.1) 1
uniform 4 float4 0.35 0.0 0.0 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.2, 0.2, 0.2, 0.2) 1
uniform 8 float4 0.5 0.0 0.0 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 0 float4 1.0 0.0 0.0 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 4 float4 2.0 0.0 0.0 0.0
todo(sm<4 | sm>=6) draw quad
todo(sm<4) draw quad
probe all rgba (0.9, 0.9, 0.9, 0.9) 1

View File

@ -14,10 +14,10 @@ float4 main() : sv_target
[test]
uniform 0 float4 2.0 3.0 4.0 5.0
todo draw quad
draw quad
probe all rgba (2.0, 3.0, 4.0, 5.0)
uniform 0 float4 0.0 10.0 11.0 12.0
todo draw quad
draw quad
probe all rgba (-1.0, 9.0, 10.0, 11.0)

View File

@ -116,10 +116,10 @@ float4 main() : sv_target
[test]
uniform 0 uint4 2 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (1.1, 2.1, 3.1, 4.1)
uniform 0 uint4 1 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (1.0, 2.0, 3.0, 4.0)
% floats are accepted
@ -145,10 +145,10 @@ float4 main() : sv_target
[test]
uniform 0 uint4 2 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (1.1, 2.1, 3.1, 4.1)
uniform 0 uint4 1 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (1.0, 2.0, 3.0, 4.0)
[pixel shader fail(sm>=6)]
@ -173,10 +173,10 @@ float4 main() : sv_target
[test]
uniform 0 float4 2.0 0.0 0.0 0.0
todo(sm>=6) draw quad
draw quad
probe all rgba (1.1, 2.1, 3.1, 4.1)
uniform 0 float4 1.0 0.0 0.0 0.0
todo(sm>=6) draw quad
draw quad
probe all rgba (1.0, 2.0, 3.0, 4.0)
[pixel shader fail]
@ -374,13 +374,13 @@ float4 main() : sv_target
[test]
uniform 0 uint4 2 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (1.1, 2.1, 3.1, 4.1)
uniform 0 uint4 1 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (1.2, 2.2, 3.2, 4.2)
uniform 0 uint4 0 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (1.0, 2.0, 3.0, 4.0)
% switch breaks within a loop
@ -412,7 +412,7 @@ float4 main() : sv_target
[test]
uniform 0 uint4 2 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (5.0, 6.0, 7.0, 8.0)
% default case placement
@ -443,13 +443,13 @@ float4 main() : sv_target
[test]
uniform 0 uint4 0 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (4.0, 5.0, 6.0, 7.0)
uniform 0 uint4 2 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (2.0, 3.0, 4.0, 5.0)
uniform 0 uint4 3 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (4.0, 5.0, 6.0, 7.0)
[pixel shader]
@ -480,13 +480,13 @@ float4 main() : sv_target
[test]
uniform 0 uint4 3 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (1.0, 2.0, 3.0, 4.0)
uniform 0 uint4 0 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (4.0, 5.0, 6.0, 7.0)
uniform 0 uint4 5 0 0 0
todo(sm>=6) draw quad
draw quad
probe all rgba (1.0, 2.0, 3.0, 4.0)
% 'continue' is not supported in switches
@ -546,10 +546,10 @@ float4 main() : sv_target
[test]
uniform 0 uint4 0 0 3 1
todo(sm>=6) draw quad
draw quad
probe all rgba (10.0, 11.0, 12.0, 13.0)
uniform 0 uint4 1 0 3 1
todo(sm>=6) draw quad
draw quad
probe all rgba (7.0, 8.0, 9.0, 10.0)
% return from a switch nested in a loop
@ -580,8 +580,8 @@ float4 main() : sv_target
[test]
uniform 0 uint4 0 0 3 1
todo(sm>=6) draw quad
draw quad
probe all rgba (304.0, 305.0, 306.0, 307.0)
uniform 0 uint4 1 0 3 1
todo(sm>=6) draw quad
draw quad
probe all rgba (3.0, 4.0, 5.0, 6.0)