tests/hlsl/wave-reconvergence.shader_test

% Many of these tests fail on MoltenVK because of a bug in Metal, which causes
% SIMD operations (i.e., wave operations for Apple) to be hoisted out of the
% "if" statement, thereby changing which threads (i.e., wave groups for Apple)
% appear as active. This was reported to Apple as FB15624583.

[require]
shader model >= 6.0
format r32-uint uav-load

[uav 0]
format r32-uint
size (buffer, 4)

0x0 0x1 0x2 0x13

[uav 1]
format r32-uint
size (buffer, 4)

0x100 0x200 0x400 0x800

[uav 2]
format r32-uint
size (buffer, 20)

0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0

[compute shader]
/* Wave reconvergence after a loop that invocations leave at different
 * iterations.
 *
 * Resources:
 *   u0 - per-invocation control word; bit i set means "break out of the loop
 *        during iteration i".
 *   u1 - per-invocation summand. The values supplied by this file are
 *        distinct powers of two (0x100 << id), so WaveActiveSum(u1[id])
 *        reads back as a bit mask of the invocations that were active at the
 *        call site.
 *   u2 - output log. Each invocation owns POS_COUNT consecutive elements
 *        starting at POS_COUNT * id; elements it never writes keep their
 *        initial value.
 *
 * Every logged value is "active mask + 16 * i + site", where i is the loop
 * counter at the time of the write and site (1..4) identifies which of the
 * four write statements produced it. */
RWBuffer<uint> u0 : register(u0);
RWBuffer<uint> u1 : register(u1);
RWBuffer<uint> u2 : register(u2);

    [numthreads(4, 1, 1)]
void main(uint id : SV_GroupIndex)
{
    /* Number of u2 elements reserved for each invocation. */
    const unsigned int POS_COUNT = 5;
    /* pos is the next free element inside this invocation's u2 slice. */
    unsigned int i, pos = 0;
    /* If SPV_KHR_subgroup_uniform_control_flow is not supported,
     * SPIR-V doesn't guarantee reconvergence after the loop. */
    for (i = 0; i < 2; ++i)
    {
        /* Site 1: top of the iteration; only invocations that have not
         * left the loop yet get here. */
        u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 1;
        if (u0[id] & (1 << i))
        {
            /* Site 2: invocations leaving the loop in this iteration. */
            /* In many programming languages this statement could be
             * moved after the loop. Not here, because the set of
             * active invocations is supposed to depend on whether
             * we're still in the loop (i.e., before reconverging the
             * invocations that diverged during the loop) or not. */
            u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 2;
            break;
        }
        else
        {
            /* Site 3: invocations staying in the loop. */
            u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 3;
        }
    }
    /* Site 4: after the loop. The probes in the following [test] section
     * expect the full mask here, i.e. all four invocations reconverged. Note
     * that i still differs per invocation, depending on when it left the
     * loop. */
    u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 4;
}

[test]
dispatch 1 1 1
probe uav 2 (0) u32(0xf01)
bug(mvk) probe uav 2 (1) u32(0x503)
probe uav 2 (2) u32(0x511)
bug(mvk) probe uav 2 (3) u32(0x113)
probe uav 2 (4) u32(0xf24)

probe uav 2 (5) u32(0xf01)
bug(mvk) probe uav 2 (6) u32(0xa02)
probe uav 2 (7) u32(0xf04)
probe uav 2 (8) u32(0)
probe uav 2 (9) u32(0)

probe uav 2 (10) u32(0xf01)
bug(mvk) probe uav 2 (11) u32(0x503)
probe uav 2 (12) u32(0x511)
bug(mvk) probe uav 2 (13) u32(0x412)
probe uav 2 (14) u32(0xf14)

probe uav 2 (15) u32(0xf01)
bug(mvk) probe uav 2 (16) u32(0xa02)
probe uav 2 (17) u32(0xf04)
probe uav 2 (18) u32(0)
probe uav 2 (19) u32(0)

[uav 2]
format r32-uint
size (buffer, 20)

0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0

[compute shader]
/* Wave reconvergence after a loop that is itself entered under divergent
 * control flow.
 *
 * Resources:
 *   u0 - per-invocation control word; bit 0x10 set means "skip the whole
 *        body", bit i set means "break out of the loop during iteration i".
 *   u1 - per-invocation summand; WaveActiveSum(u1[id]) is used as a
 *        fingerprint of the set of invocations active at the call site.
 *   u2 - output log. Each invocation owns POS_COUNT consecutive elements
 *        starting at POS_COUNT * id; elements it never writes keep their
 *        initial value.
 *
 * Every logged value is "active sum + 16 * i + site", where i is the loop
 * counter at the time of the write and site (1..4) identifies which of the
 * four write statements produced it. */
RWBuffer<uint> u0 : register(u0);
RWBuffer<uint> u1 : register(u1);
RWBuffer<uint> u2 : register(u2);

    [numthreads(4, 1, 1)]
void main(uint id : SV_GroupIndex)
{
    /* Number of u2 elements reserved for each invocation. */
    const unsigned int POS_COUNT = 5;
    /* pos is the next free element inside this invocation's u2 slice. */
    unsigned int i, pos = 0;
    /* If SPV_KHR_maximal_reconvergence is not supported, SPIR-V
     * doesn't guarantee reconvergence after the loop even if
     * SPV_KHR_subgroup_uniform_control_flow is supported, because the
     * flow wasn't fully convergent when entering the loop. */
    if (!(u0[id] & 0x10))
    {
        /* Invocations with bit 0x10 set never get here, so they write
         * nothing and contribute to none of the sums below. */
        for (i = 0; i < 2; ++i)
        {
            /* Site 1: top of the iteration; only invocations that have not
             * left the loop yet get here. */
            u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 1;
            if (u0[id] & (1 << i))
            {
                /* Site 2: invocations leaving the loop in this iteration.
                 * The write has to stay inside the loop, because the set of
                 * active invocations differs from the one after the loop. */
                u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 2;
                break;
            }
            else
            {
                /* Site 3: invocations staying in the loop. */
                u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 3;
            }
        }
        /* Site 4: after the loop but still inside the outer "if". The probes
         * in the following [test] section expect every invocation that
         * entered the "if" to have reconverged here. */
        u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 4;
    }
}

[test]
dispatch 1 1 1
probe uav 2 (0) u32(0x701)
bug(mvk) probe uav 2 (1) u32(0x503)
probe uav 2 (2) u32(0x511)
bug(mvk) probe uav 2 (3) u32(0x113)
probe uav 2 (4) u32(0x724)

probe uav 2 (5) u32(0x701)
bug(mvk) probe uav 2 (6) u32(0x202)
probe uav 2 (7) u32(0x704)
probe uav 2 (8) u32(0)
probe uav 2 (9) u32(0)

probe uav 2 (10) u32(0x701)
bug(mvk) probe uav 2 (11) u32(0x503)
probe uav 2 (12) u32(0x511)
bug(mvk) probe uav 2 (13) u32(0x412)
probe uav 2 (14) u32(0x714)

probe uav 2 (15) u32(0)
probe uav 2 (16) u32(0)
probe uav 2 (17) u32(0)
probe uav 2 (18) u32(0)
probe uav 2 (19) u32(0)
tests: Mark some tests in wave-reconvergence.shader_test as buggy on MoltenVK. 2024-11-05 21:56:26 +01:00			`% Many of these tests fail on MoltenVK because of a bug in Metal, which causes`
			`% SIMD operations (i.e., wave operations for Apple) to be hoisted out of the`
			`% "if"statement, thereby changing which threads (i.e., wave groups for Apple)`
			`% appear as active. This was reported to Apple as FB15624583.`

tests: Test wave reconvergence after selections and loops. Tangled instructions, like wave operations, are sensitive to how the invocations in a wave reconverge after having diverged. Here we test for some of those scenarios. 2024-04-30 13:54:39 +02:00			`[require]`
			`shader model >= 6.0`
tests/shader_runner: Explicitly require UAV load support. The used UAV formats are explicitly added in the [require] section of every test that uses them. Some of these tests were failing on Intel UHD graphics 770 because of missing support for additional UAV load types, explicitly requiring these formats allows these tests to be skipped. 2024-08-01 19:13:31 -04:00			`format r32-uint uav-load`
tests: Test wave reconvergence after selections and loops. Tangled instructions, like wave operations, are sensitive to how the invocations in a wave reconverge after having diverged. Here we test for some of those scenarios. 2024-04-30 13:54:39 +02:00
			`[uav 0]`
tests/shader_runner: Replace spaces with dashes in format names. Probably good if we want to allow specifying several formats in the same line, separated by spaces. While at it, rename "r32g32 int" to "r32g32-sint". 2024-08-01 21:03:46 -04:00			`format r32-uint`
tests: Test wave reconvergence after selections and loops. Tangled instructions, like wave operations, are sensitive to how the invocations in a wave reconverge after having diverged. Here we test for some of those scenarios. 2024-04-30 13:54:39 +02:00			`size (buffer, 4)`

			`0x0 0x1 0x2 0x13`

			`[uav 1]`
tests/shader_runner: Replace spaces with dashes in format names. Probably good if we want to allow specifying several formats in the same line, separated by spaces. While at it, rename "r32g32 int" to "r32g32-sint". 2024-08-01 21:03:46 -04:00			`format r32-uint`
tests: Test wave reconvergence after selections and loops. Tangled instructions, like wave operations, are sensitive to how the invocations in a wave reconverge after having diverged. Here we test for some of those scenarios. 2024-04-30 13:54:39 +02:00			`size (buffer, 4)`

			`0x100 0x200 0x400 0x800`

			`[uav 2]`
tests/shader_runner: Replace spaces with dashes in format names. Probably good if we want to allow specifying several formats in the same line, separated by spaces. While at it, rename "r32g32 int" to "r32g32-sint". 2024-08-01 21:03:46 -04:00			`format r32-uint`
tests: Test wave reconvergence after selections and loops. Tangled instructions, like wave operations, are sensitive to how the invocations in a wave reconverge after having diverged. Here we test for some of those scenarios. 2024-04-30 13:54:39 +02:00			`size (buffer, 20)`

			`0 0 0 0 0`
			`0 0 0 0 0`
			`0 0 0 0 0`
			`0 0 0 0 0`

			`[compute shader]`
			`RWBuffer<uint> u0 : register(u0);`
			`RWBuffer<uint> u1 : register(u1);`
			`RWBuffer<uint> u2 : register(u2);`

			`[numthreads(4, 1, 1)]`
			`void main(uint id : SV_GroupIndex)`
			`{`
			`const unsigned int POS_COUNT = 5;`
			`unsigned int i, pos = 0;`
			`/* If SPV_KHR_subgroup_uniform_control_flow is not supported,`
			`* SPIR-V doesn't guarantee reconvergence after the loop. */`
			`for (i = 0; i < 2; ++i)`
			`{`
			`u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 1;`
			`if (u0[id] & (1 << i))`
			`{`
			`/* In many programming languages this statement could be`
			`* moved after the loop. Not here, because the set of`
			`* active invocations is supposed to depend on whether`
			`* we're still in the loop (i.e., before reconverging the`
			`* invocations that diverged during the loop) or not. */`
			`u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 2;`
			`break;`
			`}`
			`else`
			`{`
			`u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 3;`
			`}`
			`}`
			`u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 4;`
			`}`

			`[test]`
tests: Dispatch just four invocations when testing for wave reconvergence. I don't know why I put a 4 both in the numthreads() attribute and as a dispatch parameter, but only care about only one thread group. 2024-10-21 12:42:46 +02:00			`dispatch 1 1 1`
tests/shader_runner: Introduce the "u32" probe format. 2025-06-06 00:25:11 +02:00			`probe uav 2 (0) u32(0xf01)`
			`bug(mvk) probe uav 2 (1) u32(0x503)`
			`probe uav 2 (2) u32(0x511)`
			`bug(mvk) probe uav 2 (3) u32(0x113)`
			`probe uav 2 (4) u32(0xf24)`

			`probe uav 2 (5) u32(0xf01)`
			`bug(mvk) probe uav 2 (6) u32(0xa02)`
			`probe uav 2 (7) u32(0xf04)`
			`probe uav 2 (8) u32(0)`
			`probe uav 2 (9) u32(0)`

			`probe uav 2 (10) u32(0xf01)`
			`bug(mvk) probe uav 2 (11) u32(0x503)`
			`probe uav 2 (12) u32(0x511)`
			`bug(mvk) probe uav 2 (13) u32(0x412)`
			`probe uav 2 (14) u32(0xf14)`

			`probe uav 2 (15) u32(0xf01)`
			`bug(mvk) probe uav 2 (16) u32(0xa02)`
			`probe uav 2 (17) u32(0xf04)`
			`probe uav 2 (18) u32(0)`
			`probe uav 2 (19) u32(0)`
tests: Test wave reconvergence after selections and loops. Tangled instructions, like wave operations, are sensitive to how the invocations in a wave reconverge after having diverged. Here we test for some of those scenarios. 2024-04-30 13:54:39 +02:00
			`[uav 2]`
tests/shader_runner: Replace spaces with dashes in format names. Probably good if we want to allow specifying several formats in the same line, separated by spaces. While at it, rename "r32g32 int" to "r32g32-sint". 2024-08-01 21:03:46 -04:00			`format r32-uint`
tests: Test wave reconvergence after selections and loops. Tangled instructions, like wave operations, are sensitive to how the invocations in a wave reconverge after having diverged. Here we test for some of those scenarios. 2024-04-30 13:54:39 +02:00			`size (buffer, 20)`

			`0 0 0 0 0`
			`0 0 0 0 0`
			`0 0 0 0 0`
			`0 0 0 0 0`

			`[compute shader]`
			`RWBuffer<uint> u0 : register(u0);`
			`RWBuffer<uint> u1 : register(u1);`
			`RWBuffer<uint> u2 : register(u2);`

			`[numthreads(4, 1, 1)]`
			`void main(uint id : SV_GroupIndex)`
			`{`
			`const unsigned int POS_COUNT = 5;`
			`unsigned int i, pos = 0;`
			`/* If SPV_KHR_maximal_reconvergence is not supported, SPIR-V`
			`* doesn't guarantee reconvergence after the loop even if`
			`* SPV_KHR_subgroup_uniform_control_flow is supported, because the`
			`* flow wasn't fully convergent when entering the loop. */`
			`if (!(u0[id] & 0x10))`
			`{`
			`for (i = 0; i < 2; ++i)`
			`{`
			`u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 1;`
			`if (u0[id] & (1 << i))`
			`{`
			`u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 2;`
			`break;`
			`}`
			`else`
			`{`
			`u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 3;`
			`}`
			`}`
			`u2[POS_COUNT * id + pos++] = WaveActiveSum(u1[id]) + 16 * i + 4;`
			`}`
			`}`

			`[test]`
tests: Dispatch just four invocations when testing for wave reconvergence. I don't know why I put a 4 both in the numthreads() attribute and as a dispatch parameter, but only care about only one thread group. 2024-10-21 12:42:46 +02:00			`dispatch 1 1 1`
tests/shader_runner: Introduce the "u32" probe format. 2025-06-06 00:25:11 +02:00			`probe uav 2 (0) u32(0x701)`
			`bug(mvk) probe uav 2 (1) u32(0x503)`
			`probe uav 2 (2) u32(0x511)`
			`bug(mvk) probe uav 2 (3) u32(0x113)`
			`probe uav 2 (4) u32(0x724)`

			`probe uav 2 (5) u32(0x701)`
			`bug(mvk) probe uav 2 (6) u32(0x202)`
			`probe uav 2 (7) u32(0x704)`
			`probe uav 2 (8) u32(0)`
			`probe uav 2 (9) u32(0)`

			`probe uav 2 (10) u32(0x701)`
			`bug(mvk) probe uav 2 (11) u32(0x503)`
			`probe uav 2 (12) u32(0x511)`
			`bug(mvk) probe uav 2 (13) u32(0x412)`
			`probe uav 2 (14) u32(0x714)`

			`probe uav 2 (15) u32(0)`
			`probe uav 2 (16) u32(0)`
			`probe uav 2 (17) u32(0)`
			`probe uav 2 (18) u32(0)`
			`probe uav 2 (19) u32(0)`