% vkd3d/tests/hlsl/tgsm.shader_test
[require]
% SM < 5.0 requires that groupshared variables are arrays of N elements,
% where N is the number of threads in a group, and they are only ever indexed
% by SV_GroupIndex when storing (though violating this last condition gives
% E_NOTIMPL rather than E_FAIL).
shader model >= 4.0
shader model < 5.0
[compute shader fail todo]
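/* A scalar groupshared variable violates the "array of one element per
 * thread" requirement described above. */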
RWByteAddressBuffer u : register(u0);
groupshared uint m;
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    if (local_idx == 0)
        m = 0;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m);
}
[compute shader fail todo]
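/* The array has a single element, but the thread group has four threads. */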
RWByteAddressBuffer u : register(u0);
groupshared uint m[1];
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    if (local_idx == 0)
        m[0] = 0;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m[0]);
}
[compute shader notimpl]
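/* The array size matches the thread count, but the store indexes element 0
 * directly instead of using SV_GroupIndex, which gives E_NOTIMPL. */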
RWByteAddressBuffer u : register(u0);
groupshared uint m[4];
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    if (local_idx == 0)
        m[0] = 0;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m[0]);
}
[require]
shader model >= 4.0
[uav 0]
format r32-typeless
size (raw_buffer, 4)
0 0 0 0
[compute shader todo]
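/* Valid for SM 4.x: one element per thread, written only through
 * SV_GroupIndex; the read through m[0] is not restricted. */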
RWByteAddressBuffer u : register(u0);
groupshared uint m[4];
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    m[local_idx] = 0xcafef00d;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m[0]);
}
[test]
todo(sm<6 | msl) dispatch 1 1 1
probe uav 0 (0) u32(0xcafef00d)
probe uav 0 (1) u32(0xcafef00d)
probe uav 0 (2) u32(0xcafef00d)
probe uav 0 (3) u32(0xcafef00d)
[require]
shader model >= 5.0
[uav 1]
format r32-typeless
size (raw_buffer, 4)
1 0 0 0
[compute shader]
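/* m starts at group_id.x and each of the 32 threads atomically adds
 * group_id.x, so each group stores m == 33 * group_id.x. */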
RWByteAddressBuffer u : register(u1);
groupshared uint m;
[numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    if (!local_idx)
        m = group_id.x;
    GroupMemoryBarrierWithGroupSync();
    InterlockedAdd(m, group_id.x);
    GroupMemoryBarrierWithGroupSync();
    if (!local_idx)
        u.Store(4 * group_id.x, m);
}
[test]
todo(glsl | msl) dispatch 4 1 1
probe uav 1 (0) u32(0)
probe uav 1 (1) u32(33)
probe uav 1 (2) u32(66)
probe uav 1 (3) u32(99)
[uav 1]
format r32-typeless
size (raw_buffer, 4)
1 0 0 0
[compute shader]
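/* As above, but each of the 32 threads atomically subtracts group_id.x,
 * so each group stores m == -31 * group_id.x. */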
RWByteAddressBuffer u : register(u1);
groupshared int m;
[numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    if (!local_idx)
        m = group_id.x;
    GroupMemoryBarrierWithGroupSync();
    InterlockedAdd(m, -group_id.x);
    GroupMemoryBarrierWithGroupSync();
    if (!local_idx)
        u.Store(4 * group_id.x, m);
}
[test]
todo(glsl | msl) dispatch 4 1 1
probe uav 1 (0) i32(0)
probe uav 1 (1) i32(-31)
probe uav 1 (2) i32(-62)
probe uav 1 (3) i32(-93)
[uav 1]
format r32-float
size (buffer, 8)
1 1 1 1 0 0 0 0
[uav 2]
format r32-sint
size (buffer, 8)
1 1 1 1 0 0 0 0
[compute shader todo]
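/* Groupshared array of structs: thread 0 initialises every element to the
 * group id, the four InterlockedAdd(m[0].u, 2) calls add 8 to m[0].u and the
 * four InterlockedAdd(m[idx].u, 1) calls add 4 to m[idx].u, and each thread
 * then adds group_id.x to its own element local_idx times. */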
uniform uint idx;
#define GROUP_SIZE 4
struct data
{
    float f;
    uint u;
};
RWBuffer<float> u : register(u1);
RWBuffer<uint> u2 : register(u2);
groupshared data m[GROUP_SIZE];
[numthreads(GROUP_SIZE, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID,
        uint thread_id : SV_DispatchThreadID)
{
    uint i;
    if (!local_idx)
    {
        for (i = 0; i < GROUP_SIZE; ++i)
        {
            m[i].f = group_id.x;
            m[i].u = group_id.x;
        }
    }
    GroupMemoryBarrierWithGroupSync();
    InterlockedAdd(m[0].u, 2);
    InterlockedAdd(m[idx].u, 1);
    GroupMemoryBarrierWithGroupSync();
    for (i = 0; i < local_idx; ++i)
    {
        m[local_idx].f += group_id.x;
        m[local_idx].u += group_id.x;
    }
    u[thread_id.x] = m[local_idx].f;
    u2[thread_id.x] = m[local_idx].u;
}
[test]
uniform 0 uint 1
todo(sm<6 | msl) dispatch 2 1 1
probe uav 1 (0) f32(0.0)
probe uav 1 (1) f32(0.0)
probe uav 1 (2) f32(0.0)
probe uav 1 (3) f32(0.0)
probe uav 1 (4) f32(1.0)
probe uav 1 (5) f32(2.0)
probe uav 1 (6) f32(3.0)
probe uav 1 (7) f32(4.0)
probe uav 2 (0) i32(8)
probe uav 2 (1) i32(4)
probe uav 2 (2) i32(0)
probe uav 2 (3) i32(0)
probe uav 2 (4) i32(9)
probe uav 2 (5) i32(6)
probe uav 2 (6) i32(3)
probe uav 2 (7) i32(4)
[uav 1]
format r32-typeless
size (raw_buffer, 1)
0
[compute shader]
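/* m starts at 7, so only the thread with local_idx == 7 sees its comparand
 * match and stores local_idx + 32 == 39. */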
RWByteAddressBuffer u : register(u1);
groupshared uint m;
[numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    uint orig;
    if (!local_idx)
        m = 7;
    GroupMemoryBarrierWithGroupSync();
    InterlockedCompareExchange(m, local_idx, local_idx + 32, orig);
    GroupMemoryBarrierWithGroupSync();
    if (!local_idx)
        u.Store(0, m);
}
[test]
todo(glsl | msl) dispatch 1 1 1
probe uav 1 (0) u32(39)
[uav 0]
format r32-typeless
size (raw_buffer, 8)
0xf000f 0xf000f 0xf000f 0xf000f
0xf000f 0xf000f 0xf000f 0xf000f
[compute shader todo]
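/* Each element is written once and then summed with its xor-1 neighbour, so
 * adjacent pairs of elements end up with equal values. */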
RWByteAddressBuffer u : register(u0);
groupshared min16uint m[4];
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    min16uint id = (min16uint)group_id.x;
    /* 16-bit atomic ops are not supported. */
    m[local_idx] = id + 8 * (local_idx + 1);
    GroupMemoryBarrierWithGroupSync();
    m[local_idx] += m[local_idx ^ 1];
    GroupMemoryBarrierWithGroupSync();
    /* Depending on the driver and D3D version, two or four bytes are written
     * when using a minimum precision type, so we explicitly cast. */
    u.Store(16 * group_id.x + 4 * local_idx, (uint)m[local_idx]);
}
[test]
todo(sm<6 | msl) dispatch 2 1 1
probe uav 0 (0) u32(0x18)
probe uav 0 (1) u32(0x18)
probe uav 0 (2) u32(0x38)
probe uav 0 (3) u32(0x38)
probe uav 0 (4) u32(0x1a)
probe uav 0 (5) u32(0x1a)
probe uav 0 (6) u32(0x3a)
probe uav 0 (7) u32(0x3a)
[uav 0]
format r32-typeless
size (raw_buffer, 8)
0xf000f 0xf000f 0xf000f 0xf000f
0xf000f 0xf000f 0xf000f 0xf000f
[compute shader]
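/* The four threads take turns adding one nibble each, building m == 0x4321
 * in every group; each store then adds (group_id + 1) << 16 locally. */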
RWByteAddressBuffer u : register(u0);
groupshared min16uint m;
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    if (local_idx == 0)
        m = local_idx + 1;
    GroupMemoryBarrierWithGroupSync();
    if (local_idx == 1)
        m += (local_idx + 1) << (4 * local_idx);
    GroupMemoryBarrierWithGroupSync();
    if (local_idx == 2)
        m += (local_idx + 1) << (4 * local_idx);
    GroupMemoryBarrierWithGroupSync();
    if (local_idx == 3)
        m += (local_idx + 1) << (4 * local_idx);
    GroupMemoryBarrierWithGroupSync();
    uint m_local = m;
    m_local += (group_id + 1) << 16;
    u.Store(16 * group_id + 4 * local_idx, m_local);
}
[test]
todo(glsl | msl) dispatch 2 1 1
probe uav 0 (0) u32(0x14321)
probe uav 0 (1) u32(0x14321)
probe uav 0 (2) u32(0x14321)
probe uav 0 (3) u32(0x14321)
probe uav 0 (4) u32(0x24321)
probe uav 0 (5) u32(0x24321)
probe uav 0 (6) u32(0x24321)
probe uav 0 (7) u32(0x24321)
[require]
shader model >= 6.2
native-16-bit
[uav 1]
format r32-typeless
size (raw_buffer, 4)
0xf000f 0xf000f 0xf000f 0xf000f
[compute shader]
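/* Same pairwise sums as the min16uint test above, but with native 16-bit
 * stores, so two results are packed into each 32-bit word read back. */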
RWByteAddressBuffer u : register(u1);
groupshared uint16_t m[4];
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    uint16_t id = (uint16_t)group_id.x;
    /* 16-bit atomic ops are not supported. */
    m[local_idx] = id + 8 * (local_idx + 1);
    GroupMemoryBarrierWithGroupSync();
    m[local_idx] += m[local_idx ^ 1];
    GroupMemoryBarrierWithGroupSync();
    u.Store(8 * group_id.x + 2 * local_idx, m[local_idx]);
}
[test]
dispatch 2 1 1
probe uav 1 (0) u32(0x180018)
probe uav 1 (1) u32(0x380038)
probe uav 1 (2) u32(0x1a001a)
probe uav 1 (3) u32(0x3a003a)