% vkd3d/tests/hlsl/tgsm.shader_test
[require]
% SM < 5.0 requires that groupshared variables are arrays of N elements,
% where N is the number of threads in a group, and they are only ever indexed
% by SV_GroupIndex when storing (though violating this last condition gives
% E_NOTIMPL rather than E_FAIL).
shader model >= 4.0
shader model < 5.0
[compute shader fail todo]
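/* A scalar groupshared variable violates the "array of one element per
 * thread" requirement described above. */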
RWByteAddressBuffer u : register(u0);
groupshared uint m;
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    if (local_idx == 0)
        m = 0;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m);
}
[compute shader fail todo]
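/* The array has a single element, but the thread group has four threads. */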
RWByteAddressBuffer u : register(u0);
groupshared uint m[1];
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    if (local_idx == 0)
        m[0] = 0;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m[0]);
}
[compute shader notimpl]
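/* The array size matches the thread count, but the store indexes element 0
 * directly instead of using SV_GroupIndex, which gives E_NOTIMPL. */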
RWByteAddressBuffer u : register(u0);
groupshared uint m[4];
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    if (local_idx == 0)
        m[0] = 0;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m[0]);
}
[require]
shader model >= 4.0
[uav 0]
format r32-typeless
size (raw_buffer, 4)
0 0 0 0
[compute shader todo]
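/* Valid for SM 4.x: one element per thread, written only through
 * SV_GroupIndex; the read through m[0] is not restricted. */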
RWByteAddressBuffer u : register(u0);
groupshared uint m[4];
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    m[local_idx] = 0xcafef00d;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m[0]);
}
[test]
todo(sm<6 | msl) dispatch 1 1 1
probe uav 0 (0) u32(0xcafef00d)
probe uav 0 (1) u32(0xcafef00d)
probe uav 0 (2) u32(0xcafef00d)
probe uav 0 (3) u32(0xcafef00d)
[require]
shader model >= 5.0
[uav 1]
format r32-typeless
size (raw_buffer, 4)
1 0 0 0
[compute shader]
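/* m starts at group_id.x and each of the 32 threads atomically adds
 * group_id.x, so each group stores m == 33 * group_id.x. */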
RWByteAddressBuffer u : register(u1);
groupshared uint m;
[numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    if (!local_idx)
        m = group_id.x;
    GroupMemoryBarrierWithGroupSync();
    InterlockedAdd(m, group_id.x);
    GroupMemoryBarrierWithGroupSync();
    if (!local_idx)
        u.Store(4 * group_id.x, m);
}
[test]
todo(glsl | msl) dispatch 4 1 1
probe uav 1 (0) u32(0)
probe uav 1 (1) u32(33)
probe uav 1 (2) u32(66)
probe uav 1 (3) u32(99)
[uav 1]
format r32-typeless
size (raw_buffer, 4)
1 0 0 0
[compute shader]
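/* As above, but each of the 32 threads atomically subtracts group_id.x,
 * so each group stores m == -31 * group_id.x. */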
RWByteAddressBuffer u : register(u1);
groupshared int m;
[numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    if (!local_idx)
        m = group_id.x;
    GroupMemoryBarrierWithGroupSync();
    InterlockedAdd(m, -group_id.x);
    GroupMemoryBarrierWithGroupSync();
    if (!local_idx)
        u.Store(4 * group_id.x, m);
}
[test]
todo(glsl | msl) dispatch 4 1 1
probe uav 1 (0) i32(0)
probe uav 1 (1) i32(-31)
probe uav 1 (2) i32(-62)
probe uav 1 (3) i32(-93)
[uav 1]
format r32-float
size (buffer, 8)
1 1 1 1 0 0 0 0
[uav 2]
format r32-sint
size (buffer, 8)
1 1 1 1 0 0 0 0
[compute shader todo]
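/* Groupshared array of structs: thread 0 initialises every element to the
 * group id, the four InterlockedAdd(m[0].u, 2) calls add 8 to m[0].u and the
 * four InterlockedAdd(m[idx].u, 1) calls add 4 to m[idx].u, and each thread
 * then adds group_id.x to its own element local_idx times. */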
uniform uint idx;
#define GROUP_SIZE 4
struct data
{
    float f;
    uint u;
};
RWBuffer<float> u : register(u1);
RWBuffer<uint> u2 : register(u2);
groupshared data m[GROUP_SIZE];
[numthreads(GROUP_SIZE, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID,
        uint thread_id : SV_DispatchThreadID)
{
    uint i;
    if (!local_idx)
    {
        for (i = 0; i < GROUP_SIZE; ++i)
        {
            m[i].f = group_id.x;
            m[i].u = group_id.x;
        }
    }
    GroupMemoryBarrierWithGroupSync();
    InterlockedAdd(m[0].u, 2);
    InterlockedAdd(m[idx].u, 1);
    GroupMemoryBarrierWithGroupSync();
    for (i = 0; i < local_idx; ++i)
    {
        m[local_idx].f += group_id.x;
        m[local_idx].u += group_id.x;
    }
    u[thread_id.x] = m[local_idx].f;
    u2[thread_id.x] = m[local_idx].u;
}
[test]
uniform 0 uint 1
todo(sm<6 | msl) dispatch 2 1 1
probe uav 1 (0) f32(0.0)
probe uav 1 (1) f32(0.0)
probe uav 1 (2) f32(0.0)
probe uav 1 (3) f32(0.0)
probe uav 1 (4) f32(1.0)
probe uav 1 (5) f32(2.0)
probe uav 1 (6) f32(3.0)
probe uav 1 (7) f32(4.0)
probe uav 2 (0) i32(8)
probe uav 2 (1) i32(4)
probe uav 2 (2) i32(0)
probe uav 2 (3) i32(0)
probe uav 2 (4) i32(9)
probe uav 2 (5) i32(6)
probe uav 2 (6) i32(3)
probe uav 2 (7) i32(4)
[uav 1]
format r32-typeless
size (raw_buffer, 1)
0
[compute shader]
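/* m starts at 7, so only the thread with local_idx == 7 sees its comparand
 * match and stores local_idx + 32 == 39. */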
RWByteAddressBuffer u : register(u1);
groupshared uint m;
[numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    uint orig;
    if (!local_idx)
        m = 7;
    GroupMemoryBarrierWithGroupSync();
    InterlockedCompareExchange(m, local_idx, local_idx + 32, orig);
    GroupMemoryBarrierWithGroupSync();
    if (!local_idx)
        u.Store(0, m);
}
[test]
todo(glsl | msl) dispatch 1 1 1
probe uav 1 (0) u32(39)
[uav 0]
format r32-typeless
size (raw_buffer, 8)
0xf000f 0xf000f 0xf000f 0xf000f
0xf000f 0xf000f 0xf000f 0xf000f
[compute shader todo]
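/* Each element is written once and then summed with its xor-1 neighbour, so
 * adjacent pairs of elements end up with equal values. */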
RWByteAddressBuffer u : register(u0);
groupshared min16uint m[4];
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    min16uint id = (min16uint)group_id.x;
    /* 16-bit atomic ops are not supported. */
    m[local_idx] = id + 8 * (local_idx + 1);
    GroupMemoryBarrierWithGroupSync();
    m[local_idx] += m[local_idx ^ 1];
    GroupMemoryBarrierWithGroupSync();
    /* Depending on the driver and D3D version, two or four bytes are written
     * when using a minimum precision type, so we explicitly cast. */
    u.Store(16 * group_id.x + 4 * local_idx, (uint)m[local_idx]);
}
[test]
todo(sm<6 | msl) dispatch 2 1 1
probe uav 0 (0) u32(0x18)
probe uav 0 (1) u32(0x18)
probe uav 0 (2) u32(0x38)
probe uav 0 (3) u32(0x38)
probe uav 0 (4) u32(0x1a)
probe uav 0 (5) u32(0x1a)
probe uav 0 (6) u32(0x3a)
probe uav 0 (7) u32(0x3a)
[uav 0]
format r32-typeless
size (raw_buffer, 8)
0xf000f 0xf000f 0xf000f 0xf000f
0xf000f 0xf000f 0xf000f 0xf000f
[compute shader]
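/* The four threads take turns adding one nibble each, building m == 0x4321
 * in every group; each store then adds (group_id + 1) << 16 locally. */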
RWByteAddressBuffer u : register(u0);
groupshared min16uint m;
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    if (local_idx == 0)
        m = local_idx + 1;
    GroupMemoryBarrierWithGroupSync();
    if (local_idx == 1)
        m += (local_idx + 1) << (4 * local_idx);
    GroupMemoryBarrierWithGroupSync();
    if (local_idx == 2)
        m += (local_idx + 1) << (4 * local_idx);
    GroupMemoryBarrierWithGroupSync();
    if (local_idx == 3)
        m += (local_idx + 1) << (4 * local_idx);
    GroupMemoryBarrierWithGroupSync();
    uint m_local = m;
    m_local += (group_id + 1) << 16;
    u.Store(16 * group_id + 4 * local_idx, m_local);
}
[test]
todo(glsl | msl) dispatch 2 1 1
probe uav 0 (0) u32(0x14321)
probe uav 0 (1) u32(0x14321)
probe uav 0 (2) u32(0x14321)
probe uav 0 (3) u32(0x14321)
probe uav 0 (4) u32(0x24321)
probe uav 0 (5) u32(0x24321)
probe uav 0 (6) u32(0x24321)
probe uav 0 (7) u32(0x24321)
[require]
shader model >= 6.2
native-16-bit
[uav 1]
format r32-typeless
size (raw_buffer, 4)
0xf000f 0xf000f 0xf000f 0xf000f
[compute shader]
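/* Same pairwise sums as the min16uint test above, but with native 16-bit
 * stores, so two results are packed into each 32-bit word read back. */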
RWByteAddressBuffer u : register(u1);
groupshared uint16_t m[4];
[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    uint16_t id = (uint16_t)group_id.x;
    /* 16-bit atomic ops are not supported. */
    m[local_idx] = id + 8 * (local_idx + 1);
    GroupMemoryBarrierWithGroupSync();
    m[local_idx] += m[local_idx ^ 1];
    GroupMemoryBarrierWithGroupSync();
    u.Store(8 * group_id.x + 2 * local_idx, m[local_idx]);
}
[test]
dispatch 2 1 1
probe uav 1 (0) u32(0x180018)
probe uav 1 (1) u32(0x380038)
probe uav 1 (2) u32(0x1a001a)
probe uav 1 (3) u32(0x3a003a)