% Tests for "groupshared" (thread group shared memory) variables in compute
% shaders: shader model 4 restrictions, plain loads/stores, and atomic adds.

[require]
% SM < 5.0 requires that groupshared variables are arrays of N elements,
% where N is the number of threads in a group, and they are only ever indexed
% by SV_GroupIndex when storing (though violating this last condition gives
% E_NOTIMPL rather than E_FAIL).
shader model >= 4.0
shader model < 5.0

% A scalar groupshared variable is not an array at all; compilation is
% expected to fail.
[compute shader fail todo]
RWByteAddressBuffer u : register(u0);
groupshared uint m;

    [numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    if (local_idx == 0)
        m = 0;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m);
}

% A one-element array in a group of four threads; compilation is expected to
% fail.
[compute shader fail todo]
RWByteAddressBuffer u : register(u0);
groupshared uint m[1];

    [numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    if (local_idx == 0)
        m[0] = 0;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m[0]);
}

% The array has one element per thread, but the store uses a constant index
% instead of SV_GroupIndex; this is expected to yield "not implemented".
[compute shader notimpl]
RWByteAddressBuffer u : register(u0);
groupshared uint m[4];

    [numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    if (local_idx == 0)
        m[0] = 0;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m[0]);
}

[require]
shader model >= 4.0

[uav 0]
format r32-typeless
size (raw_buffer, 4)

0 0 0 0

% Each thread stores to its own element (indexed by SV_GroupIndex); after the
% barrier every thread reads element 0 and writes it to its own UAV slot.
[compute shader todo]
RWByteAddressBuffer u : register(u0);
groupshared uint m[4];

    [numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    m[local_idx] = 0xcafef00d;
    GroupMemoryBarrierWithGroupSync();
    u.Store(4 * local_idx, m[0]);
}

[test]
todo(sm<6 | msl) dispatch 1 1 1
probe uav 0 (0) u32(0xcafef00d)
probe uav 0 (1) u32(0xcafef00d)
probe uav 0 (2) u32(0xcafef00d)
probe uav 0 (3) u32(0xcafef00d)

[require]
shader model >= 5.0

[uav 1]
format r32-typeless
size (raw_buffer, 4)

1 0 0 0

% InterlockedAdd() on a scalar groupshared uint. Thread 0 seeds m with the
% group ID, then all 32 threads add the group ID, so group g writes 33 * g.
[compute shader]
RWByteAddressBuffer u : register(u1);
groupshared uint m;

    [numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    if (!local_idx)
        m = group_id.x;
    GroupMemoryBarrierWithGroupSync();
    InterlockedAdd(m, group_id.x);
    GroupMemoryBarrierWithGroupSync();
    if (!local_idx)
        u.Store(4 * group_id.x, m);
}

[test]
todo(glsl | msl) dispatch 4 1 1
probe uav 1 (0) u32(0)
probe uav 1 (1) u32(33)
probe uav 1 (2) u32(66)
probe uav 1 (3) u32(99)

[uav 1]
format r32-typeless
size (raw_buffer, 4)

1 0 0 0

% Signed variant of the test above: m starts at the group ID and all 32
% threads add the negated group ID, so group g writes g - 32 * g = -31 * g.
[compute shader]
RWByteAddressBuffer u : register(u1);
groupshared int m;

    [numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    if (!local_idx)
        m = group_id.x;
    GroupMemoryBarrierWithGroupSync();
    InterlockedAdd(m, -group_id.x);
    GroupMemoryBarrierWithGroupSync();
    if (!local_idx)
        u.Store(4 * group_id.x, m);
}

[test]
todo(glsl | msl) dispatch 4 1 1
probe uav 1 (0) i32(0)
probe uav 1 (1) i32(-31)
probe uav 1 (2) i32(-62)
probe uav 1 (3) i32(-93)

[uav 1]
format r32-float
size (buffer, 8)

1 1 1 1 0 0 0 0

[uav 2]
format r32-sint
size (buffer, 8)

1 1 1 1 0 0 0 0

% Groupshared array of structs, with atomic adds on a struct member selected
% by both a constant index and a uniform index (the test sets idx to 1).
%
% With g the group ID, thread 0 sets every element to {g, g}. The four threads
% then add 2 to m[0].u (+8 in total) and 1 to m[idx].u (+4 in total). Finally
% thread t adds g to both members of its own element t times. For g = 0 this
% gives f = 0 0 0 0 and u = 8 4 0 0; for g = 1 it gives f = 1 2 3 4 and
% u = 9 6 3 4.
[compute shader todo]
uniform uint idx;

#define GROUP_SIZE 4

struct data
{
    float f;
    uint u;
};

/* The element types match the formats of the UAVs bound to these slots
 * (r32-float for u1, r32-sint for u2). */
RWBuffer<float> u : register(u1);
RWBuffer<int> u2 : register(u2);
groupshared data m[GROUP_SIZE];

    [numthreads(GROUP_SIZE, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID,
        uint thread_id : SV_DispatchThreadID)
{
    uint i;

    if (!local_idx)
    {
        for (i = 0; i < GROUP_SIZE; ++i)
        {
            m[i].f = group_id.x;
            m[i].u = group_id.x;
        }
    }
    GroupMemoryBarrierWithGroupSync();
    InterlockedAdd(m[0].u, 2);
    InterlockedAdd(m[idx].u, 1);
    GroupMemoryBarrierWithGroupSync();
    /* From here on each thread only touches its own element. */
    for (i = 0; i < local_idx; ++i)
    {
        m[local_idx].f += group_id.x;
        m[local_idx].u += group_id.x;
    }
    u[thread_id.x] = m[local_idx].f;
    u2[thread_id.x] = m[local_idx].u;
}

[test]
uniform 0 uint 1
todo(sm<6 | msl) dispatch 2 1 1
probe uav 1 (0) f32(0.0)
probe uav 1 (1) f32(0.0)
probe uav 1 (2) f32(0.0)
probe uav 1 (3) f32(0.0)
probe uav 1 (4) f32(1.0)
probe uav 1 (5) f32(2.0)
probe uav 1 (6) f32(3.0)
probe uav 1 (7) f32(4.0)
probe uav 2 (0) i32(8)
probe uav 2 (1) i32(4)
probe uav 2 (2) i32(0)
probe uav 2 (3) i32(0)
probe uav 2 (4) i32(9)
probe uav 2 (5) i32(6)
probe uav 2 (6) i32(3)
probe uav 2 (7) i32(4)

[uav 1]
format r32-typeless
size (raw_buffer, 1)

0
% InterlockedCompareExchange() on a scalar groupshared uint. m starts at 7, so
% only the thread with index 7 matches and replaces it with 7 + 32 = 39. No
% thread has index 39, so that value is what thread 0 finally stores.
[compute shader]
RWByteAddressBuffer u : register(u1);
groupshared uint m;

    [numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    uint orig;

    if (!local_idx)
        m = 7;
    GroupMemoryBarrierWithGroupSync();
    InterlockedCompareExchange(m, local_idx, local_idx + 32, orig);
    GroupMemoryBarrierWithGroupSync();
    if (!local_idx)
        u.Store(0, m);
}

[test]
todo(glsl | msl) dispatch 1 1 1
probe uav 1 (0) u32(39)

[uav 0]
format r32-typeless
size (raw_buffer, 8)

0xf000f 0xf000f 0xf000f 0xf000f 0xf000f 0xf000f 0xf000f 0xf000f

% Groupshared array of a minimum precision type. With g the group ID, thread t
% starts with g + 8 * (t + 1) and then adds the initial value of its pair
% partner (t ^ 1), so both threads of a pair end up with the sum of the pair:
% 0x18 and 0x38 for group 0, 0x1a and 0x3a for group 1.
[compute shader todo]
RWByteAddressBuffer u : register(u0);
groupshared min16uint m[4];

    [numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    min16uint id = (min16uint)group_id.x;

    /* 16-bit atomic ops are not supported. */
    m[local_idx] = id + 8 * (local_idx + 1);
    GroupMemoryBarrierWithGroupSync();
    /* Fetch the partner's initial value and synchronise before updating our
     * own element; otherwise the partner's read of our element would race
     * with the update below. */
    min16uint partner = m[local_idx ^ 1];
    GroupMemoryBarrierWithGroupSync();
    m[local_idx] += partner;
    GroupMemoryBarrierWithGroupSync();
    /* Depending on the driver and D3D version, two or four bytes are written
     * when using a minimum precision type, so we explicitly cast. */
    u.Store(16 * group_id.x + 4 * local_idx, (uint)m[local_idx]);
}

[test]
todo(sm<6 | msl) dispatch 2 1 1
probe uav 0 (0) u32(0x18)
probe uav 0 (1) u32(0x18)
probe uav 0 (2) u32(0x38)
probe uav 0 (3) u32(0x38)
probe uav 0 (4) u32(0x1a)
probe uav 0 (5) u32(0x1a)
probe uav 0 (6) u32(0x3a)
probe uav 0 (7) u32(0x3a)

[uav 0]
format r32-typeless
size (raw_buffer, 8)

0xf000f 0xf000f 0xf000f 0xf000f 0xf000f 0xf000f 0xf000f 0xf000f

% Scalar groupshared variable of a minimum precision type. Threads 0 to 3
% update m one after another, separated by barriers, building
% 0x1 + 0x20 + 0x300 + 0x4000 = 0x4321. Every thread then adds
% (group ID + 1) << 16 to a 32-bit copy and stores the result.
[compute shader]
RWByteAddressBuffer u : register(u0);
groupshared min16uint m;

    [numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    if (local_idx == 0)
        m = local_idx + 1;
    GroupMemoryBarrierWithGroupSync();
    if (local_idx == 1)
        m += (local_idx + 1) << (4 * local_idx);
    GroupMemoryBarrierWithGroupSync();
    if (local_idx == 2)
        m += (local_idx + 1) << (4 * local_idx);
    GroupMemoryBarrierWithGroupSync();
    if (local_idx == 3)
        m += (local_idx + 1) << (4 * local_idx);
    GroupMemoryBarrierWithGroupSync();
    uint m_local = m;
    m_local += (group_id + 1) << 16;
    u.Store(16 * group_id + 4 * local_idx, m_local);
}

[test]
todo(glsl | msl) dispatch 2 1 1
probe uav 0 (0) u32(0x14321)
probe uav 0 (1) u32(0x14321)
probe uav 0 (2) u32(0x14321)
probe uav 0 (3) u32(0x14321)
probe uav 0 (4) u32(0x24321)
probe uav 0 (5) u32(0x24321)
probe uav 0 (6) u32(0x24321)
probe uav 0 (7) u32(0x24321)

[require]
shader model >= 6.2
native-16-bit

[uav 1]
format r32-typeless
size (raw_buffer, 4)

0xf000f 0xf000f 0xf000f 0xf000f

% Same pairwise sum as the min16uint array test above, but with a native
% 16-bit type. Each thread stores two bytes, so every probed 32-bit word holds
% the results of two threads of the same pair.
[compute shader]
RWByteAddressBuffer u : register(u1);
groupshared uint16_t m[4];

    [numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    uint16_t id = (uint16_t)group_id.x;

    /* 16-bit atomic ops are not supported. */
    m[local_idx] = id + 8 * (local_idx + 1);
    GroupMemoryBarrierWithGroupSync();
    /* Fetch the partner's initial value and synchronise before updating our
     * own element; otherwise the partner's read of our element would race
     * with the update below. */
    uint16_t partner = m[local_idx ^ 1];
    GroupMemoryBarrierWithGroupSync();
    m[local_idx] += partner;
    GroupMemoryBarrierWithGroupSync();
    u.Store(8 * group_id.x + 2 * local_idx, m[local_idx]);
}

[test]
dispatch 2 1 1
probe uav 1 (0) u32(0x180018)
probe uav 1 (1) u32(0x380038)
probe uav 1 (2) u32(0x1a001a)
probe uav 1 (3) u32(0x3a003a)