mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2025-09-12 18:50:22 -07:00
350 lines
7.5 KiB
Plaintext
350 lines
7.5 KiB
Plaintext
[require]
|
|
% SM < 5.0 requires that groupshared variables are arrays of N elements,
|
|
% where N is the number of threads in a group, and they are only ever indexed
|
|
% by SV_GroupIndex when storing (though violating this last condition gives
|
|
% E_NOTIMPL rather than E_FAIL).
|
|
shader model >= 4.0
|
|
shader model < 5.0
|
|
|
|
[compute shader fail todo]
|
|
/* Negative test: `m` is a groupshared scalar, not an array with one element
 * per thread. The section header marks this shader as expected to fail to
 * compile under the preceding [require] constraints (4.0 <= SM < 5.0). */
RWByteAddressBuffer u : register(u0);
|
|
groupshared uint m;
|
|
|
|
[numthreads(4, 1, 1)]
|
|
void main(uint local_idx : SV_GroupIndex)
|
|
{
|
|
/* Only thread 0 initialises the shared value. */
if (local_idx == 0)
|
|
m = 0;
|
|
GroupMemoryBarrierWithGroupSync();
|
|
/* Every thread stores the shared value at byte offset 4 * local_idx. */
u.Store(4 * local_idx, m);
|
|
}
|
|
|
|
[compute shader fail todo]
|
|
/* Negative test: `m` is an array, but it has a single element while the
 * group has four threads (numthreads(4, 1, 1)). The section header marks this
 * shader as expected to fail to compile under the preceding [require]
 * constraints (4.0 <= SM < 5.0). */
RWByteAddressBuffer u : register(u0);
|
|
groupshared uint m[1];
|
|
|
|
[numthreads(4, 1, 1)]
|
|
void main(uint local_idx : SV_GroupIndex)
|
|
{
|
|
/* Only thread 0 initialises the single shared element. */
if (local_idx == 0)
|
|
m[0] = 0;
|
|
GroupMemoryBarrierWithGroupSync();
|
|
/* Every thread stores element 0 at byte offset 4 * local_idx. */
u.Store(4 * local_idx, m[0]);
|
|
}
|
|
|
|
[compute shader notimpl]
|
|
/* `m` has four elements, matching numthreads(4, 1, 1), but the store below
 * uses the constant index 0 instead of SV_GroupIndex. According to the note
 * in the [require] section this gives E_NOTIMPL rather than E_FAIL, which is
 * what the `notimpl` section header expects. */
RWByteAddressBuffer u : register(u0);
|
|
groupshared uint m[4];
|
|
|
|
[numthreads(4, 1, 1)]
|
|
void main(uint local_idx : SV_GroupIndex)
|
|
{
|
|
/* Store to groupshared memory that is not indexed by SV_GroupIndex. */
if (local_idx == 0)
|
|
m[0] = 0;
|
|
GroupMemoryBarrierWithGroupSync();
|
|
/* Every thread stores element 0 at byte offset 4 * local_idx. */
u.Store(4 * local_idx, m[0]);
|
|
}
|
|
|
|
[require]
|
|
shader model >= 4.0
|
|
|
|
[uav 0]
|
|
format r32-typeless
|
|
size (raw_buffer, 4)
|
|
|
|
0 0 0 0
|
|
|
|
[compute shader todo]
|
|
/* Positive counterpart of the cases above: `m` has one element per thread
 * (four, matching numthreads(4, 1, 1)) and each thread stores only to the
 * element selected by SV_GroupIndex. */
RWByteAddressBuffer u : register(u0);
|
|
groupshared uint m[4];
|
|
|
|
[numthreads(4, 1, 1)]
|
|
void main(uint local_idx : SV_GroupIndex)
|
|
{
|
|
/* Each thread fills its own slot with the marker value. */
m[local_idx] = 0xcafef00d;
|
|
GroupMemoryBarrierWithGroupSync();
|
|
/* All threads read slot 0 (written by thread 0) and store it at byte
 * offset 4 * local_idx; the [test] section probes for 0xcafef00d in all four
 * elements. */
u.Store(4 * local_idx, m[0]);
|
|
}
|
|
|
|
[test]
|
|
todo(sm<6 | msl) dispatch 1 1 1
|
|
probe uav 0 (0) u32(0xcafef00d)
|
|
probe uav 0 (1) u32(0xcafef00d)
|
|
probe uav 0 (2) u32(0xcafef00d)
|
|
probe uav 0 (3) u32(0xcafef00d)
|
|
|
|
[require]
|
|
shader model >= 5.0
|
|
|
|
[uav 1]
|
|
format r32-typeless
|
|
size (raw_buffer, 4)
|
|
|
|
1 0 0 0
|
|
|
|
[compute shader]
|
|
/* Per-group atomic accumulation into an unsigned groupshared scalar.
 * Thread 0 seeds the counter with the group ID, all 32 threads atomically add
 * the group ID to it, and thread 0 writes the total (33 * group ID) to the
 * UAV element belonging to the group. */
RWByteAddressBuffer output : register(u1);
groupshared uint group_total;

[numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    if (local_idx == 0)
    {
        group_total = group_id.x;
    }
    GroupMemoryBarrierWithGroupSync();

    InterlockedAdd(group_total, group_id.x);
    GroupMemoryBarrierWithGroupSync();

    if (local_idx == 0)
    {
        output.Store(4 * group_id.x, group_total);
    }
}
|
|
|
|
[test]
|
|
todo(glsl | msl) dispatch 4 1 1
|
|
probe uav 1 (0) u32(0)
|
|
probe uav 1 (1) u32(33)
|
|
probe uav 1 (2) u32(66)
|
|
probe uav 1 (3) u32(99)
|
|
|
|
|
|
[uav 1]
|
|
format r32-typeless
|
|
size (raw_buffer, 4)
|
|
|
|
1 0 0 0
|
|
|
|
[compute shader]
|
|
/* Signed variant of the per-group atomic accumulation test.
 * Thread 0 seeds the signed counter with the group ID, all 32 threads
 * atomically add the negated group ID, and thread 0 writes the total
 * (-31 * group ID) to the UAV element belonging to the group. */
RWByteAddressBuffer output : register(u1);
groupshared int group_total;

[numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    if (local_idx == 0)
    {
        group_total = group_id.x;
    }
    GroupMemoryBarrierWithGroupSync();

    InterlockedAdd(group_total, -group_id.x);
    GroupMemoryBarrierWithGroupSync();

    if (local_idx == 0)
    {
        output.Store(4 * group_id.x, group_total);
    }
}
|
|
|
|
[test]
|
|
todo(glsl | msl) dispatch 4 1 1
|
|
probe uav 1 (0) i32(0)
|
|
probe uav 1 (1) i32(-31)
|
|
probe uav 1 (2) i32(-62)
|
|
probe uav 1 (3) i32(-93)
|
|
|
|
|
|
[uav 1]
|
|
format r32-float
|
|
size (buffer, 8)
|
|
|
|
1 1 1 1 0 0 0 0
|
|
|
|
[uav 2]
|
|
format r32-sint
|
|
size (buffer, 8)
|
|
|
|
1 1 1 1 0 0 0 0
|
|
|
|
[compute shader todo]
|
|
/* Exercises a groupshared array of structs: plain member stores, atomic adds
 * on a struct member through both a constant index and a uniform-supplied
 * index, and per-thread read-modify-write of the thread's own element. */
/* Element index used by the second InterlockedAdd below; the [test] section
 * sets it with `uniform 0 uint 1`. */
uniform uint idx;
|
|
|
|
#define GROUP_SIZE 4
|
|
|
|
struct data
|
|
{
|
|
float f;
|
|
uint u;
|
|
};
|
|
|
|
RWBuffer<float> u : register(u1);
|
|
/* NOTE(review): declared as RWBuffer<uint>, while the [uav 2] section uses
 * format r32-sint and the probes read i32 values -- confirm this mismatch is
 * intentional. */
RWBuffer<uint> u2 : register(u2);
|
|
groupshared data m[GROUP_SIZE];
|
|
|
|
|
|
[numthreads(GROUP_SIZE, 1, 1)]
|
|
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID,
|
|
uint thread_id : SV_DispatchThreadID)
|
|
{
|
|
uint i;
|
|
/* Thread 0 seeds both members of every element with the group ID. */
if (!local_idx)
|
|
{
|
|
for (i = 0; i < GROUP_SIZE; ++i)
|
|
{
|
|
m[i].f = group_id.x;
|
|
m[i].u = group_id.x;
|
|
}
|
|
}
|
|
GroupMemoryBarrierWithGroupSync();
|
|
/* Every thread adds 2 to element 0 and 1 to element `idx`, so with four
 * threads these members grow by 8 and 4 respectively. */
InterlockedAdd(m[0].u, 2);
|
|
InterlockedAdd(m[idx].u, 1);
|
|
GroupMemoryBarrierWithGroupSync();
|
|
/* Each thread adds the group ID to its own element local_idx times. */
for (i = 0; i < local_idx; ++i)
|
|
{
|
|
m[local_idx].f += group_id.x;
|
|
m[local_idx].u += group_id.x;
|
|
}
|
|
/* Each thread writes its own element's members to the two UAVs, indexed by
 * the dispatch thread ID. */
u[thread_id.x] = m[local_idx].f;
|
|
u2[thread_id.x] = m[local_idx].u;
|
|
}
|
|
|
|
[test]
|
|
uniform 0 uint 1
|
|
todo(sm<6 | msl) dispatch 2 1 1
|
|
probe uav 1 (0) f32(0.0)
|
|
probe uav 1 (1) f32(0.0)
|
|
probe uav 1 (2) f32(0.0)
|
|
probe uav 1 (3) f32(0.0)
|
|
probe uav 1 (4) f32(1.0)
|
|
probe uav 1 (5) f32(2.0)
|
|
probe uav 1 (6) f32(3.0)
|
|
probe uav 1 (7) f32(4.0)
|
|
probe uav 2 (0) i32(8)
|
|
probe uav 2 (1) i32(4)
|
|
probe uav 2 (2) i32(0)
|
|
probe uav 2 (3) i32(0)
|
|
probe uav 2 (4) i32(9)
|
|
probe uav 2 (5) i32(6)
|
|
probe uav 2 (6) i32(3)
|
|
probe uav 2 (7) i32(4)
|
|
|
|
|
|
[uav 1]
|
|
format r32-typeless
|
|
size (raw_buffer, 1)
|
|
|
|
0
|
|
|
|
[compute shader]
|
|
/* Compare-exchange on a groupshared scalar.
 * Thread 0 seeds the slot with 7; every thread then tries to replace its own
 * index with index + 32. Only thread 7 matches, leaving 39, and no thread
 * index (0..31) can match 39 afterwards. Thread 0 writes the result out. */
RWByteAddressBuffer output : register(u1);
groupshared uint slot;

[numthreads(32, 1, 1)]
void main(uint local_idx : SV_GroupIndex)
{
    /* Receives the pre-exchange value; required by the intrinsic but not
     * otherwise used. */
    uint previous;

    if (local_idx == 0)
    {
        slot = 7;
    }
    GroupMemoryBarrierWithGroupSync();

    InterlockedCompareExchange(slot, local_idx, local_idx + 32, previous);
    GroupMemoryBarrierWithGroupSync();

    if (local_idx == 0)
    {
        output.Store(0, slot);
    }
}
|
|
|
|
[test]
|
|
todo(glsl | msl) dispatch 1 1 1
|
|
probe uav 1 (0) u32(39)
|
|
|
|
[uav 0]
|
|
format r32-typeless
|
|
size (raw_buffer, 8)
|
|
|
|
0xf000f 0xf000f 0xf000f 0xf000f
|
|
0xf000f 0xf000f 0xf000f 0xf000f
|
|
|
|
[compute shader todo]
|
|
/* Minimum-precision groupshared array test.
 * Each thread seeds its own slot with group ID + 8 * (local_idx + 1), then
 * adds the value of the neighbouring slot (local_idx ^ 1), so both slots of a
 * pair end up holding the pair's sum. Each thread finally writes its slot, as
 * a full uint, to the UAV element for (group, thread). */
RWByteAddressBuffer u : register(u0);
groupshared min16uint m[4];

[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    min16uint id = (min16uint)group_id.x;
    /* 16-bit atomic ops are not supported. */
    m[local_idx] = id + 8 * (local_idx + 1);
    GroupMemoryBarrierWithGroupSync();
    /* Take a snapshot of the neighbour's slot and synchronise before touching
     * our own slot. The neighbour reads our slot at the same time, so a plain
     * in-place "m[local_idx] += m[local_idx ^ 1]" would be a data race whose
     * result depends on the threads happening to run in lockstep. */
    min16uint neighbour = m[local_idx ^ 1];
    GroupMemoryBarrierWithGroupSync();
    m[local_idx] += neighbour;
    GroupMemoryBarrierWithGroupSync();
    /* Depending on the driver and D3D version, two or four bytes are written
     * when using a minimum precision type, so we explicitly cast. */
    u.Store(16 * group_id.x + 4 * local_idx, (uint)m[local_idx]);
}
|
|
|
|
[test]
|
|
todo(sm<6 | msl) dispatch 2 1 1
|
|
probe uav 0 (0) u32(0x18)
|
|
probe uav 0 (1) u32(0x18)
|
|
probe uav 0 (2) u32(0x38)
|
|
probe uav 0 (3) u32(0x38)
|
|
probe uav 0 (4) u32(0x1a)
|
|
probe uav 0 (5) u32(0x1a)
|
|
probe uav 0 (6) u32(0x3a)
|
|
probe uav 0 (7) u32(0x3a)
|
|
|
|
[uav 0]
|
|
format r32-typeless
|
|
size (raw_buffer, 8)
|
|
|
|
0xf000f 0xf000f 0xf000f 0xf000f
|
|
0xf000f 0xf000f 0xf000f 0xf000f
|
|
|
|
[compute shader]
|
|
/* Non-atomic read-modify-write of a groupshared min16uint scalar.
 * The four threads take turns, separated by barriers, to build the value
 * 0x4321 one nibble at a time. Every thread then combines it with
 * (group ID + 1) << 16 and stores the result in its own UAV element. */
RWByteAddressBuffer output : register(u0);
groupshared min16uint nibbles;

[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    /* Thread 0 seeds the value. */
    if (local_idx == 0)
    {
        nibbles = local_idx + 1;
    }
    GroupMemoryBarrierWithGroupSync();

    /* Threads 1, 2 and 3 each add (index + 1) shifted into nibble `index`;
     * the barriers order these plain updates. */
    if (local_idx == 1)
    {
        nibbles += (local_idx + 1) << (4 * local_idx);
    }
    GroupMemoryBarrierWithGroupSync();

    if (local_idx == 2)
    {
        nibbles += (local_idx + 1) << (4 * local_idx);
    }
    GroupMemoryBarrierWithGroupSync();

    if (local_idx == 3)
    {
        nibbles += (local_idx + 1) << (4 * local_idx);
    }
    GroupMemoryBarrierWithGroupSync();

    /* Widen to uint before tagging the upper half with the group number. */
    uint tagged = nibbles;
    tagged += (group_id + 1) << 16;
    output.Store(16 * group_id + 4 * local_idx, tagged);
}
|
|
|
|
[test]
|
|
todo(glsl | msl) dispatch 2 1 1
|
|
probe uav 0 (0) u32(0x14321)
|
|
probe uav 0 (1) u32(0x14321)
|
|
probe uav 0 (2) u32(0x14321)
|
|
probe uav 0 (3) u32(0x14321)
|
|
probe uav 0 (4) u32(0x24321)
|
|
probe uav 0 (5) u32(0x24321)
|
|
probe uav 0 (6) u32(0x24321)
|
|
probe uav 0 (7) u32(0x24321)
|
|
|
|
[require]
|
|
shader model >= 6.2
|
|
native-16-bit
|
|
|
|
[uav 1]
|
|
format r32-typeless
|
|
size (raw_buffer, 4)
|
|
|
|
0xf000f 0xf000f 0xf000f 0xf000f
|
|
|
|
[compute shader]
|
|
/* Native 16-bit groupshared array test.
 * Each thread seeds its own slot with group ID + 8 * (local_idx + 1), then
 * adds the value of the neighbouring slot (local_idx ^ 1), so both slots of a
 * pair end up holding the pair's sum. Each thread finally stores its 16-bit
 * slot at byte offset 8 * group ID + 2 * local_idx. */
RWByteAddressBuffer u : register(u1);
groupshared uint16_t m[4];

[numthreads(4, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
    uint16_t id = (uint16_t)group_id.x;
    /* 16-bit atomic ops are not supported. */
    m[local_idx] = id + 8 * (local_idx + 1);
    GroupMemoryBarrierWithGroupSync();
    /* Take a snapshot of the neighbour's slot and synchronise before touching
     * our own slot. The neighbour reads our slot at the same time, so a plain
     * in-place "m[local_idx] += m[local_idx ^ 1]" would be a data race whose
     * result depends on the threads happening to run in lockstep. */
    uint16_t neighbour = m[local_idx ^ 1];
    GroupMemoryBarrierWithGroupSync();
    m[local_idx] += neighbour;
    GroupMemoryBarrierWithGroupSync();
    u.Store(8 * group_id.x + 2 * local_idx, m[local_idx]);
}
|
|
|
|
[test]
|
|
dispatch 2 1 1
|
|
probe uav 1 (0) u32(0x180018)
|
|
probe uav 1 (1) u32(0x380038)
|
|
probe uav 1 (2) u32(0x1a001a)
|
|
probe uav 1 (3) u32(0x3a003a)
|