Files
UnrealEngineUWP/Engine/Shaders/Private/ByteBuffer.usf
tiago costa fd1b310235 Support FRWBuffer scattered updates.
- Modified MemsetResource and MemcpyResource to take number of elements instead of number of bytes as input, since FRWBuffer can use different formats.

#rb Yuriy.ODonnell
#jira UE-120168
#preflight 612f98feb6b29c00013fbb13

[CL 17394373 by tiago costa in ue5-main branch]
2021-09-01 19:20:17 -04:00

186 lines
6.0 KiB
Plaintext

// Copyright Epic Games, Inc. All Rights Reserved.
#include "Common.ush"
// Values compared against RESOURCE_TYPE to select which resource flavour the kernels below operate on.
// RESOURCE_TYPE itself is not defined in this file - presumably supplied by the shader compile environment; confirm against the host code.
#define RESOURCE_TYPE_FLOAT4_BUFFER (0)
#define RESOURCE_TYPE_FLOAT4_STRUCTURED_BUFFER (1)
#define RESOURCE_TYPE_UINT_BUFFER (2)
#define RESOURCE_TYPE_UINT4_ALIGNED_BUFFER (3)
#define RESOURCE_TYPE_FLOAT4_TEXTURE (4)
// 32-bit pattern written by MemsetBufferCS (reinterpreted with asfloat() for the float4 variants).
uint Value;
// MemsetBufferCS / MemcpyCS: number of elements to process (float4s or dwords, depending on RESOURCE_TYPE).
// ScatterCopyCS: number of consecutive elements copied per scatter entry.
uint Size;
// Divisor used by the texture variants to turn a linear element index into (x, y) texel coordinates.
uint Float4sPerLine;
// Number of scatter entries processed by ScatterCopyCS.
uint NumScatters;
// Element offset added to the source index in MemcpyCS (buffer variants) and to the dispatch thread id in ScatterCopyCS.
uint SrcOffset;
// Element offset added to the destination index in MemsetBufferCS / MemcpyCS (buffer variants).
uint DstOffset;
// Resource bindings. Each RESOURCE_TYPE declares a scatter index source, a copy source,
// an upload source (read by ScatterCopyCS) and a writable destination.
#if RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_BUFFER
StructuredBuffer<uint> ScatterStructuredBuffer;
Buffer<float4> SrcBuffer;
StructuredBuffer<float4> UploadStructuredBuffer;
RWBuffer<float4> DstBuffer;
#elif RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_STRUCTURED_BUFFER
StructuredBuffer<uint> ScatterStructuredBuffer;
StructuredBuffer<float4> SrcStructuredBuffer;
StructuredBuffer<float4> UploadStructuredBuffer;
RWStructuredBuffer<float4> DstStructuredBuffer;
#elif RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_TEXTURE
StructuredBuffer<uint> ScatterStructuredBuffer;
Texture2D<float4> SrcTexture;
StructuredBuffer<float4> UploadStructuredBuffer;
RWTexture2D<float4> DstTexture;
#elif RESOURCE_TYPE == RESOURCE_TYPE_UINT_BUFFER || RESOURCE_TYPE == RESOURCE_TYPE_UINT4_ALIGNED_BUFFER
// Both uint variants share the same raw (byte address) bindings; addresses passed to Load/Store are in bytes.
ByteAddressBuffer ScatterByteAddressBuffer;
ByteAddressBuffer SrcByteAddressBuffer;
ByteAddressBuffer UploadByteAddressBuffer;
RWByteAddressBuffer DstByteAddressBuffer;
#endif
// Fills the destination resource with the 32-bit pattern stored in Value.
// - float4 buffer / structured buffer / texture variants: one thread writes one float4 element,
//   with every component set to asfloat(Value).
// - uint buffer variants: one thread writes up to four consecutive dwords; the thread that
//   straddles the end of the range writes only the 1-3 dwords that are still in range.
[numthreads(64, 1, 1)]
void MemsetBufferCS( uint ThreadId : SV_DispatchThreadID )
{
#if RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_BUFFER
	// Size and DstOffset are counted in float4 elements.
	if (ThreadId >= Size)
	{
		return;
	}

	const float FillValue = asfloat(Value);
	DstBuffer[DstOffset + ThreadId] = FillValue;
#elif RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_STRUCTURED_BUFFER
	// Size and DstOffset are counted in float4 elements.
	if (ThreadId >= Size)
	{
		return;
	}

	const float FillValue = asfloat(Value);
	DstStructuredBuffer[DstOffset + ThreadId] = FillValue;
#elif RESOURCE_TYPE == RESOURCE_TYPE_UINT_BUFFER || RESOURCE_TYPE == RESOURCE_TYPE_UINT4_ALIGNED_BUFFER
	// Size and DstOffset are counted in dwords; Store*() expects a byte address, hence the final * 4.
	const uint FirstDword = ThreadId * 4;
	const uint DstByteAddress = (DstOffset + FirstDword) * 4;

	if (FirstDword + 3 < Size)
	{
		// All four dwords of this thread are inside the range.
		DstByteAddressBuffer.Store4(DstByteAddress, uint4(Value, Value, Value, Value));
	}
	else if (FirstDword + 2 < Size)
	{
		DstByteAddressBuffer.Store3(DstByteAddress, uint3(Value, Value, Value));
	}
	else if (FirstDword + 1 < Size)
	{
		DstByteAddressBuffer.Store2(DstByteAddress, uint2(Value, Value));
	}
	else if (FirstDword < Size)
	{
		DstByteAddressBuffer.Store(DstByteAddress, Value);
	}
#elif RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_TEXTURE
	// Size is counted in float4 texels; the linear index is unrolled row by row.
	// NOTE(review): DstOffset is not applied in this variant - confirm the host never passes a non-zero offset for textures.
	if (ThreadId < Size)
	{
		const uint2 Texel = uint2(ThreadId % (Float4sPerLine), ThreadId / (Float4sPerLine));
		DstTexture[Texel] = asfloat(Value);
	}
#else
	#error "Not implemented"
#endif
}
// Copies Size elements from the source resource to the destination resource.
// - float4 buffer / structured buffer variants: one thread copies one float4 element from
//   SrcOffset + ThreadId to DstOffset + ThreadId.
// - texture variant: one thread copies one texel; the linear thread index is converted to
//   (x, y) using Float4sPerLine and the same coordinate is used for source and destination.
// - uint buffer variants: one thread copies up to four consecutive dwords; the thread that
//   straddles the end of the range copies only the 1-3 dwords that are still in range.
[numthreads(64, 1, 1)]
void MemcpyCS( uint ThreadId : SV_DispatchThreadID )
{
#if RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_BUFFER
	// Size and offsets are in float4s
	if (ThreadId < Size)
	{
		DstBuffer[DstOffset + ThreadId] = SrcBuffer[SrcOffset + ThreadId];
	}
#elif RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_STRUCTURED_BUFFER
	// Size and offsets are in float4s
	if (ThreadId < Size)
	{
		DstStructuredBuffer[DstOffset + ThreadId] = SrcStructuredBuffer[SrcOffset + ThreadId];
	}
#elif RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_TEXTURE
	// Size is in float4 texels.
	// NOTE(review): SrcOffset / DstOffset are not applied in this variant - confirm the host never passes non-zero offsets for textures.
	uint2 IndexTexture;
	IndexTexture.y = ThreadId / (Float4sPerLine);
	IndexTexture.x = ThreadId % (Float4sPerLine);

	if (ThreadId < Size)
	{
		// Texture2D::Load takes an integer (x, y, mip) location; build it as int3 directly
		// instead of round-tripping the texel coordinates through float.
		float4 SrcValue = SrcTexture.Load(int3(IndexTexture.x, IndexTexture.y, 0));
		DstTexture[IndexTexture.xy] = SrcValue;
	}
#elif RESOURCE_TYPE == RESOURCE_TYPE_UINT_BUFFER || RESOURCE_TYPE == RESOURCE_TYPE_UINT4_ALIGNED_BUFFER
	// Size and offsets are in dwords; Load*/Store* expect byte addresses, hence the * 4 below.
	uint SrcIndex = SrcOffset + ThreadId * 4;
	uint DstIndex = DstOffset + ThreadId * 4;

	if (ThreadId * 4 + 3 < Size)
	{
		// All four dwords of this thread are inside the range.
		uint4 SrcData = SrcByteAddressBuffer.Load4(SrcIndex * 4);
		DstByteAddressBuffer.Store4(DstIndex * 4, SrcData);
	}
	else if (ThreadId * 4 + 2 < Size)
	{
		uint3 SrcData = SrcByteAddressBuffer.Load3(SrcIndex * 4);
		DstByteAddressBuffer.Store3(DstIndex * 4, SrcData);
	}
	else if (ThreadId * 4 + 1 < Size)
	{
		uint2 SrcData = SrcByteAddressBuffer.Load2(SrcIndex * 4);
		DstByteAddressBuffer.Store2(DstIndex * 4, SrcData);
	}
	else if (ThreadId * 4 < Size)
	{
		uint SrcData = SrcByteAddressBuffer.Load(SrcIndex * 4);
		DstByteAddressBuffer.Store(DstIndex * 4, SrcData);
	}
#else
	#error "Not implemented"
#endif
}
// Copies elements from the upload buffer to scattered locations in the destination resource.
// The upload data is read linearly: upload element i belongs to scatter entry (i / Size) and is
// the (i - entry * Size)-th element within it. The scatter buffer supplies, per entry, the
// destination index in units of Size elements.
// Element granularity is float4 for the float4 / texture variants, uint4 (16 bytes) for the
// UINT4_ALIGNED variant and a single dword for the UINT variant.
[numthreads(64, 1, 1)]
void ScatterCopyCS( uint DispatchThreadId : SV_DispatchThreadID )
{
	// SrcOffset shifts the dispatch so that a scatter can be split over several dispatches
	// (presumably - confirm against the host code).
	const uint UploadIndex = DispatchThreadId + SrcOffset;
	const uint ScatterIndex = UploadIndex / Size;
	const uint OffsetInScatter = UploadIndex - ScatterIndex * Size;

	// Threads past the last scatter entry have nothing to do.
	if (ScatterIndex >= NumScatters)
	{
		return;
	}

#if RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_BUFFER
	const uint DstElement = ScatterStructuredBuffer[ScatterIndex] * Size + OffsetInScatter;
	DstBuffer[DstElement] = UploadStructuredBuffer[UploadIndex];
#elif RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_STRUCTURED_BUFFER
	const uint DstElement = ScatterStructuredBuffer[ScatterIndex] * Size + OffsetInScatter;
	DstStructuredBuffer[DstElement] = UploadStructuredBuffer[UploadIndex];
#elif RESOURCE_TYPE == RESOURCE_TYPE_UINT4_ALIGNED_BUFFER
	// Scatter entries are dwords (4 bytes each); payload elements are uint4s (16 bytes each).
	const uint DstElement = ScatterByteAddressBuffer.Load(ScatterIndex * 4) * Size + OffsetInScatter;
	const uint4 Payload = UploadByteAddressBuffer.Load4(UploadIndex * 16);
	DstByteAddressBuffer.Store4(DstElement * 16, Payload);
#elif RESOURCE_TYPE == RESOURCE_TYPE_FLOAT4_TEXTURE
	// The destination element index is unrolled row by row into (x, y) texel coordinates.
	const uint DstElement = ScatterStructuredBuffer[ScatterIndex] * Size + OffsetInScatter;
	const uint2 Texel = uint2(DstElement % (Float4sPerLine), DstElement / (Float4sPerLine));
	const float4 Payload = UploadStructuredBuffer[UploadIndex];
	DstTexture[Texel] = Payload;
#elif RESOURCE_TYPE == RESOURCE_TYPE_UINT_BUFFER
	// Scatter entries and payload elements are both single dwords (4 bytes each).
	const uint DstElement = ScatterByteAddressBuffer.Load(ScatterIndex * 4) * Size + OffsetInScatter;
	const uint Payload = UploadByteAddressBuffer.Load(UploadIndex * 4);
	DstByteAddressBuffer.Store(DstElement * 4, Payload);
#else
	#error "Not implemented"
#endif
}