Files
UnrealEngineUWP/Engine/Shaders/Private/Strata/StrataMaterialClassification.usf
Sebastien Hillaire 8e389aec73 Strata - SSS data rework and optimisation for classification and SSS. Gain quite a bit of performance back on low end deferred platforms.
Now header is only uint and extra data is only read if needed.

#rb none
#preflight https://horde.devtools.epicgames.com/job/62821984046b81bf9393b136
#fyi charles.derousiers

[CL 20221584 by Sebastien Hillaire in ue5-main branch]
2022-05-16 05:54:34 -04:00

426 lines
17 KiB
Plaintext

// Copyright Epic Games, Inc. All Rights Reserved.
#include "/Engine/Private/Common.ush"
#define STRATA_INLINE_SHADING 0
#define STRATA_SSS_MATERIAL_OVERRIDE 0
#include "/Engine/Private/Strata/Strata.ush"
#include "StrataTile.ush"
#define GROUP_THREAD_COUNT (STRATA_TILE_SIZE * STRATA_TILE_SIZE)
////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if SHADER_TILE_CATEGORIZATION
// Shader parameters and resources of the tile categorization pass (TileMainCS).

// When > 0, TileMainCS writes 4 as the index count per instance of the indirect draws, otherwise 6.
int bRectPrimitive;
// NOTE(review): not read by TileMainCS in this section, which bounds-checks against View.ViewSizeAndInvSize instead.
int2 ViewResolution;
// Forwarded to GetStrataPixelDataByteOffset when addressing the material data of a pixel.
uint MaxBytesPerPixel;
// Both textures are handed to UnpackStrataHeaderIn to decode the per-pixel Strata header.
Texture2D<uint> TopLayerTexture;
Texture2DArray<uint> MaterialTextureArray;
// Indirect draw data buffer for all tile types
// For each tile type, dword +0 receives the index count per instance and dword +1 is atomically incremented once per tile appended to the matching list.
RWBuffer<uint> TileDrawIndirectDataBuffer;
// Per-category tile lists: each entry is a tile coordinate packed with StrataPackTile, stored at the slot returned by the atomic increment.
RWBuffer<uint> SimpleTileListDataBuffer;
RWBuffer<uint> SingleTileListDataBuffer;
RWBuffer<uint> ComplexTileListDataBuffer;
#if STRATA_OPAQUE_ROUGH_REFRACTION_ENABLED
// Tiles containing at least one pixel with opaque rough refraction enabled.
RWBuffer<uint> OpaqueRoughRefractionTileListDataBuffer;
// Tiles containing subsurface scattering but no opaque rough refraction.
RWBuffer<uint> SSSWithoutOpaqueRoughRefractionTileListDataBuffer;
// Per-pixel data decoded with StrataUnpackOpaqueRoughRefractionData.
Texture2D<float3> OpaqueRoughRefractionTexture;
#endif
#if !PERMUTATION_WAVE_OPS
// One flag word per thread of the group, OR-reduced across the group when wave operations are not used.
groupshared uint s_TileFlags[GROUP_THREAD_COUNT];
#endif
#if PERMUTATION_STRATA_CLEAR_DURING_CATEGORIZATION
// Slice 0 is written with 0 for every pixel that has no subsurface scattering.
RWTexture2DArray<uint> SSSTextureUAV;
#endif
// Tile categorization: one thread group per STRATA_TILE_SIZE x STRATA_TILE_SIZE pixel tile, one thread per pixel.
// Each thread decodes the Strata header of its pixel and derives per-pixel flags, the flags are OR-reduced over
// the group, and the first thread of the group appends the packed tile coordinate to exactly one of the
// complex/single/simple tile lists (plus the rough refraction / SSS lists when enabled).
//   DispatchThreadId : pixel coordinate relative to the view rectangle.
//   LinearIndex      : flattened thread index within the group, used for the reduction and to elect the writer.
//   GroupId          : tile coordinate, packed with StrataPackTile into the tile lists.
[numthreads(STRATA_TILE_SIZE, STRATA_TILE_SIZE, 1)]
void TileMainCS(uint2 DispatchThreadId : SV_DispatchThreadID, uint LinearIndex : SV_GroupIndex, uint3 GroupId : SV_GroupID)
{
	// The very first thread of the dispatch seeds dword +0 (index count per instance) of the indirect draw arguments of every tile type.
	if (all(DispatchThreadId == 0))
	{
		const uint IndexCountPerInstance = bRectPrimitive > 0 ? 4 : 6;
		TileDrawIndirectDataBuffer[GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_SIMPLE) + 0] = IndexCountPerInstance;
		TileDrawIndirectDataBuffer[GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_SINGLE) + 0] = IndexCountPerInstance;
		TileDrawIndirectDataBuffer[GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_COMPLEX) + 0] = IndexCountPerInstance;
		TileDrawIndirectDataBuffer[GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_ROUGH_REFRACT) + 0] = IndexCountPerInstance;
		TileDrawIndirectDataBuffer[GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_SSS_WITHOUT_ROUGH_REFRACT) + 0] = IndexCountPerInstance;
	}

	const uint2 PixelCoord = DispatchThreadId.xy + View.ViewRectMin.xy;
	const bool bIsValid = all(DispatchThreadId.xy < View.ViewSizeAndInvSize.xy);

	// Pixels outside of the view area leave every flag cleared, so they never promote a screen border tile
	// (when the view is not aligned to the shader group size) to a more expensive category.
	bool bContainsComplexMaterial = false;
	bool bContainsSimpleMaterial = false;
	bool bContainsSingleMaterial = false;
	bool bContainsStrataMaterial = false;
	bool bContainsOpaqueRoughRefraction = false;
	bool bContainsScreenSpaceSubsurfaceScattering = false;
	FStrataOpaqueRoughRefractionData OpaqueRoughRefractionData = (FStrataOpaqueRoughRefractionData)0;
	if (bIsValid)
	{
		FStrataAddressing StrataAddressing = GetStrataPixelDataByteOffset(PixelCoord, uint2(View.BufferSizeAndInvSize.xy), MaxBytesPerPixel);
		FStrataPixelHeader StrataPixelHeader = UnpackStrataHeaderIn(MaterialTextureArray, StrataAddressing, TopLayerTexture);

		const bool bIsSimple = IsSimpleMaterial(StrataPixelHeader) || StrataPixelHeader.BSDFCount == 0; // BSDFCount == 0 ensures that non-strata pixel, like sky pixels, won't make a simple tile flagged as complex
		const bool bIsSingle = !IsSimpleMaterial(StrataPixelHeader) && IsSingleMaterial(StrataPixelHeader);

		bContainsStrataMaterial = StrataPixelHeader.BSDFCount > 0;
		bContainsSimpleMaterial = bIsSimple;
		bContainsSingleMaterial = bIsSingle;
		bContainsComplexMaterial = !bIsSingle && !bIsSimple;
		bContainsScreenSpaceSubsurfaceScattering = HasSubsurface(StrataPixelHeader);
#if STRATA_OPAQUE_ROUGH_REFRACTION_ENABLED
		OpaqueRoughRefractionData = StrataUnpackOpaqueRoughRefractionData(OpaqueRoughRefractionTexture[PixelCoord]);
		bContainsOpaqueRoughRefraction = OpaqueRoughRefractionData.OpaqueRoughRefractionEnabled > 0.0f;
#endif
	}

#if PERMUTATION_STRATA_CLEAR_DURING_CATEGORIZATION
	BRANCH
	if (!bContainsScreenSpaceSubsurfaceScattering)
	{
		// We must fill all the pixel which does not have subsurface scattering by default so that the SSS code is not executed where it should not.
		// NOTE(review): this also runs for threads with bIsValid == false, i.e. for PixelCoord outside of the view rectangle - confirm this is intended.
		SSSTextureUAV[uint3(PixelCoord, 0)] = 0; // This is a good clear for FStrataSubsurfaceHeader, and we only need to clear the header.
	}
#endif

#if PERMUTATION_WAVE_OPS
	// NOTE(review): presumably relies on a wave covering the whole thread group - confirm against the permutation selection code.
	const bool bTileContainsStrata = WaveActiveAnyTrue(bContainsStrataMaterial);
	// const bool bTileContainsSimple = WaveActiveAnyTrue(bContainsSimpleMaterial);
	const bool bTileContainsSingle = WaveActiveAnyTrue(bContainsSingleMaterial);
	const bool bTileContainsComplex = WaveActiveAnyTrue(bContainsComplexMaterial);
	const bool bTileContainsOpaqueRoughRefraction = WaveActiveAnyTrue(bContainsOpaqueRoughRefraction);
	const bool bTileContainsScreenSpaceSubsurfaceScattering = WaveActiveAnyTrue(bContainsScreenSpaceSubsurfaceScattering);
#else // PERMUTATION_WAVE_OPS
	// Pack the per-pixel flags into one word per thread. Bit 0x2 is left unused (it was the simple material flag).
	s_TileFlags[LinearIndex] =
		(bContainsStrataMaterial ? 0x1u : 0u) |
		// (bContainsSimpleMaterial ? 0x2u : 0u) |
		(bContainsSingleMaterial ? 0x4u : 0u) |
		(bContainsComplexMaterial ? 0x8u : 0u) |
		(bContainsOpaqueRoughRefraction ? 0x10u : 0u) |
		(bContainsScreenSpaceSubsurfaceScattering ? 0x20u : 0u);

	// Group-wide OR reduction in shared memory. The hard-coded strides cover 64 entries,
	// i.e. this assumes GROUP_THREAD_COUNT == 64 - TODO confirm STRATA_TILE_SIZE == 8.
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 32)
	{
		s_TileFlags[LinearIndex] = s_TileFlags[LinearIndex] | s_TileFlags[LinearIndex + 32];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 16)
	{
		s_TileFlags[LinearIndex] = s_TileFlags[LinearIndex] | s_TileFlags[LinearIndex + 16];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 8)
	{
		s_TileFlags[LinearIndex] = s_TileFlags[LinearIndex] | s_TileFlags[LinearIndex + 8];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 4)
	{
		s_TileFlags[LinearIndex] = s_TileFlags[LinearIndex] | s_TileFlags[LinearIndex + 4];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 2)
	{
		s_TileFlags[LinearIndex] = s_TileFlags[LinearIndex] | s_TileFlags[LinearIndex + 2];
	}
	GroupMemoryBarrierWithGroupSync();

	// The reduced result lives in elements 0 and 1. Reading those fixed slots (instead of LinearIndex and LinearIndex + 1)
	// keeps the last thread of the group from reading past the end of s_TileFlags; only thread 0 consumes the value below.
	const uint FinalTileFlags = s_TileFlags[0] | s_TileFlags[1];

	const bool bTileContainsStrata = (FinalTileFlags & 0x1u) > 0;
	// const bool bTileContainsSimple = (FinalTileFlags & 0x2u) > 0;
	const bool bTileContainsSingle = (FinalTileFlags & 0x4u) > 0;
	const bool bTileContainsComplex = (FinalTileFlags & 0x8u) > 0;
	const bool bTileContainsOpaqueRoughRefraction = (FinalTileFlags & 0x10u) > 0;
	const bool bTileContainsScreenSpaceSubsurfaceScattering = (FinalTileFlags & 0x20u) > 0;
#endif // PERMUTATION_WAVE_OPS

	// A single thread per group publishes the tile, and only when the tile holds at least one Strata pixel.
	if (LinearIndex < 1 && bTileContainsStrata)
	{
		uint EncodedTile = StrataPackTile(GroupId.xy);

		// Material complexity lists are mutually exclusive: the most expensive category found in the tile wins.
		if (bTileContainsComplex)
		{
			uint WriteToIndex;
			InterlockedAdd(TileDrawIndirectDataBuffer[GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_COMPLEX) + 1], 1, WriteToIndex);
			ComplexTileListDataBuffer[WriteToIndex] = EncodedTile;
		}
		else if (bTileContainsSingle)
		{
			uint WriteToIndex;
			InterlockedAdd(TileDrawIndirectDataBuffer[GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_SINGLE) + 1], 1, WriteToIndex);
			SingleTileListDataBuffer[WriteToIndex] = EncodedTile;
		}
		else // (bTileContainsSimple)
		{
			uint WriteToIndex;
			InterlockedAdd(TileDrawIndirectDataBuffer[GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_SIMPLE) + 1], 1, WriteToIndex);
			SimpleTileListDataBuffer[WriteToIndex] = EncodedTile;
		}

#if STRATA_OPAQUE_ROUGH_REFRACTION_ENABLED
		// These two lists are filled in addition to the complexity list selected above.
		if (bTileContainsOpaqueRoughRefraction)
		{
			uint WriteToIndex;
			InterlockedAdd(TileDrawIndirectDataBuffer[GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_ROUGH_REFRACT) + 1], 1, WriteToIndex);
			OpaqueRoughRefractionTileListDataBuffer[WriteToIndex] = EncodedTile;
		}
		if (bTileContainsScreenSpaceSubsurfaceScattering && !bTileContainsOpaqueRoughRefraction)
		{
			uint WriteToIndex;
			InterlockedAdd(TileDrawIndirectDataBuffer[GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_SSS_WITHOUT_ROUGH_REFRACT) + 1], 1, WriteToIndex);
			SSSWithoutOpaqueRoughRefractionTileListDataBuffer[WriteToIndex] = EncodedTile;
		}
#endif
	}
}
#endif // SHADER_TILE_CATEGORIZATION
////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if SHADER_MATERIAL_TILE_PREPARE_ARGS
// Source: indirect draw arguments; ArgsMainCS reads dword +1 of each tile type.
Buffer<uint> TileDrawIndirectDataBuffer;
// Destination: indirect dispatch arguments; ArgsMainCS writes dwords +0, +1 and +2 of each tile type.
RWBuffer<uint> TileDispatchIndirectDataBuffer;
// Converts the indirect draw arguments of every tile type into indirect dispatch arguments:
// dword +1 of the draw arguments becomes the X group count, while the Y and Z group counts are set to 1.
// Only thread (0,0) of the dispatch performs the writes.
[numthreads(1, 1, 1)]
void ArgsMainCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	if (any(DispatchThreadId != 0))
	{
		return;
	}

	// Base dword offsets of each tile type inside the draw (source) buffer.
	const uint SimpleDrawBase = GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_SIMPLE);
	const uint SingleDrawBase = GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_SINGLE);
	const uint ComplexDrawBase = GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_COMPLEX);
	const uint RoughRefractDrawBase = GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_ROUGH_REFRACT);
	const uint SSSDrawBase = GetStrataTileTypeDrawIndirectArgOffset_DWord(STRATA_TILE_TYPE_SSS_WITHOUT_ROUGH_REFRACT);

	// Base dword offsets of each tile type inside the dispatch (destination) buffer.
	const uint SimpleDispatchBase = GetStrataTileTypeDispatchIndirectArgOffset_DWord(STRATA_TILE_TYPE_SIMPLE);
	const uint SingleDispatchBase = GetStrataTileTypeDispatchIndirectArgOffset_DWord(STRATA_TILE_TYPE_SINGLE);
	const uint ComplexDispatchBase = GetStrataTileTypeDispatchIndirectArgOffset_DWord(STRATA_TILE_TYPE_COMPLEX);
	const uint RoughRefractDispatchBase = GetStrataTileTypeDispatchIndirectArgOffset_DWord(STRATA_TILE_TYPE_ROUGH_REFRACT);
	const uint SSSDispatchBase = GetStrataTileTypeDispatchIndirectArgOffset_DWord(STRATA_TILE_TYPE_SSS_WITHOUT_ROUGH_REFRACT);

	// X group count: copied from dword +1 of the matching draw arguments.
	TileDispatchIndirectDataBuffer[SimpleDispatchBase + 0] = TileDrawIndirectDataBuffer[SimpleDrawBase + 1];
	TileDispatchIndirectDataBuffer[SingleDispatchBase + 0] = TileDrawIndirectDataBuffer[SingleDrawBase + 1];
	TileDispatchIndirectDataBuffer[ComplexDispatchBase + 0] = TileDrawIndirectDataBuffer[ComplexDrawBase + 1];
	TileDispatchIndirectDataBuffer[RoughRefractDispatchBase + 0] = TileDrawIndirectDataBuffer[RoughRefractDrawBase + 1];
	TileDispatchIndirectDataBuffer[SSSDispatchBase + 0] = TileDrawIndirectDataBuffer[SSSDrawBase + 1];

	// Y group count.
	TileDispatchIndirectDataBuffer[SimpleDispatchBase + 1] = 1;
	TileDispatchIndirectDataBuffer[SingleDispatchBase + 1] = 1;
	TileDispatchIndirectDataBuffer[ComplexDispatchBase + 1] = 1;
	TileDispatchIndirectDataBuffer[RoughRefractDispatchBase + 1] = 1;
	TileDispatchIndirectDataBuffer[SSSDispatchBase + 1] = 1;

	// Z group count.
	TileDispatchIndirectDataBuffer[SimpleDispatchBase + 2] = 1;
	TileDispatchIndirectDataBuffer[SingleDispatchBase + 2] = 1;
	TileDispatchIndirectDataBuffer[ComplexDispatchBase + 2] = 1;
	TileDispatchIndirectDataBuffer[RoughRefractDispatchBase + 2] = 1;
	TileDispatchIndirectDataBuffer[SSSDispatchBase + 2] = 1;
}
#endif // SHADER_MATERIAL_TILE_PREPARE_ARGS
////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if SHADER_BSDF_TILE_PREPARE_ARGS
// NOTE(review): TileCount_Primary, TileOffset_Primary and OverflowTileOffset are not read by ArgsMainCS in this section.
int2 TileCount_Primary;
int2 TileOffset_Primary;
// Only .x is used by ArgsMainCS: it caps the X group count and is the divisor used to derive the Y group count.
int2 OverflowTileCount;
int2 OverflowTileOffset;
// Element 0 is read by ArgsMainCS as the number of tiles to dispatch.
Buffer<uint> TileDrawIndirectDataBuffer;
// Receives the (X, Y, 1) group counts in elements 0..2.
RWBuffer<uint> TileDispatchIndirectDataBuffer;
// Receives the same group counts divided by STRATA_TILE_SIZE (rounded up) in elements 0..2.
RWBuffer<uint> TileDispatchPerThreadIndirectDataBuffer;
// Returns X / Y rounded up to the next integer. Y must be non-zero (the result is a division by Y).
uint DivideAndRoundUp(uint X, uint Y)
{
	const uint RoundingBias = Y - 1;
	return (X + RoundingBias) / Y;
}
// Turns the tile count stored in element 0 of TileDrawIndirectDataBuffer into a 2D indirect dispatch:
// at most OverflowTileCount.x groups along X, and as many rows along Y as needed to cover every tile.
// A second set of arguments stores the same grid divided by STRATA_TILE_SIZE (rounded up).
// Only thread (0,0) of the dispatch performs the writes.
[numthreads(1, 1, 1)]
void ArgsMainCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	if (any(DispatchThreadId != 0))
	{
		return;
	}

	const uint TotalTiles = TileDrawIndirectDataBuffer[0];
	const uint MaxTilesPerRow = uint(OverflowTileCount.x);

	const uint GroupCountX = min(TotalTiles, MaxTilesPerRow);
	const uint GroupCountY = DivideAndRoundUp(TotalTiles, MaxTilesPerRow);

	// One group per tile.
	TileDispatchIndirectDataBuffer[0] = GroupCountX;
	TileDispatchIndirectDataBuffer[1] = GroupCountY;
	TileDispatchIndirectDataBuffer[2] = 1;

	// Same grid, divided by the tile size on both axes.
	TileDispatchPerThreadIndirectDataBuffer[0] = DivideAndRoundUp(GroupCountX, STRATA_TILE_SIZE);
	TileDispatchPerThreadIndirectDataBuffer[1] = DivideAndRoundUp(GroupCountY, STRATA_TILE_SIZE);
	TileDispatchPerThreadIndirectDataBuffer[2] = 1;
}
#endif // SHADER_BSDF_TILE_PREPARE_ARGS
////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if SHADER_CLEAR_MATERIAL_BUFFER
// Strata material buffer; ClearMaterialBufferMainCS writes a packed empty header into slice 0.
RWTexture2DArray<uint> MaterialTextureArrayUAV;
// Subsurface data; ClearMaterialBufferMainCS writes 0 into slice 0.
RWTexture2DArray<uint> SSSTextureUAV;
// NOTE(review): not read by ClearMaterialBufferMainCS in this section.
uint MaxBytesPerPixel;
// Exclusive upper bound of the texel coordinates cleared by the pass.
int2 TiledViewBufferResolution;
// Custom clear of the Strata material buffer, one thread per texel.
// Only the first slice holds the per-pixel header, so a single uint is written per pixel rather than
// clearing the whole buffer, which would be too slow. The subsurface header (slice 0) is reset as well.
[numthreads(8, 8, 1)]
void ClearMaterialBufferMainCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	const bool bInsideBuffer = all(int2(DispatchThreadId.xy) < TiledViewBufferResolution);
	if (bInsideBuffer)
	{
		// Both headers live in slice 0 of their respective texture arrays.
		const uint3 HeaderTexel = uint3(DispatchThreadId.xy, 0);

		// Pack a freshly initialised header that carries no BSDF.
		FStrataPixelHeader EmptyHeader = InitialiseStrataPixelHeader();
		uint EmptyBSDFCount = 0;
		uint PackedEmptyHeader = PackStrataHeader(EmptyBSDFCount, EmptyHeader);

		MaterialTextureArrayUAV[HeaderTexel] = PackedEmptyHeader;
		SSSTextureUAV[HeaderTexel] = 0; // This is a good clear for FStrataSubsurfaceHeader, and we only need to clear the header.
	}
}
#endif // SHADER_CLEAR_MATERIAL_BUFFER
////////////////////////////////////////////////////////////////////////////////////////////////////////////
#if SHADER_BSDF_TILE
// Shader parameters and resources of the BSDF tile pass (BSDFTileMainCS).

// Exclusive upper bound of the pixel coordinates processed by BSDFTileMainCS.
int2 ViewResolution;
// Forwarded to GetStrataPixelDataByteOffset when addressing the material data of a pixel.
uint MaxBytesPerPixel;
// NOTE(review): TileSizeLog2, TileCount_Primary and TileOffset_Primary are not read in this section.
uint TileSizeLog2;
int2 TileCount_Primary;
int2 TileOffset_Primary;
// LinearCoordToTileCoord uses OverflowTileCount.x as the row width and OverflowTileOffset as the origin of the overflow tile area.
int2 OverflowTileCount;
int2 OverflowTileOffset;
// Both textures are handed to UnpackStrataHeaderIn to decode the per-pixel Strata header.
Texture2D<uint> TopLayerTexture;
Texture2DArray<uint> MaterialTextureArray;
// Packed tile coordinates, indexed by the X group id and decoded with StrataUnpackTile.
Buffer<uint> TileListBuffer;
// Per-tile records packed with PackBSDFTile.
RWTexture2D<uint> RWBSDFTileTexture;
// Per-pixel BSDF offsets packed with PackBSDFOffset.
RWTexture2D<uint> RWBSDFOffsetTexture;
// Element 0 is an atomic counter used to allocate overflow tiles.
RWBuffer<uint> RWBSDFTileCountBuffer;
#if !PERMUTATION_WAVE_OPS
// One BSDF count per thread of the group, max-reduced across the group when wave operations are not used.
groupshared uint s_TileBSDFCount[GROUP_THREAD_COUNT];
#endif
// Maps a linear overflow tile index to a 2D tile coordinate: rows are OverflowTileCount.x tiles wide
// and the result is shifted by OverflowTileOffset.
uint2 LinearCoordToTileCoord(uint InLinear)
{
	const uint Column = InLinear % OverflowTileCount.x;
	const uint Row = InLinear / OverflowTileCount.x;
	return uint2(Column, Row) + OverflowTileOffset;
}
// BSDF tile pass: one thread group per entry of TileListBuffer, one thread per pixel of the tile.
// Each thread records the offsets of the BSDFs of its pixel into RWBSDFOffsetTexture, the maximum BSDF count
// of the tile is reduced over the group, and the first thread writes the tile record(s) into RWBSDFTileTexture:
// a parent record at the tile coordinate plus one 'overflow' record per additional BSDF.
//   GroupThreadId : pixel coordinate within the tile.
//   GroupId       : .x indexes TileListBuffer.
//   LinearIndex   : flattened thread index within the group, used for the reduction and to elect the writer.
[numthreads(STRATA_TILE_SIZE, STRATA_TILE_SIZE, 1)]
void BSDFTileMainCS(uint2 GroupThreadId : SV_GroupThreadID, uint2 GroupId : SV_GroupID, uint LinearIndex : SV_GroupIndex)
{
	const uint2 TileCoord = StrataUnpackTile(TileListBuffer[GroupId.x]);
	const uint2 PixelCoord = TileCoord * STRATA_TILE_SIZE + GroupThreadId;

	// Pixels outside of the view keep a count of 0 and therefore do not influence the tile maximum.
	uint BSDFCount = 0;
	if (all(PixelCoord < uint2(ViewResolution)))
	{
		FStrataAddressing StrataAddressing = GetStrataPixelDataByteOffset(PixelCoord, uint2(View.BufferSizeAndInvSize.xy), MaxBytesPerPixel);
		FStrataPixelHeader StrataPixelHeader = UnpackStrataHeaderIn(MaterialTextureArray, StrataAddressing, TopLayerTexture);

		BSDFCount = min(StrataPixelHeader.BSDFCount, STRATA_MAX_BSDF_COUNT_FOR_BDSFOFFSET);
		if (BSDFCount > 0)
		{
			FStrataBSDFOffset Offsets = (FStrataBSDFOffset)0;
			Offsets.BSDFCount = BSDFCount;

			// Record the address of each BSDF before unpacking it; the unpack is only issued to step
			// StrataAddressing to the next BSDF (its result is not used here).
			UNROLL_N(STRATA_MAX_BSDF_COUNT_FOR_BDSFOFFSET)
			for (uint BSDFIndex = 0; BSDFIndex < BSDFCount; ++BSDFIndex)
			{
				Offsets.BSDFOffsets[BSDFIndex] = StrataAddressing.CurrentIndex;
				UnpackStrataBSDFIn(MaterialTextureArray, StrataAddressing, StrataPixelHeader);
			}

			RWBSDFOffsetTexture[PixelCoord] = PackBSDFOffset(Offsets);
		}
	}

#if PERMUTATION_WAVE_OPS
	// NOTE(review): presumably relies on a wave covering the whole thread group - confirm against the permutation selection code.
	const uint TileBSDFCount = WaveActiveMax(BSDFCount);
#else // PERMUTATION_WAVE_OPS
	// Group-wide max reduction in shared memory. The hard-coded strides cover 64 entries,
	// i.e. this assumes GROUP_THREAD_COUNT == 64 - TODO confirm STRATA_TILE_SIZE == 8.
	s_TileBSDFCount[LinearIndex] = BSDFCount;
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 32)
	{
		s_TileBSDFCount[LinearIndex] = max(s_TileBSDFCount[LinearIndex], s_TileBSDFCount[LinearIndex + 32]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 16)
	{
		s_TileBSDFCount[LinearIndex] = max(s_TileBSDFCount[LinearIndex], s_TileBSDFCount[LinearIndex + 16]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 8)
	{
		s_TileBSDFCount[LinearIndex] = max(s_TileBSDFCount[LinearIndex], s_TileBSDFCount[LinearIndex + 8]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 4)
	{
		s_TileBSDFCount[LinearIndex] = max(s_TileBSDFCount[LinearIndex], s_TileBSDFCount[LinearIndex + 4]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 2)
	{
		s_TileBSDFCount[LinearIndex] = max(s_TileBSDFCount[LinearIndex], s_TileBSDFCount[LinearIndex + 2]);
	}
	GroupMemoryBarrierWithGroupSync();

	// The reduced result lives in elements 0 and 1. Reading those fixed slots (instead of LinearIndex and LinearIndex + 1)
	// keeps the last thread of the group from reading past the end of s_TileBSDFCount; only thread 0 consumes the value below.
	const uint TileBSDFCount = max(s_TileBSDFCount[0], s_TileBSDFCount[1]);
#endif // PERMUTATION_WAVE_OPS

	if (LinearIndex < 1)
	{
		if (TileBSDFCount > 1)
		{
			// Reserve (TileBSDFCount - 1) consecutive overflow tiles; LinearTileCoord receives the first reserved index.
			uint LinearTileCoord = 0;
			InterlockedAdd(RWBSDFTileCountBuffer[0], TileBSDFCount - 1, LinearTileCoord);

			FStrataBSDFTile Tile;
			Tile.Count = TileBSDFCount;
			Tile.Index = 0;

			// For first/parent tile, set TileCoord to point towards the first 'overflow' tile
			Tile.TileCoord = LinearCoordToTileCoord(LinearTileCoord);
			RWBSDFTileTexture[TileCoord] = PackBSDFTile(Tile);

			// For 'overflow' tiles, set TileCoord to point towards the first/parent tile
			Tile.TileCoord = TileCoord;
			for (uint BSDFIndex = 1; BSDFIndex < TileBSDFCount; ++BSDFIndex)
			{
				const uint2 OverflowCoord = LinearCoordToTileCoord(LinearTileCoord + BSDFIndex - 1);
				Tile.Index = BSDFIndex;
				RWBSDFTileTexture[OverflowCoord] = PackBSDFTile(Tile);
			}
		}
		else
		{
			// No overflow: reset the record of THIS tile. The record is addressed by TileCoord (unpacked from the tile list),
			// not by GroupId, which is only an index into TileListBuffer and would target an unrelated tile.
			RWBSDFTileTexture[TileCoord] = 0;
		}
	}
}
#endif // SHADER_BSDF_TILE