2020-12-08 09:06:58 -04:00
// Copyright Epic Games, Inc. All Rights Reserved.
2023-03-31 05:34:25 -04:00
2020-12-08 09:06:58 -04:00
#include "/Engine/Private/Common.ush"
2023-09-01 15:06:19 -04:00
#define SUBSTRATE_INLINE_SHADING 0
#define SUBSTRATE_SSS_MATERIAL_OVERRIDE 0
#define SUBSTRATE_COMPLEXSPECIALPATH 1
2023-08-31 04:46:35 -04:00
#include "/Engine/Private/Substrate/Substrate.ush"
#include "SubstrateTile.ush"
2020-12-08 09:06:58 -04:00
2023-09-01 15:06:19 -04:00
#define GROUP_THREAD_COUNT (SUBSTRATE_TILE_SIZE * SUBSTRATE_TILE_SIZE)
2021-01-28 07:26:46 -04:00
2021-01-27 08:51:50 -04:00
////////////////////////////////////////////////////////////////////////////////////////////////////////////
2021-01-28 07:26:46 -04:00
#if SHADER_TILE_CATEGORIZATION
// ---- Shader parameters of the tile categorization pass (TileMainCS) ----
// When > 0, tiles are drawn with 4 indices per instance instead of 6 (see the indirect-args init in TileMainCS).
int bRectPrimitive;
2021-01-29 18:01:50 -04:00
// Not referenced by the code visible in this section.
int2 ViewResolution;
2021-11-26 10:59:06 -05:00
// Forwarded to GetSubstratePixelDataByteOffset() when computing the per-pixel Substrate addressing.
uint MaxBytesPerPixel;
2023-09-01 15:06:19 -04:00
// Forwarded to SubstrateStoreSubsurfaceHeader()/SubstrateStoreSubsurfaceExtras() together with MaterialTextureArrayUAV.
int FirstSliceStoringSubstrateSSSData;
2023-11-10 19:19:46 -05:00
// Top layer data; used to unpack the pixel header and, in the CMask permutation, to decide whether a pixel holds a material.
Texture2D<SUBSTRATE_TOP_LAYER_TYPE> TopLayerTexture;
2022-08-16 04:30:21 -04:00
#if PERMUTATION_CMASK
// Coarse mask loaded once per thread group (indexed by GroupId.xy); 0 means the whole tile is treated as empty.
Texture2D<uint> TopLayerCmaskTexture;
#endif
2022-08-29 02:55:17 -04:00
// Substrate material data. Slice 0 holds the packed pixel header; this pass reads it and may clear/patch it.
RWTexture2DArray<uint> MaterialTextureArrayUAV;
2022-03-15 09:55:16 -04:00
2023-12-15 02:18:24 -05:00
// Encoding mode forwarded to SubstratePackTile().
uint TileEncoding;
2023-09-01 15:06:19 -04:00
// Per tile type constants; only .x is consumed here (start offset of that type's list inside the tile list buffer).
uint4 TileListBufferOffsets[SUBSTRATE_TILE_TYPE_COUNT];
2023-05-23 07:22:42 -04:00
// Returns the start offset, inside the shared tile list buffer, of the list associated with the given tile type.
// Only the .x component of the per-type constant is consumed.
uint GetTileListBufferOffsets(uint Type)
{
	const uint4 PackedOffsets = TileListBufferOffsets[Type];
	return PackedOffsets.x;
}
2022-03-15 09:55:16 -04:00
2023-05-23 07:22:42 -04:00
// Indirect draw data buffer for all tile types.
// DWord 0 of each type's arguments receives the index count per instance; DWord 1 is atomically incremented once per appended tile.
RWBuffer<uint> TileDrawIndirectDataBufferUAV;
// Encoded tiles for all tile types; each type's list starts at GetTileListBufferOffsets(Type).
RWBuffer<uint> TileListBufferUAV;
2021-01-28 07:26:46 -04:00
2022-09-20 09:05:17 -04:00
#if PERMUTATION_DECAL
// DBuffer (decal) resources, only declared for the decal permutation.
// In the code visible here only DBufferRenderMask is actually read (by GetDBufferTargetMask);
// the A/B/C textures appear solely in a commented-out debug snippet and the samplers are not referenced.
Texture2D<float4> DBufferATexture;
Texture2D<float4> DBufferBTexture;
Texture2D<float4> DBufferCTexture;
Texture2D<uint> DBufferRenderMask;
SamplerState DBufferATextureSampler;
SamplerState DBufferBTextureSampler;
SamplerState DBufferCTextureSampler;
// Returns a 3-bit mask telling which DBuffer targets may contain decal data at the given pixel.
// @param PixelPos - integer pixel position used to address the DBuffer render mask
// @return 0x07 when no per-pixel mask is available for the platform (conservative: all targets assumed written)
uint GetDBufferTargetMask(uint2 PixelPos)
{
	// Conservative default, used as-is when the platform exposes no mask at all.
	// For debug purpose, a per-target mask could be rebuilt from the alpha channels instead:
	//   (DBufferATexture.Load(uint3(PixelPos, 0)).a < 1.f ? 0x1 : 0x0) |
	//   (DBufferBTexture.Load(uint3(PixelPos, 0)).a < 1.f ? 0x2 : 0x0) |
	//   (DBufferCTexture.Load(uint3(PixelPos, 0)).a < 1.f ? 0x4 : 0x0)
	uint TargetMask = 0x07;

#if PLATFORM_SUPPORTS_RENDERTARGET_WRITE_MASK
	TargetMask = DecodeRTWriteMask(PixelPos, DBufferRenderMask, 3);
#elif PLATFORM_SUPPORTS_PER_PIXEL_DBUFFER_MASK
	// The per-pixel mask is a single yes/no value: any non-zero value flags all three targets.
	const uint RenderMask = DBufferRenderMask.Load(uint3(PixelPos, 0));
	TargetMask = (RenderMask > 0) ? 0x07 : 0x00;
#endif

	return TargetMask;
}
#endif // PERMUTATION_DECAL
2023-09-01 15:06:19 -04:00
#if SUBSTRATE_OPAQUE_ROUGH_REFRACTION_ENABLED
2022-03-16 11:42:57 -04:00
// Packed opaque rough refraction data, decoded per pixel with SubstrateUnpackOpaqueRoughRefractionData().
Texture2D<float3> OpaqueRoughRefractionTexture;
2023-09-01 15:06:19 -04:00
#endif // SUBSTRATE_OPAQUE_ROUGH_REFRACTION_ENABLED
2022-03-15 03:11:08 -04:00
2021-11-24 13:13:31 -05:00
#if !PERMUTATION_WAVE_OPS
2021-12-14 10:24:57 -05:00
// One flag word per thread of the group; OR-reduced in TileMainCS when wave intrinsics are not used.
groupshared uint s_TileFlags[GROUP_THREAD_COUNT];
2021-11-24 13:13:31 -05:00
#endif
2021-01-28 07:26:46 -04:00
2022-10-24 03:32:31 -04:00
// Appends an encoded tile to the list of the given tile type.
// The counter stored at DWord 1 of that type's indirect draw arguments is atomically incremented, and its previous
// value is used as the slot index inside the type's tile list.
void AppendTileToTileList(uint TileType, uint EncodedTile)
{
	uint WriteToIndex;
	InterlockedAdd(TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(TileType) + 1], 1, WriteToIndex);
	TileListBufferUAV[GetTileListBufferOffsets(TileType) + WriteToIndex] = EncodedTile;
}

// Tile categorization pass: one thread group per SUBSTRATE_TILE_SIZE x SUBSTRATE_TILE_SIZE tile, one thread per pixel.
// Each thread classifies its pixel from the packed Substrate header, patches/clears subsurface data, then the group
// reduces the per-pixel flags and its first thread appends the tile to the matching tile list(s).
#if PERMUTATION_WAVE_OPS && COMPILER_SUPPORTS_WAVE_SIZE
WAVESIZE(64) // PERMUTATION_WAVE_OPS is true only when wave>=64 are available
#endif
[numthreads(SUBSTRATE_TILE_SIZE, SUBSTRATE_TILE_SIZE, 1)]
void TileMainCS(uint2 DispatchThreadId : SV_DispatchThreadID, uint LinearIndex : SV_GroupIndex, uint3 GroupId : SV_GroupID)
{
	// Init primitive index: the first SUBSTRATE_TILE_TYPE_COUNT threads of the first row each initialize DWord 0 of one tile type's indirect draw arguments.
	if (DispatchThreadId.x < SUBSTRATE_TILE_TYPE_COUNT && DispatchThreadId.y == 0)
	{
		const uint TileType = DispatchThreadId.x;
		const uint IndexCountPerInstance = bRectPrimitive > 0 ? 4 : 6;
		TileDrawIndirectDataBufferUAV[GetSubstrateTileTypeDrawIndirectArgOffset_DWord(TileType) + 0] = IndexCountPerInstance;
	}

	const uint2 PixelCoord = DispatchThreadId.xy + View.ViewRectMin.xy;
	const bool bIsValid = all(DispatchThreadId.xy < uint2(View.ViewSizeAndInvSize.xy));

	// If CMask data are available, we use it as a coarse evaluation to know if a tile contains any data.
	// * If the tile is entirely empty: we clear the header & SSS data
	// * If the tile contains any data: we do fine grain checking, and clear header & SSS data only for needed pixels. The top layer data texture is used
	//   to know if a pixel is valid or not (since the material header is not cleared when the Cmask permutation is used).
#if PERMUTATION_CMASK
	// Coarse test for clearing header (& SSS data) based on CMask data.
	// CMask is loaded per group (GroupId), so the branch below is uniform across the group and the barriers in the else-branch are safe.
	const uint CMask = TopLayerCmaskTexture.Load(uint3(GroupId.xy, 0));
	BRANCH
	if (CMask == 0x0)
	{
		MaterialTextureArrayUAV[uint3(PixelCoord, 0)] = 0u;
		SubstrateStoreSubsurfaceHeader(MaterialTextureArrayUAV, FirstSliceStoringSubstrateSSSData, PixelCoord, 0u); // This is a good clear for FSubstrateSubsurfaceHeader, and we only need to clear the header.
	}
	else
	{
#endif

	// Pixels outside of the view area keep all flags at false, so they never promote a border tile
	// (not aligned to the shader group size) to a more expensive category.
	bool bContainsComplexSpecialMaterial = false;
	bool bContainsComplexMaterial = false;
	bool bContainsSimpleMaterial = false;
	bool bContainsSingleMaterial = false;
	bool bContainsSubstrateMaterial = false;
	bool bContainsDecals = false;
	bool bContainsOpaqueRoughRefraction = false;
	bool bContainsScreenSpaceSubsurfaceScattering = false;
	FSubstrateOpaqueRoughRefractionData OpaqueRoughRefractionData = (FSubstrateOpaqueRoughRefractionData)0;

	if (bIsValid)
	{
		FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(PixelCoord, uint2(View.BufferSizeAndInvSize.xy), MaxBytesPerPixel);

		// Load mini header.
		const uint PackedHeader = MaterialTextureArrayUAV[uint3(PixelCoord, 0)];
		FSubstratePixelHeader SubstratePixelHeader = UnpackSubstrateHeaderIn(PackedHeader, SubstrateAddressing, TopLayerTexture);

		const bool bIsSimple = SubstratePixelHeader.IsSimpleMaterial() || SubstratePixelHeader.ClosureCount == 0; // ClosureCount == 0 ensures that non-Substrate pixel, like sky pixels, won't make a simple tile flagged as complex
		const bool bIsSingle = !SubstratePixelHeader.IsSimpleMaterial() && SubstratePixelHeader.IsSingleMaterial();
		const bool bIsComplexSpecial = SubstratePixelHeader.IsComplexSpecialMaterial();
		bContainsSubstrateMaterial = SubstratePixelHeader.ClosureCount > 0;
		bContainsSimpleMaterial = bIsSimple;
		bContainsSingleMaterial = bIsSingle;
		bContainsComplexMaterial = !bIsSingle && !bIsSimple && !bIsComplexSpecial;
		bContainsComplexSpecialMaterial = !bIsSingle && !bIsSimple && bIsComplexSpecial;

		bContainsScreenSpaceSubsurfaceScattering = SubstratePixelHeader.HasSubsurface();

#if PERMUTATION_DECAL
		// A pixel needs the decal pass only if it responds to decals (stencil) AND some DBuffer target was written there.
		const uint DBufferResponseMask = SceneStencilTexture.Load(uint3(PixelCoord, 0)) STENCIL_COMPONENT_SWIZZLE;
		const uint DBufferTargetMask = GetDBufferTargetMask(PixelCoord);
		bContainsDecals = DBufferResponseMask != 0 && DBufferTargetMask != 0;
#endif

#if SUBSTRATE_OPAQUE_ROUGH_REFRACTION_ENABLED
		OpaqueRoughRefractionData = SubstrateUnpackOpaqueRoughRefractionData(OpaqueRoughRefractionTexture[PixelCoord]);
		bContainsOpaqueRoughRefraction = OpaqueRoughRefractionData.OpaqueRoughRefractionEnabled > 0.0f;
#endif

		// Output/Patch SSS data for legacy encoding (this allows to save ALU & bandwidth during the base pass)
		uint OptimisedLegacyMode = ((PackedHeader >> (HEADER_SINGLEENCODING_BIT_COUNT)) & HEADER_SINGLE_OPTLEGACYMODE_BIT_MASK);
		const bool bIsLegacyWrapOrWrapThin = OptimisedLegacyMode == SINGLE_OPTLEGACYMODE_SSSWRAP || OptimisedLegacyMode == SINGLE_OPTLEGACYMODE_SSSWRAP_THIN; // Wrap and Wrap thin have same packing
		const bool bIsLegacySSSProfile = OptimisedLegacyMode == SINGLE_OPTLEGACYMODE_SSSPROFILE;
		if (bIsSingle && (bIsLegacyWrapOrWrapThin || bIsLegacySSSProfile))
		{
			bContainsScreenSpaceSubsurfaceScattering = true;
			if (bIsLegacyWrapOrWrapThin)
			{
				// The 7-bit wrap opacity sits right after the legacy-mode bits of the packed header.
				const uint PackedSSSWOpacity7bits = (PackedHeader >> (HEADER_SINGLEENCODING_BIT_COUNT + HEADER_SINGLE_OPTLEGACYMODE_BIT_COUNT)) & 0x7F;

				FSubstrateSubsurfaceHeader SSSHeader = (FSubstrateSubsurfaceHeader)0;
				SubstrateSubSurfaceHeaderSetSSSType(SSSHeader, SSS_TYPE_WRAP);
				SubstrateSubSurfaceHeaderSetWrapOpacity(SSSHeader, UnpackR7(PackedSSSWOpacity7bits));
				SubstrateStoreSubsurfaceHeader(MaterialTextureArrayUAV, FirstSliceStoringSubstrateSSSData, PixelCoord, SSSHeader.Bytes);
			}
			else if (bIsLegacySSSProfile)
			{
				// Radius scale / profile id come from the top 8 bits of slices 1 and 2; base color from the low 20 bits of slice 1.
				const uint Data1 = MaterialTextureArrayUAV[uint3(PixelCoord, 1)];
				const uint Data2 = MaterialTextureArrayUAV[uint3(PixelCoord, 2)];
				float RadiusScale = UnpackR8(Data1 >> 24);
				float ProfileId = UnpackR8(Data2 >> 24);

				const uint PackedDiffuse20Bits = (Data1 & 0xFFFFF);
				const float3 BaseColor = UnpackR7G7B6Gamma2(PackedDiffuse20Bits);

				FSubstrateSubsurfaceHeader SSSHeader = (FSubstrateSubsurfaceHeader)0;
				SubstrateSubSurfaceHeaderSetSSSType(SSSHeader, SSS_TYPE_DIFFUSION_PROFILE);
				SubstrateSubSurfaceHeaderSetProfile(SSSHeader, RadiusScale, SubstrateSubsurfaceProfileIdTo8bits(ProfileId));

				FSubstrateSubsurfaceExtras SSSExtras = (FSubstrateSubsurfaceExtras)0;
				SubstrateSubsurfaceExtrasSetBaseColor(SSSExtras, BaseColor);

				SubstrateStoreSubsurfaceHeader(MaterialTextureArrayUAV, FirstSliceStoringSubstrateSSSData, PixelCoord, SSSHeader.Bytes);
				SubstrateStoreSubsurfaceExtras(MaterialTextureArrayUAV, FirstSliceStoringSubstrateSSSData, PixelCoord, SSSExtras.Bytes);
			}
		}

		// Fine grain test for clearing based on CMask data
#if PERMUTATION_CMASK
		// Fine grain check if clear is needed: only partially covered tiles (0 < CMask < 0xF) need the per-pixel test.
		bool bClearHeader = false;
		BRANCH
		if (CMask > 0u && CMask < 0xF)
		{
			bClearHeader = !SubstrateIsTopLayerMaterial(TopLayerTexture.Load(uint3(PixelCoord, 0)));
		}

		// Header clear
		BRANCH
		if (bClearHeader)
		{
			MaterialTextureArrayUAV[uint3(PixelCoord, 0)] = 0u;
		}
#endif
	}

	BRANCH
	if (!bContainsScreenSpaceSubsurfaceScattering)
	{
		// We must fill all the pixel which does not have subsurface scattering by default so that the SSS code is not executed where it should not.
		SubstrateStoreSubsurfaceHeader(MaterialTextureArrayUAV, FirstSliceStoringSubstrateSSSData, PixelCoord, 0u); // This is a good clear for FSubstrateSubsurfaceHeader, and we only need to clear the header.
	}

	// Reduce the per-pixel flags to per-tile flags.
#if PERMUTATION_WAVE_OPS
	const bool bTileContainsSubstrate						= WaveActiveAnyTrue(bContainsSubstrateMaterial);
	const bool bTileContainsSimple							= WaveActiveAnyTrue(bContainsSimpleMaterial);
	const bool bTileContainsSingle							= WaveActiveAnyTrue(bContainsSingleMaterial);
	const bool bTileContainsComplex							= WaveActiveAnyTrue(bContainsComplexMaterial);
	const bool bTileContainsComplexSpecial					= WaveActiveAnyTrue(bContainsComplexSpecialMaterial);
	const bool bTileContainsOpaqueRoughRefraction			= WaveActiveAnyTrue(bContainsOpaqueRoughRefraction);
	const bool bTileContainsScreenSpaceSubsurfaceScattering	= WaveActiveAnyTrue(bContainsScreenSpaceSubsurfaceScattering);
	const bool bTileContainsDecals							= WaveActiveAnyTrue(bContainsDecals);
#else // PERMUTATION_WAVE_OPS
	s_TileFlags[LinearIndex] =
		  (bContainsSubstrateMaterial				? 0x1u	: 0u)
		| (bContainsSimpleMaterial					? 0x2u	: 0u)
		| (bContainsSingleMaterial					? 0x4u	: 0u)
		| (bContainsComplexMaterial					? 0x8u	: 0u)
		| (bContainsComplexSpecialMaterial			? 0x10u	: 0u)
		| (bContainsOpaqueRoughRefraction			? 0x20u	: 0u)
		| (bContainsScreenSpaceSubsurfaceScattering	? 0x40u	: 0u)
		| (bContainsDecals							? 0x80u	: 0u);

	// Groupshared OR-reduction tree. The hard-coded strides assume a 64-thread group (GROUP_THREAD_COUNT == 64).
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 32)
	{
		s_TileFlags[LinearIndex] = s_TileFlags[LinearIndex] | s_TileFlags[LinearIndex + 32];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 16)
	{
		s_TileFlags[LinearIndex] = s_TileFlags[LinearIndex] | s_TileFlags[LinearIndex + 16];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 8)
	{
		s_TileFlags[LinearIndex] = s_TileFlags[LinearIndex] | s_TileFlags[LinearIndex + 8];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 4)
	{
		s_TileFlags[LinearIndex] = s_TileFlags[LinearIndex] | s_TileFlags[LinearIndex + 4];
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 2)
	{
		s_TileFlags[LinearIndex] = s_TileFlags[LinearIndex] | s_TileFlags[LinearIndex + 2];
	}
	GroupMemoryBarrierWithGroupSync();

	// Final step: combine the two surviving partial results. Fixed indices are used (instead of LinearIndex / LinearIndex + 1)
	// so that the last thread of the group does not read past the end of s_TileFlags; thread 0 gets the same value as before.
	const uint FinalTileFlags = s_TileFlags[0] | s_TileFlags[1];

	const bool bTileContainsSubstrate						= (FinalTileFlags & 0x1u) > 0;
	const bool bTileContainsSimple							= (FinalTileFlags & 0x2u) > 0;
	const bool bTileContainsSingle							= (FinalTileFlags & 0x4u) > 0;
	const bool bTileContainsComplex							= (FinalTileFlags & 0x8u) > 0;
	const bool bTileContainsComplexSpecial					= (FinalTileFlags & 0x10u) > 0;
	const bool bTileContainsOpaqueRoughRefraction			= (FinalTileFlags & 0x20u) > 0;
	const bool bTileContainsScreenSpaceSubsurfaceScattering	= (FinalTileFlags & 0x40u) > 0;
	const bool bTileContainsDecals							= (FinalTileFlags & 0x80u) > 0;
#endif // PERMUTATION_WAVE_OPS

	// A single thread per group publishes the tile, and only if the tile holds at least one Substrate pixel.
	if (LinearIndex < 1 && bTileContainsSubstrate)
	{
		const uint EncodedTile = SubstratePackTile(GroupId.xy, TileEncoding);

		// Material complexity lists are mutually exclusive: the tile goes to the most expensive category it contains.
		if (bTileContainsComplexSpecial)
		{
			AppendTileToTileList(SUBSTRATE_TILE_TYPE_COMPLEX_SPECIAL, EncodedTile);
		}
		else if (bTileContainsComplex)
		{
			AppendTileToTileList(SUBSTRATE_TILE_TYPE_COMPLEX, EncodedTile);
		}
		else if (bTileContainsSingle)
		{
			AppendTileToTileList(SUBSTRATE_TILE_TYPE_SINGLE, EncodedTile);
		}
		else // (bTileContainsSimple)
		{
			AppendTileToTileList(SUBSTRATE_TILE_TYPE_SIMPLE, EncodedTile);
		}

#if SUBSTRATE_OPAQUE_ROUGH_REFRACTION_ENABLED
		if (bTileContainsOpaqueRoughRefraction)
		{
			AppendTileToTileList(SUBSTRATE_TILE_TYPE_ROUGH_REFRACT, EncodedTile);
		}
		// Tiles with SSS but without rough refraction get their own list.
		if (bTileContainsScreenSpaceSubsurfaceScattering && !bTileContainsOpaqueRoughRefraction)
		{
			AppendTileToTileList(SUBSTRATE_TILE_TYPE_ROUGH_REFRACT_SSS_WITHOUT, EncodedTile);
		}
#endif

#if PERMUTATION_DECAL
		// Decal lists only distinguish complex / single / simple (complex-special is not tested here).
		if (bTileContainsDecals)
		{
			if (bTileContainsComplex)
			{
				AppendTileToTileList(SUBSTRATE_TILE_TYPE_DECAL_COMPLEX, EncodedTile);
			}
			else if (bTileContainsSingle)
			{
				AppendTileToTileList(SUBSTRATE_TILE_TYPE_DECAL_SINGLE, EncodedTile);
			}
			else // (bTileContainsSimple)
			{
				AppendTileToTileList(SUBSTRATE_TILE_TYPE_DECAL_SIMPLE, EncodedTile);
			}
		}
#endif
	}

#if PERMUTATION_CMASK
	}
#endif
}
#endif // SHADER_TILE_CATEGORIZATION
////////////////////////////////////////////////////////////////////////////////////////////////////////////
2022-03-21 06:29:05 -04:00
#if SHADER_MATERIAL_TILE_PREPARE_ARGS
2022-03-18 13:43:33 -04:00
// Source: per tile type indirect draw arguments (DWord 1 is read as the tile count).
Buffer<uint> TileDrawIndirectDataBuffer;
// Destination: per tile type indirect dispatch arguments (3 DWords written per type).
RWBuffer<uint> TileDispatchIndirectDataBuffer;
2022-09-20 09:59:15 -04:00
// Converts, for every tile type, the indirect draw arguments produced by the tile categorization pass into
// indirect dispatch arguments: one thread per tile type, dispatch size = (tile count, 1, 1).
[numthreads(32, 1, 1)]
void ArgsMainCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	const uint TileType = DispatchThreadId.x;
	if (TileType < SUBSTRATE_TILE_TYPE_COUNT)
	{
		const uint DrawArgOffset = GetSubstrateTileTypeDrawIndirectArgOffset_DWord(TileType);
		const uint DispatchArgOffset = GetSubstrateTileTypeDispatchIndirectArgOffset_DWord(TileType);

		// We could have more than 65k tiles, in particular with complex multi-layer closures covering the full screen.
		// NOTE(review): the whole tile count is written to the X dimension; confirm that consumers/platforms cope with counts above 65535.
		TileDispatchIndirectDataBuffer[DispatchArgOffset + 0] = TileDrawIndirectDataBuffer[DrawArgOffset + 1];
		TileDispatchIndirectDataBuffer[DispatchArgOffset + 1] = 1;
		TileDispatchIndirectDataBuffer[DispatchArgOffset + 2] = 1;
	}
}
2022-03-21 06:29:05 -04:00
#endif // SHADER_MATERIAL_TILE_PREPARE_ARGS
////////////////////////////////////////////////////////////////////////////////////////////////////////////
2023-10-12 15:59:22 -04:00
#if SHADER_CLOSURE_TILE_PREPARE_ARGS
2022-03-21 06:29:05 -04:00
2022-05-02 12:12:18 -04:00
// Only .x is used here: maximum dispatch width, i.e. the number of groups per row of the 2D dispatch.
int2 TileCount_Primary;
2022-03-21 06:29:05 -04:00
// Source: element 0 is read as the closure tile count.
Buffer<uint> TileDrawIndirectDataBuffer;
// Destination: 2D indirect dispatch arguments, one group per closure tile.
RWBuffer<uint> TileDispatchIndirectDataBuffer;
2022-05-02 12:12:18 -04:00
// Destination: 1D indirect dispatch arguments, one thread per closure tile.
RWBuffer<uint> TileDispatchPerThreadIndirectDataBuffer;
2022-03-21 06:29:05 -04:00
// Builds the indirect dispatch arguments for the closure tiles from the closure tile count:
// * TileDispatchIndirectDataBuffer: one group per tile, laid out as a 2D grid at most TileCount_Primary.x groups wide.
// * TileDispatchPerThreadIndirectDataBuffer: one thread per tile, with SUBSTRATE_TILE_SIZE^2 tiles handled per group.
[numthreads(1, 1, 1)]
void ArgsMainCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	if (all(DispatchThreadId == 0))
	{
		const uint TileCount = TileDrawIndirectDataBuffer[0].x;

		// Wrap the linear tile count into rows of TileCount_Primary.x groups.
		const uint MaxDispatchX = uint(TileCount_Primary.x);
		const uint DispatchX = min(TileCount, MaxDispatchX);
		const uint DispatchY = DivideAndRoundUp(TileCount, MaxDispatchX);

		TileDispatchIndirectDataBuffer[0] = DispatchX;
		TileDispatchIndirectDataBuffer[1] = DispatchY;
		TileDispatchIndirectDataBuffer[2] = 1;

		TileDispatchPerThreadIndirectDataBuffer[0] = DivideAndRoundUp(TileCount, SUBSTRATE_TILE_SIZE * SUBSTRATE_TILE_SIZE);
		TileDispatchPerThreadIndirectDataBuffer[1] = 1;
		TileDispatchPerThreadIndirectDataBuffer[2] = 1;
	}
}
2023-10-12 15:59:22 -04:00
#endif // SHADER_CLOSURE_TILE_PREPARE_ARGS
2022-03-18 13:43:33 -04:00
////////////////////////////////////////////////////////////////////////////////////////////////////////////
2023-10-12 15:59:22 -04:00
#if SHADER_CLOSURE_TILE
2022-03-18 13:43:33 -04:00
// ---- Shader parameters of the closure tile pass (ClosureTileMainCS) ----
// Not referenced by the code visible in this section.
int2 ViewResolution;
// Forwarded to GetSubstratePixelDataByteOffset() when computing the per-pixel Substrate addressing.
uint MaxBytesPerPixel;
// Not referenced by the code visible in this section.
uint TileSizeLog2;
2022-05-02 12:12:18 -04:00
2022-03-18 13:43:33 -04:00
// Not referenced by the code visible in this section.
int2 TileCount_Primary;
2023-11-10 19:19:46 -05:00
// Top layer data, forwarded to UnpackSubstrateHeaderIn().
Texture2D<SUBSTRATE_TOP_LAYER_TYPE> TopLayerTexture;
2022-03-18 13:43:33 -04:00
// Read-only Substrate material data from which the header and closures are unpacked.
Texture2DArray<uint> MaterialTextureArray;
// Encoded input tiles; group N processes element TileListBufferOffset + N.
Buffer<uint> TileListBuffer;
2023-05-23 07:22:42 -04:00
uint TileListBufferOffset;
2023-12-15 02:18:24 -05:00
// Encoding mode forwarded to SubstrateUnpackTile().
uint TileEncoding;
2022-03-18 13:43:33 -04:00
2023-10-12 15:59:22 -04:00
// Output: packed per-pixel closure offsets (written only for pixels with at least one closure).
RWTexture2D<uint> RWClosureOffsetTexture;
// Output: element 0 is the running count of entries appended to RWClosureTileBuffer.
RWBuffer<uint> RWClosureTileCountBuffer;
// Output: packed closure tiles, one entry per (tile, closure index >= 1).
RWBuffer<uint> RWClosureTileBuffer;
2022-03-18 13:43:33 -04:00
#if !PERMUTATION_WAVE_OPS
2023-10-12 15:59:22 -04:00
// One closure count per thread of the group; max-reduced in ClosureTileMainCS when wave intrinsics are not used.
groupshared uint s_TileClosureCount[GROUP_THREAD_COUNT];
2022-03-18 13:43:33 -04:00
#endif
2022-10-24 03:32:31 -04:00
// Closure tile pass: one thread group per input tile, one thread per pixel.
// Each thread records the offsets of its pixel's closures into RWClosureOffsetTexture, then the group computes the
// maximum closure count of the tile and its first thread appends one closure tile entry per closure index >= 1.
#if PERMUTATION_WAVE_OPS && COMPILER_SUPPORTS_WAVE_SIZE
WAVESIZE(64) // PERMUTATION_WAVE_OPS is true only when wave>=64 are available
#endif
[numthreads(SUBSTRATE_TILE_SIZE, SUBSTRATE_TILE_SIZE, 1)]
void ClosureTileMainCS(uint2 GroupThreadId : SV_GroupThreadID, uint2 GroupId : SV_GroupID, uint LinearIndex : SV_GroupIndex)
{
	const uint2 TileCoord = SubstrateUnpackTile(TileListBuffer[TileListBufferOffset + GroupId.x], TileEncoding);

	// The view-rect test is done in view-relative coordinates, before offsetting into the buffer.
	uint2 PixelCoord = TileCoord * SUBSTRATE_TILE_SIZE + GroupThreadId;
	const bool bIsInViewRect = all(PixelCoord < uint2(View.ViewRectMinAndSize.zw));
	PixelCoord += View.ViewRectMinAndSize.xy;

	// Pixels outside the view rect keep a closure count of 0 and do not affect the tile maximum.
	uint ClosureCount = 0;
	if (bIsInViewRect)
	{
		FSubstrateAddressing SubstrateAddressing = GetSubstratePixelDataByteOffset(PixelCoord, uint2(View.BufferSizeAndInvSize.xy), MaxBytesPerPixel);
		FSubstratePixelHeader SubstratePixelHeader = UnpackSubstrateHeaderIn(MaterialTextureArray, SubstrateAddressing, TopLayerTexture);
		ClosureCount = min(SubstratePixelHeader.ClosureCount, SUBSTRATE_MAX_CLOSURE_COUNT_FOR_CLOSUREOFFSET);

		if (ClosureCount > 0)
		{
			FSubstrateClosureOffset Offsets = (FSubstrateClosureOffset)0;
			Offsets.ClosureCount = ClosureCount;

			// Record the addressing index at which each closure starts; the unpack call is what moves the addressing forward.
			UNROLL_N(SUBSTRATE_MAX_CLOSURE_COUNT_FOR_CLOSUREOFFSET)
			for (uint ClosureIndex = 0; ClosureIndex < ClosureCount; ++ClosureIndex)
			{
				Offsets.ClosureOffsets[ClosureIndex] = SubstrateAddressing.CurrentIndex;
				UnpackSubstrateBSDFIn(MaterialTextureArray, SubstrateAddressing, SubstratePixelHeader);
			}

			RWClosureOffsetTexture[PixelCoord] = PackClosureOffset(Offsets);
		}
	}

	// Reduce the per-pixel closure counts to the maximum closure count of the tile.
#if PERMUTATION_WAVE_OPS
	const uint TileClosureCount = WaveActiveMax(ClosureCount);
#else // PERMUTATION_WAVE_OPS
	s_TileClosureCount[LinearIndex] = ClosureCount;

	// Groupshared max-reduction tree. The hard-coded strides assume a 64-thread group (GROUP_THREAD_COUNT == 64).
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 32)
	{
		s_TileClosureCount[LinearIndex] = max(s_TileClosureCount[LinearIndex], s_TileClosureCount[LinearIndex + 32]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 16)
	{
		s_TileClosureCount[LinearIndex] = max(s_TileClosureCount[LinearIndex], s_TileClosureCount[LinearIndex + 16]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 8)
	{
		s_TileClosureCount[LinearIndex] = max(s_TileClosureCount[LinearIndex], s_TileClosureCount[LinearIndex + 8]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 4)
	{
		s_TileClosureCount[LinearIndex] = max(s_TileClosureCount[LinearIndex], s_TileClosureCount[LinearIndex + 4]);
	}
	GroupMemoryBarrierWithGroupSync();
	if (LinearIndex < 2)
	{
		s_TileClosureCount[LinearIndex] = max(s_TileClosureCount[LinearIndex], s_TileClosureCount[LinearIndex + 2]);
	}
	GroupMemoryBarrierWithGroupSync();

	// Final step: combine the two surviving partial results. Fixed indices are used (instead of LinearIndex / LinearIndex + 1)
	// so that the last thread of the group does not read past the end of s_TileClosureCount; thread 0 gets the same value as before.
	const uint TileClosureCount = max(s_TileClosureCount[0], s_TileClosureCount[1]);
#endif // PERMUTATION_WAVE_OPS

	if (LinearIndex == 0)
	{
		if (TileClosureCount > 1)
		{
			// Store only tile data for Closure[1..X]. Closure[0] is implicitly stored into the first layer.
			// A single atomic reserves a contiguous range of (TileClosureCount - 1) entries for this tile.
			uint StoreIndex = 0;
			InterlockedAdd(RWClosureTileCountBuffer[0], TileClosureCount - 1, StoreIndex);

			FSubstrateClosureTile Tile = (FSubstrateClosureTile)0;
			Tile.TileCoord = TileCoord;
			Tile.ClosureCount = TileClosureCount;
			for (uint ClosureIndex = 1; ClosureIndex < TileClosureCount; ++ClosureIndex)
			{
				Tile.ClosureIndex = ClosureIndex;
				RWClosureTileBuffer[StoreIndex + ClosureIndex - 1] = PackClosureTile(Tile);
			}
		}
	}
}
2023-10-12 15:59:22 -04:00
#endif // SHADER_CLOSURE_TILE