Files
UnrealEngineUWP/Engine/Shaders/Private/DistanceField/GlobalDistanceField.usf
krzysztof narkowicz 367923aa7b GlobalDistanceField - optimizations and refactors
* Decreased Global Distance Field border from full voxel to half voxel and decreased page size from 16 to 8. This removes ~30% of pages, saving memory and improving update speed
* Half texel border requires computing gradients using 8 page table fetches, but it didn't influence Lumen tracing performance on console
* Smaller pages put some pressure on culling and other passes. Culling grid was switched from per page to per 4x4x4 pages. Overall full GDF update time in FortGPUTestBed decreased from 5.89ms to 4.57ms
* Switched page table format to UInt32 in order to support larger Global Distance Field resolutions
* Moved all GlobalDistanceField shaders into /DistanceField/* folder, in future we should move other distance field shaders there
* Moved GlobalDistanceField page stats from r.Lumen.Visualize.Stats in Lumen code to r.GlobalDistanceField.Debug in GlobalDistanceField code
* Fixed Lumen normal visualization when only Global Distance Field tracing is enabled

#preflight 62630427006fa20b683b405a
[FYI] Tiago.Costa, Daniel.Wright

#ROBOMERGE-AUTHOR: krzysztof.narkowicz
#ROBOMERGE-SOURCE: CL 19873116 via CL 19873429 via CL 19874251
#ROBOMERGE-BOT: UE5 (Release-Engine-Staging -> Main) (v940-19807014)

[CL 19878047 by krzysztof narkowicz in ue5-main branch]
2022-04-22 19:55:41 -04:00

823 lines
28 KiB
Plaintext

// Copyright Epic Games, Inc. All Rights Reserved.
#include "../Common.ush"
#include "../DeferredShadingCommon.ush"
#include "../DistanceFieldLightingShared.ush"
#include "../DistanceFieldAOShared.ush"
#include "../MeshDistanceFieldCommon.ush"
#include "GlobalDistanceFieldShared.ush"
#include "GlobalDistanceFieldUtils.ush"
// Fallback thread group sizes, used only when the macro was not defined before this point
// (presumably by the shader compile environment - confirm against the C++ side).
#ifndef CULLOBJECTS_THREADGROUP_SIZE
#define CULLOBJECTS_THREADGROUP_SIZE 1
#endif
#ifndef THREADGROUP_SIZE
#define THREADGROUP_SIZE 1
#endif
// Note: the Y and Z fallbacks are only emitted together with the X fallback
#ifndef THREADGROUP_SIZE_X
#define THREADGROUP_SIZE_X 1
#define THREADGROUP_SIZE_Y 1
#define THREADGROUP_SIZE_Z 1
#endif
// Number of 4 texel wide compose groups along one axis of a page in the atlas
#define GLOBAL_DISTANCE_FIELD_PAGE_GROUP_X (GLOBAL_DISTANCE_FIELD_PAGE_RESOLUTION_IN_ATLAS / 4)
// Number of compose groups per page (cube of the per axis count)
#define GLOBAL_DISTANCE_FIELD_PAGE_GROUP_NUM (GLOBAL_DISTANCE_FIELD_PAGE_GROUP_X * GLOBAL_DISTANCE_FIELD_PAGE_GROUP_X * GLOBAL_DISTANCE_FIELD_PAGE_GROUP_X)
/**
 * Squared distance between two axis aligned boxes, each given as center + half extent.
 * Returns 0 when the boxes touch or overlap.
 */
float ComputeSquaredDistanceBetweenAABBs(float3 CenterA, float3 ExtentA, float3 CenterB, float3 ExtentB)
{
	float3 CenterDelta = abs(CenterB - CenterA);
	float3 CombinedExtent = ExtentA + ExtentB;

	// Per axis gap between the boxes, clamped to 0 on axes where they overlap
	float3 Gap = max(CenterDelta - CombinedExtent, 0);

	return dot(Gap, Gap);
}
// Shader parameters and constants shared by the culling and compose passes below
float InfluenceRadius;
uint ClipmapIndex;
float3 VolumeTexelSize;
uint3 CullGridDimension;
// Outputs of CullObjectsToGridCS: array allocator, per cull tile (count, offset) header and the object index array
RWStructuredBuffer<uint> RWCullGridAllocator;
RWStructuredBuffer<uint> RWCullGridObjectHeader;
RWStructuredBuffer<uint> RWCullGridObjectArray;
// Number of page tiles per cull tile along each axis
#define CULL_GRID_FACTOR 4
// Thread group size of CullObjectsToGridCS
#define GRID_CULL_THREADGROUP_TOTALSIZE 256
uint3 UpdateRegionSize;
// Read only views of the cull grid: two uints per cull tile (object count, offset into CullGridObjectArray)
StructuredBuffer<uint> CullGridObjectHeader;
StructuredBuffer<uint> CullGridObjectArray;
// Fallback thread group sizes for ComposeObjectsIntoPagesCS
#ifndef COMPOSITE_THREADGROUP_SIZEX
#define COMPOSITE_THREADGROUP_SIZEX 1
#endif
#ifndef COMPOSITE_THREADGROUP_SIZEY
#define COMPOSITE_THREADGROUP_SIZEY 1
#endif
#ifndef COMPOSITE_THREADGROUP_SIZEZ
#define COMPOSITE_THREADGROUP_SIZEZ 1
#endif
// Total thread count of a compose group. The name is misspelled (THREADGORUP), but it has to match its use in ComposeObjectsIntoPagesCS.
#define COMPOSITE_THREADGORUP_TOTALSIZE (COMPOSITE_THREADGROUP_SIZEX * COMPOSITE_THREADGROUP_SIZEY * COMPOSITE_THREADGROUP_SIZEZ)
// Outputs of CullObjectsToClipmapCS: compacted list of accepted object indices and its element count (element 0)
RWStructuredBuffer<uint> RWObjectIndexBuffer;
RWStructuredBuffer<uint> RWObjectIndexNumBuffer;
// Clipmap bounds (center + half extent) tested against object bounds in CullObjectsToClipmapCS
float3 ClipmapWorldCenter;
float3 ClipmapWorldExtent;
// Squared distance threshold used by the AABB distance tests
float InfluenceRadiusSq;
// 0: accept only mostly static objects, 1: accept only often moving objects, 2: accept all objects
uint AcceptOftenMovingObjectsOnly;
// Objects with a bounding sphere radius at or below this are rejected, unless they are emissive light sources
float MeshSDFRadiusThreshold;
/**
 * Culls scene objects against the current clipmap, one thread per object.
 * Accepted object indices are appended to RWObjectIndexBuffer, using RWObjectIndexNumBuffer[0] as the append counter.
 */
[numthreads(CULLOBJECTS_THREADGROUP_SIZE, 1, 1)]
void CullObjectsToClipmapCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ObjectIndex = DispatchThreadId.x;

	// Threads past the end of the object list have nothing to do
	if (ObjectIndex >= NumSceneObjects)
	{
		return;
	}

	FDFObjectBounds Bounds = LoadDFObjectBounds(ObjectIndex);

	// Small objects are skipped unless they are emissive light sources
	bool bPassesSizeFilter = Bounds.SphereRadius > MeshSDFRadiusThreshold || Bounds.bEmissiveLightSource;
	// Mode 2 accepts everything, otherwise the object mobility has to match the requested mode
	bool bPassesMobilityFilter = AcceptOftenMovingObjectsOnly == 2 || Bounds.OftenMoving == AcceptOftenMovingObjectsOnly;

	BRANCH
	if (bPassesSizeFilter && bPassesMobilityFilter)
	{
		float BoundsDistanceSq = ComputeSquaredDistanceBetweenAABBs(ClipmapWorldCenter, ClipmapWorldExtent, LWCHackToFloat(Bounds.Center), Bounds.BoxExtent);

		if (BoundsDistanceSq < InfluenceRadiusSq)
		{
			uint WriteIndex;
			InterlockedAdd(RWObjectIndexNumBuffer[0], 1U, WriteIndex);
			RWObjectIndexBuffer[WriteIndex] = ObjectIndex;
		}
	}
}
// Indirect dispatch arguments (3 uints each) reset by ClearIndirectArgBufferCS
RWBuffer<uint> RWPageComposeIndirectArgBuffer;
RWBuffer<uint> RWPageUpdateIndirectArgBuffer;
RWBuffer<uint> RWCullGridUpdateIndirectArgBuffer;
/**
 * Resets the page compose, page update and cull grid update indirect args to (0, 1, 1),
 * so that later passes can use element 0 as a counter.
 */
[numthreads(1, 1, 1)]
void ClearIndirectArgBufferCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	// Only a single thread writes the arguments
	if (DispatchThreadId.x != 0)
	{
		return;
	}

	// Element 0 starts empty
	RWPageComposeIndirectArgBuffer[0] = 0;
	RWPageUpdateIndirectArgBuffer[0] = 0;
	RWCullGridUpdateIndirectArgBuffer[0] = 0;

	// Elements 1 and 2 are fixed at 1
	RWPageComposeIndirectArgBuffer[1] = 1;
	RWPageUpdateIndirectArgBuffer[1] = 1;
	RWCullGridUpdateIndirectArgBuffer[1] = 1;

	RWPageComposeIndirectArgBuffer[2] = 1;
	RWPageUpdateIndirectArgBuffer[2] = 1;
	RWCullGridUpdateIndirectArgBuffer[2] = 1;
}
// Outputs of BuildGridTilesCS: packed tile coordinates, with element 0 of the matching indirect arg buffer used as the append counter
RWStructuredBuffer<uint> RWPageTileBuffer;
RWBuffer<uint> RWPageIndirectArgBuffer;
RWStructuredBuffer<uint> RWCullGridTileBuffer;
RWBuffer<uint> RWCullGridIndirectArgBuffer;
// Two float4 per update bounds: [2 * i + 0].xyz = center, [2 * i + 0].w > 0 = extend by influence radius, [2 * i + 1].xyz = extent
Buffer<float4> UpdateBoundsBuffer;
uint NumUpdateBounds;
// Cull grid: resolution, grid coord to world space tile center transform and tile half extent
uint3 CullGridResolution;
float3 CullGridCoordToWorldCenterScale;
float3 CullGridCoordToWorldCenterBias;
float3 CullGridTileWorldExtent;
// Page grid: resolution, grid coord to world space tile center transform and tile half extent
uint3 PageGridResolution;
float3 PageGridCoordToWorldCenterScale;
float3 PageGridCoordToWorldCenterBias;
float3 PageGridTileWorldExtent;
// Capacity of the per group list of update bounds which passed the cull tile test
#define MAX_CULLED_TILES 511u
groupshared uint SharedCulledBoundsList[MAX_CULLED_TILES];
// Number of update bounds which passed the cull tile test. Can exceed MAX_CULLED_TILES, as it's incremented before the capacity check.
groupshared uint SharedNumCulledBounds;
// 4x4x4 bitmask with occupied cells
groupshared uint SharedCullGridMask[2];
/** Marks the given local page tile as occupied in the shared 64 bit (2 x uint) cull grid mask. */
void SetSharedCullGridMaskBit(uint3 LocalGridCoord)
{
	// Linear bit index inside the cull tile, X is the fastest changing axis
	uint LinearBit = (LocalGridCoord.z * CULL_GRID_FACTOR + LocalGridCoord.y) * CULL_GRID_FACTOR + LocalGridCoord.x;

	// Bits 0..31 live in the first word, the remaining ones in the second word
	bool bUpperWord = LinearBit >= 32;
	uint BitMask = 1u << (bUpperWord ? LinearBit - 32 : LinearBit);

	if (bUpperWord)
	{
		InterlockedOr(SharedCullGridMask[1], BitMask);
	}
	else
	{
		InterlockedOr(SharedCullGridMask[0], BitMask);
	}
}
/** Returns whether the given local page tile is marked as occupied in the shared cull grid mask. */
bool IsSharedCullGridMaskBitSet(uint3 LocalGridCoord)
{
	// Same linearization as in SetSharedCullGridMaskBit
	uint LinearBit = (LocalGridCoord.z * CULL_GRID_FACTOR + LocalGridCoord.y) * CULL_GRID_FACTOR + LocalGridCoord.x;

	bool bUpperWord = LinearBit >= 32;
	uint BitMask = 1u << (bUpperWord ? LinearBit - 32 : LinearBit);
	uint MaskWord = bUpperWord ? SharedCullGridMask[1] : SharedCullGridMask[0];

	return (MaskWord & BitMask) != 0;
}
/** Returns whether at least one page tile is marked in the shared cull grid mask. */
bool IsAnySharedCullGridMaskBitSet()
{
	uint CombinedMask = SharedCullGridMask[0] | SharedCullGridMask[1];
	return CombinedMask != 0;
}
/**
 * Run two pass culling to find out which cull and page tiles need to be updated.
 * First cull into cull tiles, then cull into page tiles.
 * Every cull tile consists of 4x4x4 (CULL_GRID_FACTOR) page tiles.
 *
 * Each thread group handles one cull tile (GroupId) and each of its threads one page tile (DispatchThreadId).
 * Touched cull tiles are appended to RWCullGridTileBuffer and touched page tiles to RWPageTileBuffer,
 * both packed as X | (Y << 8) | (Z << 16). Element 0 of the matching indirect arg buffer is the append counter.
 *
 * Note: the shared occupancy mask has 64 bits, so this presumably requires THREADGROUP_SIZE == CULL_GRID_FACTOR.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void BuildGridTilesCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ThreadIndex = GroupThreadId.x + (GroupThreadId.y + GroupThreadId.z * THREADGROUP_SIZE) * THREADGROUP_SIZE;

	// ThreadIndex spans the entire 3D group, so the loop stride has to be the full thread count.
	// A smaller stride makes multiple threads test and append the same update bounds, which wastes
	// work and can overflow SharedCulledBoundsList with duplicates.
	const uint NumThreadsInGroup = THREADGROUP_SIZE * THREADGROUP_SIZE * THREADGROUP_SIZE;

	if (all(GroupThreadId == 0))
	{
		SharedCullGridMask[0] = 0;
		SharedCullGridMask[1] = 0;
		SharedNumCulledBounds = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	uint3 CullGridCoord = GroupId.xyz;

	// First cull per group
	if (all(CullGridCoord < CullGridResolution))
	{
		float3 CullGridWorldCenter = CullGridCoord * CullGridCoordToWorldCenterScale + CullGridCoordToWorldCenterBias;

		for (uint UpdateBoundsIndex = ThreadIndex; UpdateBoundsIndex < NumUpdateBounds; UpdateBoundsIndex += NumThreadsInGroup)
		{
			float3 UpdateBoundsCenter = UpdateBoundsBuffer[UpdateBoundsIndex * 2 + 0].xyz;
			float3 UpdateBoundsExtent = UpdateBoundsBuffer[UpdateBoundsIndex * 2 + 1].xyz;
			bool bExtendByInflueceRadius = UpdateBoundsBuffer[UpdateBoundsIndex * 2 + 0].w > 0.0f;
			float UpdateBoundsInfluenceRadiusSq = bExtendByInflueceRadius ? InfluenceRadiusSq : 0.0f;

			float DistanceSq = ComputeSquaredDistanceBetweenAABBs(UpdateBoundsCenter, UpdateBoundsExtent, CullGridWorldCenter, CullGridTileWorldExtent);
			if (DistanceSq <= UpdateBoundsInfluenceRadiusSq)
			{
				uint DestIndex;
				InterlockedAdd(SharedNumCulledBounds, 1, DestIndex);

				// Bounds which don't fit into the list are dropped
				if (DestIndex < MAX_CULLED_TILES)
				{
					SharedCulledBoundsList[DestIndex] = UpdateBoundsIndex;
				}
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	uint3 PageGridCoord = DispatchThreadId.xyz;
	uint3 LocalPageGridCoord = GroupThreadId.xyz;

	// Then cull per page
	if (all(PageGridCoord < PageGridResolution))
	{
		float3 PageGridWorldCenter = PageGridCoord * PageGridCoordToWorldCenterScale + PageGridCoordToWorldCenterBias;

		// The counter is incremented also for bounds which didn't fit into the list,
		// so clamp it to the list capacity in order to never read past the end of the shared array
		const uint NumCulledBounds = min(SharedNumCulledBounds, MAX_CULLED_TILES);

		for (uint IndexInCulledBoundsList = 0; IndexInCulledBoundsList < NumCulledBounds; ++IndexInCulledBoundsList)
		{
			uint UpdateBoundsIndex = SharedCulledBoundsList[IndexInCulledBoundsList];

			float3 UpdateBoundsCenter = UpdateBoundsBuffer[UpdateBoundsIndex * 2 + 0].xyz;
			float3 UpdateBoundsExtent = UpdateBoundsBuffer[UpdateBoundsIndex * 2 + 1].xyz;
			bool bExtendByInflueceRadius = UpdateBoundsBuffer[UpdateBoundsIndex * 2 + 0].w > 0.0f;
			float UpdateBoundsInfluenceRadiusSq = bExtendByInflueceRadius ? InfluenceRadiusSq : 0.0f;

			float DistanceSq = ComputeSquaredDistanceBetweenAABBs(UpdateBoundsCenter, UpdateBoundsExtent, PageGridWorldCenter, PageGridTileWorldExtent);
			if (DistanceSq <= UpdateBoundsInfluenceRadiusSq)
			{
				// A single hit is enough to mark this page tile for update
				SetSharedCullGridMaskBit(LocalPageGridCoord);
				break;
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// Emit the cull tile once per group if any of its page tiles was marked
	if (all(GroupThreadId == 0))
	{
		if (IsAnySharedCullGridMaskBitSet())
		{
			uint DestIndex;
			InterlockedAdd(RWCullGridIndirectArgBuffer[0], 1, DestIndex);
			RWCullGridTileBuffer[DestIndex] = CullGridCoord.x | (CullGridCoord.y << 8) | (CullGridCoord.z << 16);
		}
	}

	// Emit every marked page tile
	if (IsSharedCullGridMaskBitSet(LocalPageGridCoord))
	{
		uint DestIndex;
		InterlockedAdd(RWPageIndirectArgBuffer[0], 1, DestIndex);
		RWPageTileBuffer[DestIndex] = PageGridCoord.x | (PageGridCoord.y << 8) | (PageGridCoord.z << 16);
	}
}
// Stack of free page ids and its current size (element 0). The size is signed, as AllocatePagesCS can drive it below zero.
RWStructuredBuffer<uint> RWPageFreeListBuffer;
RWStructuredBuffer<int> RWPageFreeListAllocatorBuffer;
// Total number of pages placed in the free list by InitPageFreeListCS
uint GlobalDistanceFieldMaxPageNum;
/**
 * Fills the page free list with all page ids, one thread per entry, and sets the free list size to the page count.
 * Ids are stored in descending order, so the last entry (the first one to be allocated) holds page 0.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void InitPageFreeListCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint EntryIndex = DispatchThreadId.x;

	// A single thread publishes the initial free list size
	if (EntryIndex == 0)
	{
		RWPageFreeListAllocatorBuffer[0] = GlobalDistanceFieldMaxPageNum;
	}

	if (EntryIndex < GlobalDistanceFieldMaxPageNum)
	{
		uint LastPageId = GlobalDistanceFieldMaxPageNum - 1;
		RWPageFreeListBuffer[EntryIndex] = LastPageId - EntryIndex;
	}
}
// Page tables mapping page grid cells to page ids. The "combined" table falls back to the parent layer when the current layer has no page (see AllocatePagesCS).
RWTexture3D<uint> RWPageTableCombinedTexture;
RWTexture3D<uint> RWPageTableLayerTexture;
Texture3D<uint> PageTableLayerTexture;
Texture3D<uint> ParentPageTableLayerTexture;
float3 InvPageGridResolution;
// Z offset added to page table coordinates in PageGridCoordToPageTableTextureCoord
uint PageTableClipmapOffsetZ;
// Page bounds and influence range used by the page occupancy test in AllocatePagesCS
float3 PageWorldExtent;
float PageWorldRadius;
float ClipmapInfluenceRadius;
// Page grid coord to world space page center transform
float3 PageCoordToPageWorldCenterScale;
float3 PageCoordToPageWorldCenterBias;
// World position to clipmap UV: UV = frac(WorldPosition * .www + .xyz)
float4 ClipmapVolumeWorldToUVAddAndMul;
// Packed page coordinates of pages which need to be composed, written by AllocatePagesCS
RWStructuredBuffer<uint> RWPageComposeTileBuffer;
// Page ids freed by AllocatePagesCS and their count (element 0)
RWStructuredBuffer<uint> RWPageFreeListReturnBuffer;
RWStructuredBuffer<uint> RWPageFreeListReturnAllocatorBuffer;
// Per update tile flag, a value > 0 forces the page to be treated as occupied (only read with MARKED_HEIGHTFIELD_PAGE_BUFFER)
StructuredBuffer<uint> MarkedHeightfieldPageBuffer;
// Packed page grid coordinates, one per AllocatePagesCS thread group
StructuredBuffer<uint> PageUpdateTileBuffer;
StructuredBuffer<uint> PageFreeListBuffer;
// Per thread minimum distance scratch for the group reduction in AllocatePagesCS
groupshared float GroupMinPageDistance[64];
/**
 * Converts a page grid coordinate into a page table texel coordinate.
 * The page center is transformed to world space, wrapped into clipmap UV space and then scaled to the
 * page grid resolution. PageTableClipmapOffsetZ is added on Z.
 */
uint3 PageGridCoordToPageTableTextureCoord(uint3 PageGridCoord)
{
	// World space center of the page
	float3 WorldCenter = PageGridCoord * PageCoordToPageWorldCenterScale + PageCoordToPageWorldCenterBias;

	// Wrapped clipmap UV of the page center
	float3 WrappedUV = frac(WorldCenter * ClipmapVolumeWorldToUVAddAndMul.www + ClipmapVolumeWorldToUVAddAndMul.xyz);

	// Scale to texels, keep inside of the page grid and move to this clipmap's Z offset
	int3 TextureCoord = clamp(saturate(WrappedUV) * PageGridResolution, 0, PageGridResolution - 1) + int3(0, 0, PageTableClipmapOffsetZ);

	return TextureCoord;
}
// Mesh distance field processing is disabled unless the macro was defined before this point
#ifndef PROCESS_DISTANCE_FIELDS
#define PROCESS_DISTANCE_FIELDS 0
#endif // PROCESS_DISTANCE_FIELDS
/**
 * Allocates or frees pages for the page tiles which need an update, one thread group per entry of PageUpdateTileBuffer.
 * The group decides whether the page is occupied, then thread 0:
 *  - takes a page from the free list if the page is occupied and has no page yet,
 *  - returns the page to the free list return buffer if the page is no longer occupied,
 *  - appends a compose tile for every page which has a valid page id after this step.
 */
[numthreads(THREADGROUP_SIZE_X, 1, 1)]
void AllocatePagesCS(
uint3 GroupId : SV_GroupID,
uint3 DispatchThreadId : SV_DispatchThreadID,
uint3 GroupThreadId : SV_GroupThreadID)
{
uint ThreadIndex = GroupThreadId.x;
uint IndexInPageTileBuffer = GroupId.x;
uint PackedPageGridCoord = PageUpdateTileBuffer[IndexInPageTileBuffer];
bool bAnyObjectsInPage = false;
#if MARKED_HEIGHTFIELD_PAGE_BUFFER
// Pages marked in this buffer are treated as occupied without testing any objects
bAnyObjectsInPage = MarkedHeightfieldPageBuffer[IndexInPageTileBuffer] > 0;
#endif
// Unpack X | (Y << 8) | (Z << 16)
uint3 PageGridCoord;
PageGridCoord.x = PackedPageGridCoord & 0xFF;
PageGridCoord.y = (PackedPageGridCoord >> 8) & 0xFF;
PageGridCoord.z = (PackedPageGridCoord >> 16) & 0xFF;
uint3 PageTableTextureCoord = PageGridCoordToPageTableTextureCoord(PageGridCoord);
float3 PageWorldCenter = PageGridCoord * PageCoordToPageWorldCenterScale + PageCoordToPageWorldCenterBias;
#if PROCESS_DISTANCE_FIELDS
// Loop through objects to find out if the current page contains any objects and should be allocated
if (!bAnyObjectsInPage)
{
float InfluenceMax = ClipmapInfluenceRadius + PageWorldRadius + GLOBAL_DISTANCE_FIELD_PAGE_BORDER;
float MinDistanceToPageCenter = InfluenceMax;
// Objects are fetched from the cull tile which contains this page
uint3 CullGridCoord = PageGridCoord / CULL_GRID_FACTOR;
uint CullGridLinearIndex = (CullGridCoord.z * CullGridResolution.y + CullGridCoord.y) * CullGridResolution.x + CullGridCoord.x;
uint CullGridObjectNum = CullGridObjectHeader[CullGridLinearIndex * 2 + 0];
uint CullGridObjectOffset = CullGridObjectHeader[CullGridLinearIndex * 2 + 1];
// NOTE(review): the stride and the reduction below are hard-coded for 64 threads, presumably THREADGROUP_SIZE_X is always 64 here - confirm
for (uint IndexInCullCell = ThreadIndex; IndexInCullCell < CullGridObjectNum; IndexInCullCell += 64)
{
uint ObjectIndex = CullGridObjectArray[CullGridObjectOffset + IndexInCullCell];
FDFObjectBounds DFObjectBounds = LoadDFObjectBounds(ObjectIndex);
// Cheap bounds test before sampling the object distance field
float SquaredDistanceBetweenAABBs = ComputeSquaredDistanceBetweenAABBs(PageWorldCenter, PageWorldExtent, LWCHackToFloat(DFObjectBounds.Center), DFObjectBounds.BoxExtent);
if (SquaredDistanceBetweenAABBs < ClipmapInfluenceRadius * ClipmapInfluenceRadius)
{
float DistanceToPageCenter = DistanceToNearestSurfaceForObject(ObjectIndex, PageWorldCenter, InfluenceMax);
MinDistanceToPageCenter = min(MinDistanceToPageCenter, DistanceToPageCenter);
}
}
// Only written when the page wasn't already marked as occupied. In the other case the reduction below reads
// unwritten values, but its result can then only set bAnyObjectsInPage, which is already true.
GroupMinPageDistance[ThreadIndex] = MinDistanceToPageCenter;
}
// Group wide min reduction: 64 -> 32 -> 16 -> 8 -> 4 -> 2 values, the last two are merged by thread 0 below
GroupMemoryBarrierWithGroupSync();
if (ThreadIndex < 32)
{
GroupMinPageDistance[ThreadIndex] = min(GroupMinPageDistance[ThreadIndex], GroupMinPageDistance[ThreadIndex + 32]);
}
GroupMemoryBarrierWithGroupSync();
if (ThreadIndex < 16)
{
GroupMinPageDistance[ThreadIndex] = min(GroupMinPageDistance[ThreadIndex], GroupMinPageDistance[ThreadIndex + 16]);
}
GroupMemoryBarrierWithGroupSync();
if (ThreadIndex < 8)
{
GroupMinPageDistance[ThreadIndex] = min(GroupMinPageDistance[ThreadIndex], GroupMinPageDistance[ThreadIndex + 8]);
}
GroupMemoryBarrierWithGroupSync();
if (ThreadIndex < 4)
{
GroupMinPageDistance[ThreadIndex] = min(GroupMinPageDistance[ThreadIndex], GroupMinPageDistance[ThreadIndex + 4]);
}
GroupMemoryBarrierWithGroupSync();
if (ThreadIndex < 2)
{
GroupMinPageDistance[ThreadIndex] = min(GroupMinPageDistance[ThreadIndex], GroupMinPageDistance[ThreadIndex + 2]);
}
GroupMemoryBarrierWithGroupSync();
#endif // PROCESS_DISTANCE_FIELDS
if (ThreadIndex == 0)
{
#if PROCESS_DISTANCE_FIELDS
float MinPageDistance = min(GroupMinPageDistance[0], GroupMinPageDistance[1]);
// The page is occupied if the [-ClipmapInfluenceRadius, ClipmapInfluenceRadius] distance band can reach any point within PageWorldRadius of the page center
if (MinPageDistance + PageWorldRadius >= -ClipmapInfluenceRadius && MinPageDistance - PageWorldRadius <= ClipmapInfluenceRadius)
{
bAnyObjectsInPage = true;
}
#endif // PROCESS_DISTANCE_FIELDS
uint PageId = RWPageTableLayerTexture[PageTableTextureCoord];
if (bAnyObjectsInPage)
{
// Allocate new page if required
if (PageId == GLOBAL_DISTANCE_FIELD_INVALID_PAGE_ID)
{
// The allocator is decremented also when the list is empty, so it can go negative.
// PageFreeListReturnIndirectArgBufferCS clamps it back to 0.
int FreeListSize = 0;
InterlockedAdd(RWPageFreeListAllocatorBuffer[0], -1, FreeListSize);
uint AllocatedPageId = GLOBAL_DISTANCE_FIELD_INVALID_PAGE_ID;
if (FreeListSize > 0)
{
// Grab last element
AllocatedPageId = PageFreeListBuffer[FreeListSize - 1];
}
// If the free list was empty then the page stays unallocated
if (AllocatedPageId != GLOBAL_DISTANCE_FIELD_INVALID_PAGE_ID)
{
RWPageTableLayerTexture[PageTableTextureCoord] = AllocatedPageId;
PageId = AllocatedPageId;
}
}
}
else
{
if (PageId != GLOBAL_DISTANCE_FIELD_INVALID_PAGE_ID)
{
// Delete current page and return it to the free list
// Freed ids go through a separate return buffer and are merged back into the free list by PageFreeListReturnCS
uint FreeListReturnIndex = 0;
InterlockedAdd(RWPageFreeListReturnAllocatorBuffer[0], 1, FreeListReturnIndex);
RWPageFreeListReturnBuffer[FreeListReturnIndex] = PageId;
PageId = GLOBAL_DISTANCE_FIELD_INVALID_PAGE_ID;
RWPageTableLayerTexture[PageTableTextureCoord] = GLOBAL_DISTANCE_FIELD_INVALID_PAGE_ID;
}
}
// Update combined (static and movable) page table
// NOTE(review): the write below is inside of the #if, so without COMPOSE_PARENT_DISTANCE_FIELD the combined
// page table isn't written by this shader and CombinedPageId is unused - confirm that this is intended
uint CombinedPageId = PageId;
#if COMPOSE_PARENT_DISTANCE_FIELD
{
// Fall back to the parent layer page if this layer has no page here
if (CombinedPageId == GLOBAL_DISTANCE_FIELD_INVALID_PAGE_ID)
{
CombinedPageId = ParentPageTableLayerTexture.Load(int4(PageTableTextureCoord, 0));
}
}
RWPageTableCombinedTexture[PageTableTextureCoord] = CombinedPageId;
#endif
// Add new dispatch tile for this page if compose is required for it
if (PageId != GLOBAL_DISTANCE_FIELD_INVALID_PAGE_ID)
{
// The indirect arg counts compose thread groups (GLOBAL_DISTANCE_FIELD_PAGE_GROUP_NUM per page), while the tile buffer is indexed per page
uint DestIndex;
InterlockedAdd(RWPageComposeIndirectArgBuffer[0], GLOBAL_DISTANCE_FIELD_PAGE_GROUP_NUM, DestIndex);
RWPageComposeTileBuffer[DestIndex / GLOBAL_DISTANCE_FIELD_PAGE_GROUP_NUM] = PageGridCoord.x | (PageGridCoord.y << 8) | (PageGridCoord.z << 16);
}
}
}
// Read only views of the page ids freed by AllocatePagesCS (count in element 0 of the allocator buffer)
StructuredBuffer<uint> PageFreeListReturnAllocatorBuffer;
StructuredBuffer<uint> PageFreeListReturnBuffer;
// Indirect dispatch arguments written by PageFreeListReturnIndirectArgBufferCS
RWBuffer<uint> RWFreeListReturnIndirectArgBuffer;
/**
 * Prepares the indirect dispatch for returning freed pages to the free list
 * and clamps the free list size back to a non negative value.
 */
[numthreads(1, 1, 1)]
void PageFreeListReturnIndirectArgBufferCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	if (DispatchThreadId.x != 0)
	{
		return;
	}

	// One group per 64 returned pages, rounded up (presumably the group size of PageFreeListReturnCS)
	RWFreeListReturnIndirectArgBuffer[0] = (PageFreeListReturnAllocatorBuffer[0] + 63) / 64;
	RWFreeListReturnIndirectArgBuffer[1] = 1;
	RWFreeListReturnIndirectArgBuffer[2] = 1;

	// Failed allocations can leave the size negative, so clamp it to 0
	RWPageFreeListAllocatorBuffer[0] = max(RWPageFreeListAllocatorBuffer[0], 0);
}
/** Pushes freed page ids from the return list back onto the page free list, one thread per returned page. */
[numthreads(THREADGROUP_SIZE_X, 1, 1)]
void PageFreeListReturnCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ReturnListIndex = DispatchThreadId.x;

	// Skip threads past the end of the return list
	if (ReturnListIndex >= PageFreeListReturnAllocatorBuffer[0])
	{
		return;
	}

	// Reserve a slot at the end of the free list and store the returned page id there
	uint FreeListWriteIndex = 0;
	InterlockedAdd(RWPageFreeListAllocatorBuffer[0], 1, FreeListWriteIndex);
	RWPageFreeListBuffer[FreeListWriteIndex] = PageFreeListReturnBuffer[ReturnListIndex];
}
// Per group object counter and start offset into RWCullGridObjectArray, used by CullObjectsToGridCS
groupshared uint NumObjectsGridCell;
groupshared uint GridCellStartOffset;
// Read only views of the object list written by CullObjectsToClipmapCS
StructuredBuffer<uint> ObjectIndexBuffer;
StructuredBuffer<uint> ObjectIndexNumBuffer;
// Packed cull tile coordinates, one per CullObjectsToGridCS thread group
StructuredBuffer<uint> CullGridTileBuffer;
// Half extent of a cull tile in world space
float3 CullTileWorldExtent;
/**
 * Builds the per cull tile object list, one thread group per entry of CullGridTileBuffer.
 * Works in two passes over the clipmap object list: the first one counts intersecting objects,
 * then thread 0 allocates a range in RWCullGridObjectArray and writes the (count, offset) header,
 * and the second pass repeats the same test and writes object indices into the allocated range.
 */
[numthreads(GRID_CULL_THREADGROUP_TOTALSIZE, 1, 1)]
void CullObjectsToGridCS(
uint3 GroupId : SV_GroupID,
uint3 DispatchThreadId : SV_DispatchThreadID,
uint3 GroupThreadId : SV_GroupThreadID)
{
uint ThreadIndex = GroupThreadId.x;
uint PackedCullGridTile = CullGridTileBuffer[GroupId.x];
// Unpack X | (Y << 8) | (Z << 16)
uint3 CullGridCoord;
CullGridCoord.x = PackedCullGridTile & 0xFF;
CullGridCoord.y = (PackedCullGridTile >> 8) & 0xFF;
CullGridCoord.z = (PackedCullGridTile >> 16) & 0xFF;
uint CullGridLinearIndex = (CullGridCoord.z * CullGridResolution.y + CullGridCoord.y) * CullGridResolution.x + CullGridCoord.x;
if (ThreadIndex == 0)
{
NumObjectsGridCell = 0;
}
GroupMemoryBarrierWithGroupSync();
float3 CullTileWorldCenter = CullGridCoord * CullGridCoordToWorldCenterScale + CullGridCoordToWorldCenterBias;
// First pass: count objects intersecting this cull tile
if (all(CullGridCoord < CullGridResolution))
{
for (uint IndexInObjectIndexBuffer = ThreadIndex; IndexInObjectIndexBuffer < ObjectIndexNumBuffer[0]; IndexInObjectIndexBuffer += GRID_CULL_THREADGROUP_TOTALSIZE)
{
uint ObjectIndex = ObjectIndexBuffer[IndexInObjectIndexBuffer];
FDFObjectBounds DFObjectBounds = LoadDFObjectBounds(ObjectIndex);
float DistanceSq = ComputeSquaredDistanceBetweenAABBs(CullTileWorldCenter, CullTileWorldExtent, LWCHackToFloat(DFObjectBounds.Center), DFObjectBounds.BoxExtent);
if (DistanceSq <= InfluenceRadiusSq)
{
InterlockedAdd(NumObjectsGridCell, 1U);
}
}
}
GroupMemoryBarrierWithGroupSync();
if (ThreadIndex == 0)
{
// Allocate space in the array for our objects
InterlockedAdd(RWCullGridAllocator[0], NumObjectsGridCell, GridCellStartOffset);
RWCullGridObjectHeader[CullGridLinearIndex * 2 + 0] = NumObjectsGridCell;
RWCullGridObjectHeader[CullGridLinearIndex * 2 + 1] = GridCellStartOffset;
// Reset the counter, so that the second pass can reuse it as a local write index
NumObjectsGridCell = 0;
}
GroupMemoryBarrierWithGroupSync();
// Second pass: the same test as above, but this time write the object indices
if (all(CullGridCoord < CullGridResolution))
{
for (uint IndexInObjectIndexBuffer = ThreadIndex; IndexInObjectIndexBuffer < ObjectIndexNumBuffer[0]; IndexInObjectIndexBuffer += GRID_CULL_THREADGROUP_TOTALSIZE)
{
uint ObjectIndex = ObjectIndexBuffer[IndexInObjectIndexBuffer];
FDFObjectBounds DFObjectBounds = LoadDFObjectBounds(ObjectIndex);
float DistanceSq = ComputeSquaredDistanceBetweenAABBs(CullTileWorldCenter, CullTileWorldExtent, LWCHackToFloat(DFObjectBounds.Center), DFObjectBounds.BoxExtent);
if (DistanceSq <= InfluenceRadiusSq)
{
uint DestIndex;
InterlockedAdd(NumObjectsGridCell, 1U, DestIndex);
// Write the intersecting object index into the array
RWCullGridObjectArray[GridCellStartOffset + DestIndex] = ObjectIndex;
}
}
}
}
// Capacity of the per group culled object list used by ComposeObjectsIntoPagesCS
// Tweaked for the LDS size / occupancy
#define MAX_CULLED_DF_OBJECTS 511u
groupshared uint SharedCulledObjectList[MAX_CULLED_DF_OBJECTS];
// Number of valid entries in SharedCulledObjectList for the current pass
groupshared uint NumTileCulledObjects;
#define USE_CULL_GRID 1
// Page atlas with encoded distances and coverage atlas (only written with COMPOSITE_COVERAGE_ATLAS)
RWTexture3D<UNORM float> RWPageAtlasTexture;
RWTexture3D<UNORM float> RWCoverageAtlasTexture;
StructuredBuffer<uint> PageTileBuffer;
// Packed page grid coordinates of pages to compose, GLOBAL_DISTANCE_FIELD_PAGE_GROUP_NUM thread groups per entry
StructuredBuffer<uint> ComposeTileBuffer;
uint3 ClipmapResolution;
// Half extent of the region covered by a single compose thread group, in world space
float3 ComposeTileWorldExtent;
float3 ClipmapMinBounds;
// Scales the two sided / default band sizes in CompositeMeshSDF
float ClipmapVoxelExtent;
// Clipmap voxel coord to world space voxel center transform
float3 PageCoordToVoxelCenterScale;
float3 PageCoordToVoxelCenterBias;
/**
 * Merges the distance to a single mesh distance field object into the running minimum for a voxel.
 * With COMPOSITE_COVERAGE_ATLAS it also records whether a two sided or a one sided mesh surface
 * is within its band around the voxel.
 *
 * @param ObjectIndex				Index of the distance field object to sample
 * @param VoxelWorldCenter			World space position of the voxel
 * @param MinDistance				In: current minimum distance, Out: minimum including this object
 * @param bVoxelHasTwoSidedMeshes	Set to true if a mostly two sided mesh is within TWO_SIDED_BAND_SIZE voxel extents
 * @param bVoxelHasOneSidedMeshes	Set to true if any other mesh is within DEFAULT_BAND_SIZE voxel extents
 */
void CompositeMeshSDF(uint ObjectIndex, float3 VoxelWorldCenter, inout float MinDistance, inout bool bVoxelHasTwoSidedMeshes, inout bool bVoxelHasOneSidedMeshes)
{
#if COMPOSITE_COVERAGE_ATLAS
	// Only allow skipping the SDF sample if we are beyond the range that is needed for COMPOSITE_COVERAGE_ATLAS
	float EarlyOutLimit = ClipmapVoxelExtent * max(TWO_SIDED_BAND_SIZE, DEFAULT_BAND_SIZE);
#else
	float EarlyOutLimit = 0.0f;
#endif

	// No need to search further than the current minimum, unless the coverage bands require it
	float QueryDistance = min(InfluenceRadius, max(MinDistance, EarlyOutLimit));

	FDFObjectData ObjectData = LoadDFObjectData(ObjectIndex);
	float SurfaceDistance = DistanceToNearestSurfaceForObject(ObjectData, VoxelWorldCenter, QueryDistance);
	MinDistance = min(MinDistance, SurfaceDistance);

#if COMPOSITE_COVERAGE_ATLAS
	float AbsSurfaceDistance = abs(SurfaceDistance);

	if (ObjectData.bMostlyTwoSided)
	{
		if (AbsSurfaceDistance < ClipmapVoxelExtent * TWO_SIDED_BAND_SIZE)
		{
			bVoxelHasTwoSidedMeshes = true;
		}
	}
	else
	{
		if (AbsSurfaceDistance < ClipmapVoxelExtent * DEFAULT_BAND_SIZE)
		{
			bVoxelHasOneSidedMeshes = true;
		}
	}
#endif
}
/**
 * Composes mesh distance fields into the page atlas.
 * Every page from ComposeTileBuffer is processed by GLOBAL_DISTANCE_FIELD_PAGE_GROUP_NUM thread groups, each covering
 * a 4 texel wide block of the page, with one thread per atlas texel. Objects of the enclosing cull tile are first
 * culled against the block bounds into shared memory, then every thread takes the minimum distance over the culled list.
 * Optionally merges the parent layer (COMPOSE_PARENT_DISTANCE_FIELD) and writes coverage (COMPOSITE_COVERAGE_ATLAS).
 */
[numthreads(COMPOSITE_THREADGROUP_SIZEX, COMPOSITE_THREADGROUP_SIZEY, COMPOSITE_THREADGROUP_SIZEZ)]
void ComposeObjectsIntoPagesCS(
uint3 GroupId : SV_GroupID,
uint3 DispatchThreadId : SV_DispatchThreadID,
uint3 GroupThreadId : SV_GroupThreadID)
{
uint ThreadIndex = (GroupThreadId.z * COMPOSITE_THREADGROUP_SIZEY + GroupThreadId.y) * COMPOSITE_THREADGROUP_SIZEX + GroupThreadId.x;
// GroupId.x encodes both the page (quotient) and the block inside of the page (remainder)
uint PackedPageTile = ComposeTileBuffer[GroupId.x / GLOBAL_DISTANCE_FIELD_PAGE_GROUP_NUM];
uint LinearPageGroupOffset = GroupId.x % GLOBAL_DISTANCE_FIELD_PAGE_GROUP_NUM;
uint3 PageGroupOffset;
PageGroupOffset.x = LinearPageGroupOffset % GLOBAL_DISTANCE_FIELD_PAGE_GROUP_X;
PageGroupOffset.y = (LinearPageGroupOffset / GLOBAL_DISTANCE_FIELD_PAGE_GROUP_X) % GLOBAL_DISTANCE_FIELD_PAGE_GROUP_X;
PageGroupOffset.z = (LinearPageGroupOffset / GLOBAL_DISTANCE_FIELD_PAGE_GROUP_X) / GLOBAL_DISTANCE_FIELD_PAGE_GROUP_X;
// Unpack X | (Y << 8) | (Z << 16)
uint3 PageGridCoord;
PageGridCoord.x = PackedPageTile & 0xFF;
PageGridCoord.y = (PackedPageTile >> 8) & 0xFF;
PageGridCoord.z = (PackedPageTile >> 16) & 0xFF;
uint3 TexelCoordInPage = PageGroupOffset * 4 + GroupThreadId;
// Voxel coordinate of this thread in the clipmap, shifted by the page border
float3 PageCoord = PageGridCoord * GLOBAL_DISTANCE_FIELD_PAGE_RESOLUTION + TexelCoordInPage - GLOBAL_DISTANCE_FIELD_PAGE_BORDER;
// Center of the 4 texel wide block (texels 0..3 -> 1.5), used for group level object culling
float3 ComposeTileCenterPageCoord = PageGridCoord * GLOBAL_DISTANCE_FIELD_PAGE_RESOLUTION + PageGroupOffset * 4 + 1.5f - GLOBAL_DISTANCE_FIELD_PAGE_BORDER;
uint3 CullGridCoord = PageGridCoord / CULL_GRID_FACTOR;
float3 VoxelWorldCenter = PageCoord * PageCoordToVoxelCenterScale + PageCoordToVoxelCenterBias;
float3 ComposeTileWorldCenter = ComposeTileCenterPageCoord * PageCoordToVoxelCenterScale + PageCoordToVoxelCenterBias;
uint3 PageTableTextureCoord = PageGridCoordToPageTableTextureCoord(PageGridCoord);
uint PageId = PageTableLayerTexture.Load(int4(PageTableTextureCoord, 0));
uint3 PageAtlasCoord = GlobalDistanceFieldPageLinearIndexToPageAtlasOffset(PageId) * GLOBAL_DISTANCE_FIELD_PAGE_RESOLUTION_IN_ATLAS + TexelCoordInPage;
// Object list of the cull tile which contains this page
uint CullGridLinearIndex = (CullGridCoord.z * CullGridResolution.y + CullGridCoord.y) * CullGridResolution.x + CullGridCoord.x;
uint CullGridObjectNum = CullGridObjectHeader[CullGridLinearIndex * 2 + 0];
uint CullGridObjectOffset = CullGridObjectHeader[CullGridLinearIndex * 2 + 1];
float MinDistance = InfluenceRadius;
bool bVoxelHasTwoSidedMeshes = false;
bool bVoxelHasOneSidedMeshes = false;
#if PROCESS_DISTANCE_FIELDS
// Tile culling is always forced on here, so the #else path below is never compiled
#undef USE_OBJECT_COMPOSITING_TILE_CULLING
#define USE_OBJECT_COMPOSITING_TILE_CULLING 1
#if USE_OBJECT_COMPOSITING_TILE_CULLING
// Process the cull tile objects in passes of at most MAX_CULLED_DF_OBJECTS, so that a pass always fits into the shared list
uint NumPasses = (CullGridObjectNum + MAX_CULLED_DF_OBJECTS - 1) / MAX_CULLED_DF_OBJECTS;
for (uint PassIndex = 0; PassIndex < NumPasses; PassIndex++)
{
uint PassStartObject = PassIndex * MAX_CULLED_DF_OBJECTS;
// Despite the name, this is the end index (exclusive) of this pass
uint PassNumObjects = PassStartObject + min(CullGridObjectNum - PassStartObject, MAX_CULLED_DF_OBJECTS);
if (all(GroupThreadId == 0))
{
NumTileCulledObjects = 0;
}
GroupMemoryBarrierWithGroupSync();
// Cull objects against the bounds of this thread group's block
for (uint IndexInCullCell = PassStartObject + ThreadIndex; IndexInCullCell < PassNumObjects; IndexInCullCell += COMPOSITE_THREADGORUP_TOTALSIZE)
{
uint ObjectIndex = CullGridObjectArray[CullGridObjectOffset + IndexInCullCell];
FDFObjectBounds DFObjectBounds = LoadDFObjectBounds(ObjectIndex);
float DistanceSq = ComputeSquaredDistanceBetweenAABBs(ComposeTileWorldCenter, ComposeTileWorldExtent, LWCHackToFloat(DFObjectBounds.Center), DFObjectBounds.BoxExtent);
if (DistanceSq < InfluenceRadiusSq)
{
uint DestIndex;
InterlockedAdd(NumTileCulledObjects, 1U, DestIndex);
SharedCulledObjectList[DestIndex] = ObjectIndex;
}
}
GroupMemoryBarrierWithGroupSync();
// NOTE(review): there's no barrier between this read and the reset of NumTileCulledObjects at the start of
// the next pass - confirm that a thread running ahead can't reset the counter before it's read here
uint LocalNumTileCulledObjects = NumTileCulledObjects;
LOOP
for (uint ListObjectIndex = 0; ListObjectIndex < LocalNumTileCulledObjects; ListObjectIndex++)
{
uint ObjectIndex = SharedCulledObjectList[ListObjectIndex];
CompositeMeshSDF(ObjectIndex, VoxelWorldCenter, MinDistance, bVoxelHasTwoSidedMeshes, bVoxelHasOneSidedMeshes);
}
}
#else
for (uint IndexInCullGrid = 0; IndexInCullGrid < CullGridObjectNum; ++IndexInCullGrid)
{
uint ObjectIndex = CullGridObjectArray[CullGridObjectOffset + IndexInCullGrid];
CompositeMeshSDF(ObjectIndex, VoxelWorldCenter, MinDistance, bVoxelHasTwoSidedMeshes, bVoxelHasOneSidedMeshes);
}
#endif
#endif // PROCESS_DISTANCE_FIELDS
// Threads outside of the page's atlas footprint took part in the culling above, but don't write anything
if (all(TexelCoordInPage < GLOBAL_DISTANCE_FIELD_PAGE_RESOLUTION_IN_ATLAS))
{
// Coverage is stored at a lower resolution, so only one thread per downsampled texel writes it
uint3 CoverageTexelCoordInPage = TexelCoordInPage / GLOBAL_DISTANCE_FIELD_COVERAGE_DOWNSAMPLE_FACTOR;
bool bThreadWritesCoverage = all(and(
TexelCoordInPage % GLOBAL_DISTANCE_FIELD_COVERAGE_DOWNSAMPLE_FACTOR == 0,
CoverageTexelCoordInPage < GLOBAL_DISTANCE_FIELD_COVERAGE_PAGE_RESOLUTION_IN_ATLAS));
// Conservatively set uncovered only if there are no one sided meshes, and we're not in empty space
float Coverage = bVoxelHasTwoSidedMeshes && !bVoxelHasOneSidedMeshes ? 0.0f : 1.0f;
#if COMPOSE_PARENT_DISTANCE_FIELD
{
// Merge the parent layer page at the same page table location, if it has one
uint ParentPageId = ParentPageTableLayerTexture.Load(int4(PageTableTextureCoord, 0));
if (ParentPageId != GLOBAL_DISTANCE_FIELD_INVALID_PAGE_ID)
{
uint3 ParentPageAtlasOffset = GlobalDistanceFieldPageLinearIndexToPageAtlasOffset(ParentPageId);
uint3 ParentPageAtlasCoord = ParentPageAtlasOffset * GLOBAL_DISTANCE_FIELD_PAGE_RESOLUTION_IN_ATLAS + TexelCoordInPage;
float ParentDistanceField = DecodeGlobalDistanceFieldPageDistance(RWPageAtlasTexture[ParentPageAtlasCoord].x, InfluenceRadius);
MinDistance = min(MinDistance, ParentDistanceField);
#if COMPOSITE_COVERAGE_ATLAS
if (bThreadWritesCoverage)
{
uint3 ParentCoverageAtlasCoord = ParentPageAtlasOffset * GLOBAL_DISTANCE_FIELD_COVERAGE_PAGE_RESOLUTION_IN_ATLAS + CoverageTexelCoordInPage;
Coverage = max(Coverage, RWCoverageAtlasTexture[ParentCoverageAtlasCoord]);
}
#endif
}
}
#endif
RWPageAtlasTexture[PageAtlasCoord] = EncodeGlobalDistanceFieldPageDistance(MinDistance, InfluenceRadius);
#if COMPOSITE_COVERAGE_ATLAS
if (bThreadWritesCoverage)
{
uint3 CoverageAtlasCoord = GlobalDistanceFieldPageLinearIndexToPageAtlasOffset(PageId) * GLOBAL_DISTANCE_FIELD_COVERAGE_PAGE_RESOLUTION_IN_ATLAS + CoverageTexelCoordInPage;
RWCoverageAtlasTexture[CoverageAtlasCoord] = Coverage;
}
#endif
}
}