// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
    VirtualShadowMapPhysicalPageManagement.usf:
=============================================================================*/

#include "../Common.ush"
#include "../WaveOpUtil.ush"
#include "../ReductionCommon.ush"
#include "../GPUMessaging.ush"
#include "../ShaderPrint.ush"
#include "VirtualShadowMapProjectionStructs.ush"
#include "VirtualShadowMapProjectionCommon.ush"
#include "VirtualShadowMapPageAccessCommon.ush"
#include "VirtualShadowMapStats.ush"

#ifndef HAS_CACHE_DATA
#define HAS_CACHE_DATA 1
#endif //HAS_CACHE_DATA

// Page flags generated by page allocation to indicate state to rendering passes (i.e., present / invalid)
StructuredBuffer<uint> PageRequestFlags;
RWStructuredBuffer<uint> OutPageFlags;
RWStructuredBuffer<uint> OutPageTable;
RWStructuredBuffer<FPhysicalPageMetaData> OutPhysicalPageMetaData;

// A series of lists used to track various page states (free, used)
// Each list is MaxPhysicalPages entries plus 1 uint counter
RWStructuredBuffer<int> OutPhysicalPageLists;

// Stores available pages (i.e. ones not used this frame) for allocation, in LRU order
#define PHYSICAL_PAGE_LIST_LRU 0
// Packed available list
// Pages invalidated this frame will be added to the end. Allocations come from the end.
#define PHYSICAL_PAGE_LIST_AVAILABLE 1
// Stores invalidated/empty pages temporarily before they are re-added to the AVAILABLE list
#define PHYSICAL_PAGE_LIST_EMPTY 2
// Stores pages requested/used this frame, not available for allocation
#define PHYSICAL_PAGE_LIST_REQUESTED 3
// Number of page lists
#define PHYSICAL_PAGE_LIST_COUNT 4
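
// Layout illustration (not code): with MaxPhysicalPages == 4, OutPhysicalPageLists is four
// sub-lists of 5 ints each, with each list's count stored in its final slot:
//   [ LRU: i0 i1 i2 i3 count | AVAILABLE: i0 i1 i2 i3 count | EMPTY: ... | REQUESTED: ... ]
// GetPhysicalPageListStart(PageList) == PageList * (MaxPhysicalPages + 1) below selects the sub-list.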

int GetPhysicalPageListStart(int PageList)
{
    return PageList * (VirtualShadowMap.MaxPhysicalPages + 1);
}

int GetPhysicalPageListItem(uint PageList, int Index)
{
    return OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + Index];
}

void SetPhysicalPageListItem(uint PageList, int Index, int Value)
{
    //check(Index < VirtualShadowMap.MaxPhysicalPages);
    OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + Index] = Value;
}

int GetPhysicalPageListCount(int PageList)
{
    return OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + VirtualShadowMap.MaxPhysicalPages];
}

void SetPhysicalPageListCount(int PageList, int NewCount)
{
    OutPhysicalPageLists[GetPhysicalPageListStart(PageList) + VirtualShadowMap.MaxPhysicalPages] = NewCount;
}

bool PushPhysicalPageList(uint PageList, int PhysicalPageIndex)
{
    uint PageListStart = GetPhysicalPageListStart(PageList);
    // NOTE: Counter is the final element of the list
    int Offset = 0;
    WaveInterlockedAddScalar_(OutPhysicalPageLists[PageListStart + VirtualShadowMap.MaxPhysicalPages], 1, Offset);
    // We have to guard against overflow, as it would overwrite the counter and potentially write into other lists
    if (Offset < VirtualShadowMap.MaxPhysicalPages)
    {
        OutPhysicalPageLists[PageListStart + Offset] = PhysicalPageIndex;
        return true;
    }
    else
    {
        return false;
    }
}

// Returns <0 if none available, otherwise returns the actual value
int PopPhysicalPageList(uint PageList)
{
    uint PageListStart = GetPhysicalPageListStart(PageList);
    int Offset = 0;

#if 1
    WaveInterlockedAddScalar_(OutPhysicalPageLists[PageListStart + VirtualShadowMap.MaxPhysicalPages], -1, Offset);
#else
    // Need negative numbers here...
    InterlockedAdd(OutPhysicalPageLists[PageListStart + VirtualShadowMap.MaxPhysicalPages], -1, Offset);
#endif

    // We want the value *after* decrement in this case
    --Offset;
    return Offset < 0 ? INDEX_NONE : OutPhysicalPageLists[PageListStart + Offset];
}
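
// Usage sketch (illustrative): pop returns INDEX_NONE when the list is exhausted, and the
// counter may be left negative after excess pops; callers must check the return value, and
// later passes (e.g., AppendPhysicalPageLists below) clamp and rewrite the counts.
//   int PhysicalPageIndex = PopPhysicalPageList(PHYSICAL_PAGE_LIST_AVAILABLE);
//   if (PhysicalPageIndex >= 0) { /* ... use the page ... */ }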

StructuredBuffer<int> PrevPhysicalPageLists;

RWStructuredBuffer<uint4> OutPageRectBounds;

// This is admittedly a weird fusion of several initializations, but it is the first thing
// we run in a given analysis phase, so it's more efficient to do it all here rather than
// have several small passes later.
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void InitPageRectBounds(uint3 Index : SV_DispatchThreadID)
{
    if (Index.x < VSM_MAX_MIP_LEVELS * VirtualShadowMap.NumShadowMapSlots)
    {
        OutPageRectBounds[Index.x] = uint4(VSM_LEVEL0_DIM_PAGES_XY, VSM_LEVEL0_DIM_PAGES_XY, 0, 0);
    }

    // Clear the various list counters
    if (Index.x == 0)
    {
        // These lists are going to start "full" before packing
        SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_LRU, VirtualShadowMap.MaxPhysicalPages);
        // These start empty and are added to as elements are removed from the LRU one
        SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_AVAILABLE, 0);
        SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_EMPTY, 0);
        SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_REQUESTED, 0);
    }
}
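
// NOTE: The rect bounds above start inverted/empty (min = VSM_LEVEL0_DIM_PAGES_XY, max = 0)
// so that later passes can atomically min/max the requested page range into them; a shadow
// map with no requested pages keeps an empty rect. (The updating passes live elsewhere.)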


// Mapping of previous frame/update data to current frame
StructuredBuffer<FNextVirtualShadowMapData> NextVirtualShadowMapData;
uint NextVirtualShadowMapDataCount;

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void UpdatePhysicalPageAddresses(uint3 Index : SV_DispatchThreadID)
{
    // TODO: Probably make this a loose constant to remove this shader's dependency on the VSM UB
    // Still needs the VSM defines for IsVirtualShadowMapPageAddressValid addressing math though!
    if (Index.x >= VirtualShadowMap.MaxPhysicalPages)
    {
        return;
    }

    // Use identity mapping by default
    int PhysicalPageIndex = Index.x;
    checkStructuredBufferAccessSlow(OutPhysicalPageMetaData, PhysicalPageIndex);

    FPhysicalPageMetaData PrevMetaData = OutPhysicalPageMetaData[PhysicalPageIndex];

    bool bKeepPage = false;
    if (PrevMetaData.Flags != 0)
    {
        // Update the virtual shadow map ID to the equivalent one this frame, if present
        // NOTE: We need a range check as we only add elements to this mapping if they exist this frame
        int PrevVirtualShadowMapId = PrevMetaData.VirtualShadowMapId;
        if (PrevVirtualShadowMapId != INDEX_NONE && PrevVirtualShadowMapId < NextVirtualShadowMapDataCount)
        {
            FNextVirtualShadowMapData NextData = NextVirtualShadowMapData[PrevVirtualShadowMapId];
            int VirtualShadowMapId = NextData.NextVirtualShadowMapId;

            // Check if it maps to a valid virtual shadow map this frame
            if (VirtualShadowMapId != INDEX_NONE)
            {
                // Clipmap panning; zeroed otherwise so safe
                int2 TestPageAddress = int2(PrevMetaData.PageAddress) + NextData.PageAddressOffset;
                if (IsVirtualShadowMapPageAddressValid(TestPageAddress, PrevMetaData.MipLevel))
                {
                    // Valid physical page in the cache!
                    // It may still be invalidated by flags or overwritten by new requests this frame, but for now we will maintain it
                    OutPhysicalPageMetaData[PhysicalPageIndex].VirtualShadowMapId = VirtualShadowMapId;
                    OutPhysicalPageMetaData[PhysicalPageIndex].PageAddress = uint2(TestPageAddress);
                    // No changes to other fields
                    bKeepPage = true;
                }
            }
        }
    }

    if (!bKeepPage)
    {
        // Only need to zero out the flags for the page to be considered invalid
        OutPhysicalPageMetaData[PhysicalPageIndex].Flags = 0;
    }
}
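
// Illustration: for a directional-light clipmap level that panned by one page this frame,
// NextData.PageAddressOffset shifts the cached physical page to its new virtual page
// address; pages shifted outside the valid page table range fail
// IsVirtualShadowMapPageAddressValid and are released. The offset is zeroed for
// non-clipmap lights, so the add is always safe.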


int bDynamicPageInvalidation;
int bAllocateViaLRU;
int MaxPageAgeSinceLastRequest;

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void UpdatePhysicalPages(uint3 Index : SV_DispatchThreadID)
{
    // Because of launch size rounding we might get here.
    if (Index.x >= VirtualShadowMap.MaxPhysicalPages)
    {
        return;
    }

    // This is the index in the PhysicalPageList
    const uint PhysicalPageListIndex = Index.x;
    bool bRemovedPageFromList = false;

    // Use identity mapping by default
    int PhysicalPageIndex = PhysicalPageListIndex;

#if HAS_CACHE_DATA
    if (bAllocateViaLRU)
    {
        // If available, use last frame's LRU ordering as the input here so we can maintain that order
        // NOTE: These end up sorted into the PHYSICAL_PAGE_LIST_REQUESTED list at the end of the frame
        // LastFrameLRUList[PhysicalPageListIndex]
        const int PrevPageListStart = GetPhysicalPageListStart(PHYSICAL_PAGE_LIST_REQUESTED);
        PhysicalPageIndex = PrevPhysicalPageLists[PrevPageListStart + PhysicalPageListIndex];
        checkSlow(PhysicalPageIndex >= INDEX_NONE);
        checkSlow(PhysicalPageIndex < VirtualShadowMap.MaxPhysicalPages);
    }
#endif

    checkStructuredBufferAccessSlow(OutPhysicalPageMetaData, PhysicalPageIndex);

    // 1:1 read-modify-write is safe
    uint NextPhysicalFlags = 0;

#if HAS_CACHE_DATA
    {
        FPhysicalPageMetaData PrevMetaData = OutPhysicalPageMetaData[PhysicalPageIndex];
        uint MipLevel = PrevMetaData.MipLevel;

        if (PrevMetaData.Flags != 0)
        {
            // Convenience
            const int VirtualShadowMapId = PrevMetaData.VirtualShadowMapId;
            const uint2 PageAddress = PrevMetaData.PageAddress;

            // Look up the request flags for this frame to see if this page was requested again
            const uint GlobalPageOffset = CalcPageOffset(VirtualShadowMapId, MipLevel, PageAddress);
            const uint RequestFlags = PageRequestFlags[GlobalPageOffset];
            const bool bRequestedThisFrame = RequestFlags != 0;

            const int PhysicalPageRequestedAge = int(VirtualShadowMap.SceneFrameNumber - PrevMetaData.LastRequestedSceneFrameNumber);

            // If the light is unreferenced we also allow its pages to live (unless reallocated) regardless of age for now,
            // since we won't be rendering into them so they don't do a lot of harm being present.
            const FVirtualShadowMapProjectionShaderData Projection = GetVirtualShadowMapProjectionData(VirtualShadowMapId);
            if (bRequestedThisFrame || Projection.bUnreferenced || PhysicalPageRequestedAge <= MaxPageAgeSinceLastRequest)
            {
                const uint PrevPhysicalFlags = PrevMetaData.Flags;

                // Update the mapping data for any valid cached pages so we don't lose it
                OutPhysicalPageMetaData[PhysicalPageIndex].VirtualShadowMapId = VirtualShadowMapId;
                OutPhysicalPageMetaData[PhysicalPageIndex].PageAddress = PageAddress;

                // Map the page to the physical page.
                // Note that we do this even if it was invalidated, as we might as well reuse the same slot and clear/overwrite it this frame.
                // We also do this even if the page was not requested this frame, because we still need the mapping for invalidations to be triggered, etc.
                // If we later allocate over top of this page (for one requested this frame), we will zero this out again. See AllocateNewPageMappings.
                checkStructuredBufferAccessSlow(OutPageTable, GlobalPageOffset);
                checkStructuredBufferAccessSlow(OutPageFlags, GlobalPageOffset);
                OutPageTable[GlobalPageOffset] = ShadowEncodePageTable(VSMPhysicalIndexToPageAddress(PhysicalPageIndex));
                // NOTE: PageFlags get written below

                if (Projection.bUnreferenced)
                {
                    // If the light is unreferenced (i.e. we are not going to render to it this frame) we want to leave the physical
                    // metadata alone, *specifically* the invalidation flags. Since an unreferenced shadow map will not get
                    // rendered to this frame, we can't clear these flags and instead want to maintain them until a potential
                    // future frame when this light might be referenced again.
                    // Tag the page so we can skip it in rendering-related tasks like clearing and merging
                    NextPhysicalFlags = PrevPhysicalFlags | VSM_PHYSICAL_FLAG_UNREFERENCED;

                    // NOTE: This is unused during this render, but may be used by invalidation between frames/renders
                    OutPageFlags[GlobalPageOffset] = PrevPhysicalFlags & VSM_PAGE_FLAGS_BITS_MASK;
                }
                else
                {
                    uint NextPageFlags = VSM_ALLOCATED_FLAG;

                    // Distant lights block GPU-GPU invalidations (and all others) as they are round-robin invalidated
                    if (bDynamicPageInvalidation && !IsSinglePageVirtualShadowMap(VirtualShadowMapId))
                    {
                        uint InvalidationFlags = (PrevPhysicalFlags >> VSM_PHYSICAL_PAGE_INVALIDATION_FLAGS_SHIFT);
                        if (VirtualShadowMapShouldCacheStaticSeparately())
                        {
                            NextPageFlags |= InvalidationFlags;

                            // Since we merge the static into the dynamic page, if static is invalidated, force dynamic invalid too
                            if (InvalidationFlags & VSM_STATIC_UNCACHED_FLAG)
                            {
                                NextPageFlags |= VSM_DYNAMIC_UNCACHED_FLAG;
                            }
                        }
                        else if (InvalidationFlags != 0)
                        {
                            NextPageFlags |= (VSM_STATIC_UNCACHED_FLAG | VSM_DYNAMIC_UNCACHED_FLAG);
                        }
                    }

                    uint PhysicalPageDetailGeometryFlag = (PrevPhysicalFlags & VSM_DETAIL_GEOMETRY_FLAG);
                    if (bRequestedThisFrame)
                    {
                        StatsBufferInterlockedInc(VSM_STAT_REQUESTED_THIS_FRAME_PAGES);
                        // Remove from LRU list and add to requested list
                        PushPhysicalPageList(PHYSICAL_PAGE_LIST_REQUESTED, PhysicalPageIndex);
                        OutPhysicalPageMetaData[PhysicalPageIndex].LastRequestedSceneFrameNumber = VirtualShadowMap.SceneFrameNumber;
                        bRemovedPageFromList = true;

                        // If the detail geometry flag doesn't match the cached page we treat it as a full invalidation.
                        // TODO: This could potentially be a problem for interleaved multiview rendering;
                        // if the flag differs in the two views it will cause cache thrashing.
                        const uint RequestDetailGeometryFlag = (RequestFlags & VSM_DETAIL_GEOMETRY_FLAG);
                        if (RequestDetailGeometryFlag != PhysicalPageDetailGeometryFlag)
                        {
                            NextPageFlags |= (VSM_STATIC_UNCACHED_FLAG | VSM_DYNAMIC_UNCACHED_FLAG);
                            PhysicalPageDetailGeometryFlag = RequestDetailGeometryFlag;
                        }

                        // Only increment the stats for pages requested this render, otherwise it gets confusing
                        if (NextPageFlags & VSM_STATIC_UNCACHED_FLAG)
                        {
                            StatsBufferInterlockedInc(VSM_STAT_STATIC_INVALIDATED_PAGES);
                        }
                        else
                        {
                            StatsBufferInterlockedInc(VSM_STAT_STATIC_CACHED_PAGES);
                        }
                        if (NextPageFlags & VSM_DYNAMIC_UNCACHED_FLAG)
                        {
                            StatsBufferInterlockedInc(VSM_STAT_DYNAMIC_INVALIDATED_PAGES);
                        }
                        else
                        {
                            StatsBufferInterlockedInc(VSM_STAT_DYNAMIC_CACHED_PAGES);
                        }
                    }
                    NextPageFlags |= PhysicalPageDetailGeometryFlag;

                    const uint PhysicalFlags = (Projection.bUnCached ? VSM_PHYSICAL_FLAG_VIEW_UNCACHED : 0U);
                    OutPageFlags[GlobalPageOffset] = NextPageFlags;
                    NextPhysicalFlags = NextPageFlags | PhysicalFlags;
                }
            }
        }
    }
#endif

    // If a page is invalidated/empty, remove it from the LRU list and add it to the empty list.
    // It will be re-added after packing to the end of the AVAILABLE list.
    if (NextPhysicalFlags == 0)
    {
        StatsBufferInterlockedInc(VSM_STAT_EMPTY_PAGES);
        PushPhysicalPageList(PHYSICAL_PAGE_LIST_EMPTY, PhysicalPageIndex);
        bRemovedPageFromList = true;
    }

    OutPhysicalPageMetaData[PhysicalPageIndex].Flags = NextPhysicalFlags;

    // Write out the LRU list while maintaining order, with anything we removed marked as INDEX_NONE
    SetPhysicalPageListItem(PHYSICAL_PAGE_LIST_LRU, PhysicalPageListIndex, bRemovedPageFromList ? INDEX_NONE : PhysicalPageIndex);
}
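
// Per-frame list flow (summary of the above, for reference):
//   LRU (last frame's order)  --requested this frame-->  REQUESTED
//   LRU                       --invalidated/empty---->   EMPTY
//   LRU leftovers + EMPTY  --PackAvailablePages/Append-->  AVAILABLE (order preserved)
//   AVAILABLE  --new allocations (AllocateNewPageMappings)-->  REQUESTED
// REQUESTED becomes the next frame's LRU input, so every physical page index must end up
// in it exactly once.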

// Utility to map from PageIndex -> PageX, PageY, MipLevel
void CalcPageAddressFromIndex(uint Index, inout uint MipLevel, inout uint2 PageAddress)
{
    PageAddress = uint2(0xFFFFFFFF, 0xFFFFFFFF);

    // TODO: There is probably some clever math we can use for this instead;
    // see CalcLevelOffsets for a start
    UNROLL
    for (MipLevel = 0; MipLevel < VSM_MAX_MIP_LEVELS - 1; ++MipLevel)
    {
        if (Index < CalcLevelOffsets(MipLevel + 1))
        {
            break;
        }
    }
    const uint Level0RowMask = ((1U << VSM_LOG2_LEVEL0_DIM_PAGES_XY) - 1U);
    const uint OffsetInLevel = Index - CalcLevelOffsets(MipLevel);
    PageAddress.x = OffsetInLevel & (Level0RowMask >> MipLevel);
    PageAddress.y = OffsetInLevel >> (VSM_LOG2_LEVEL0_DIM_PAGES_XY - MipLevel);
}
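
// Worked example (illustrative; assumes VSM_LOG2_LEVEL0_DIM_PAGES_XY == 7, i.e. a 128x128
// page table at mip 0, so CalcLevelOffsets(1) == 128*128 == 16384):
//   Index = 130   -> mip 0, OffsetInLevel = 130, PageAddress = (130 & 127, 130 >> 7) = (2, 1)
//   Index = 16387 -> mip 1, OffsetInLevel = 3,   PageAddress = (3 & 63, 3 >> 6)      = (3, 0)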

#define EARLY_ALLOCATE 1

void AllocateNewPageMappings(uint VirtualShadowMapId, uint GlobalPageOffset, uint PageOffsetInSM, const bool bSinglePageSM)
{
    checkStructuredBufferAccessSlow(PageRequestFlags, GlobalPageOffset);
    const uint RequestFlags = PageRequestFlags[GlobalPageOffset];
    if (RequestFlags != 0)
    {
        // See if we already hooked this up to a mapped page
        const uint PageFlags = (OutPageFlags[GlobalPageOffset] & VSM_PAGE_FLAGS_BITS_MASK);

        if (PageFlags == 0)
        {
            StatsBufferInterlockedInc(VSM_STAT_REQUESTED_THIS_FRAME_PAGES);

            int PhysicalPageIndex = PopPhysicalPageList(PHYSICAL_PAGE_LIST_AVAILABLE);
            if (PhysicalPageIndex >= 0)
            {
                StatsBufferInterlockedInc(VSM_STAT_ALLOCATED_NEW);

                // Add back to the end of the requested list
                PushPhysicalPageList(PHYSICAL_PAGE_LIST_REQUESTED, PhysicalPageIndex);

                uint2 PhysicalPageAddress = VSMPhysicalIndexToPageAddress(PhysicalPageIndex);

                // FIRST, check if there's a valid page already mapped to this physical page.
                // If so, we must go back and clear out its page table entry before we reallocate this page.
                {
                    FPhysicalPageMetaData PrevMetaData = OutPhysicalPageMetaData[PhysicalPageIndex];
                    if (PrevMetaData.Flags != 0)
                    {
                        uint PrevGlobalPageOffset = CalcPageOffset(PrevMetaData.VirtualShadowMapId, PrevMetaData.MipLevel, PrevMetaData.PageAddress);
                        OutPageTable[PrevGlobalPageOffset] = 0;
                        OutPageFlags[PrevGlobalPageOffset] = 0;
                    }
                }

                uint RequestDetailGeometryFlag = RequestFlags & VSM_DETAIL_GEOMETRY_FLAG;
                uint Flags = VSM_ALLOCATED_FLAG | VSM_DYNAMIC_UNCACHED_FLAG | VSM_STATIC_UNCACHED_FLAG | RequestDetailGeometryFlag;

                // Mark this page as allocated and not cached (needing rendering)
                OutPageTable[GlobalPageOffset] = ShadowEncodePageTable(PhysicalPageAddress);
                OutPageFlags[GlobalPageOffset] = Flags;

                const FVirtualShadowMapProjectionShaderData Projection = GetVirtualShadowMapProjectionData(VirtualShadowMapId);
                const uint PhysicalFlags = (Projection.bUnCached ? VSM_PHYSICAL_FLAG_VIEW_UNCACHED : 0U);

                OutPhysicalPageMetaData[PhysicalPageIndex].Flags = Flags | PhysicalFlags;
                OutPhysicalPageMetaData[PhysicalPageIndex].LastRequestedSceneFrameNumber = VirtualShadowMap.SceneFrameNumber;
                OutPhysicalPageMetaData[PhysicalPageIndex].VirtualShadowMapId = VirtualShadowMapId;

                // Compute page address info from the offset
                // NOTE: We could store this directly in the new page request, but this works for now
                uint MipLevel = 0U;
                uint2 PageAddress = uint2(0U, 0U);
                if (!IsSinglePageVirtualShadowMap(VirtualShadowMapId))
                {
                    CalcPageAddressFromIndex(PageOffsetInSM, MipLevel, PageAddress);
                }
                OutPhysicalPageMetaData[PhysicalPageIndex].MipLevel = MipLevel;
                OutPhysicalPageMetaData[PhysicalPageIndex].PageAddress = PageAddress;
            }
            else
            {
                // We end up here if we're out of physical pages; this means some parts get no physical backing.
                // Post this error condition back to the host somehow!
                // We probably even want to know when we're getting close.
                //OutPageTable[GlobalPageOffset] = 0;
                //OutPageFlags[GlobalPageOffset] = 0;
            }
        }
    }
}
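
// Allocation summary (for reference): a page is newly allocated only when it was requested
// this frame (RequestFlags != 0) and no mapping survived the cache-update passes above
// (PageFlags == 0). The popped physical page may still hold a stale mapping from an aged-out
// page, so its old page table entry is cleared before the slot is reused.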

/**
 * X = one thread per virtual page, Y dim == GetNumShadowMaps() (single-page + full).
 */
[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void AllocateNewPageMappingsCS(uint2 ThreadId : SV_DispatchThreadID)
{
    if (ThreadId.x >= VSM_PAGE_TABLE_SIZE)
    {
        return;
    }

    // Note: driving with an if at the root to try to help the compiler preserve scalarization for the non-single-page path.
    // Single-page lights use the K remaining slices of the 2D dispatch.
    BRANCH
    if (ThreadId.y >= VirtualShadowMap.NumFullShadowMaps)
    {
        // Remap the thread ID to a shadow map ID
        uint VirtualShadowMapId = (ThreadId.y - VirtualShadowMap.NumFullShadowMaps) * VSM_PAGE_TABLE_SIZE + ThreadId.x;
        uint GlobalPageOffset = VirtualShadowMapId;

        // Avoid OOB access
        if (VirtualShadowMapId < VirtualShadowMap.NumSinglePageShadowMaps)
        {
            AllocateNewPageMappings(VirtualShadowMapId, GlobalPageOffset, 0U, true);
        }
    }
    else
    {
        uint PageOffsetInSM = ThreadId.x;
        // Full SMs are allocated after the single-page ones.
        uint VirtualShadowMapId = VSM_MAX_SINGLE_PAGE_SHADOW_MAPS + ThreadId.y;
        uint GlobalPageOffset = CalcFullPageTableLevelOffset(VirtualShadowMapId, 0U) + PageOffsetInSM;

        AllocateNewPageMappings(VirtualShadowMapId, GlobalPageOffset, PageOffsetInSM, false);
    }
}
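
// Dispatch-shape sketch (illustrative): Y slices [0, NumFullShadowMaps) each cover one full
// shadow map's VSM_PAGE_TABLE_SIZE pages; the remaining Y slices are flattened so that each
// thread handles one single-page shadow map (whose page table is just its own entry, hence
// GlobalPageOffset == VirtualShadowMapId for those).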

// NOTE: We only launch a single group here for now to avoid multi-pass, so we really want it as large as possible.
// Can optimize this later if needed for larger physical page counts.
#define NUM_THREADS_PER_GROUP 1024
#include "../ThreadGroupPrefixSum.ush"

[numthreads(NUM_THREADS_PER_GROUP, 1, 1)]
void PackAvailablePages(uint GroupIndex : SV_GroupIndex)
{
    int TotalCount = 0;

    // Must be a uniform loop
    for (int GroupStart = 0; GroupStart < VirtualShadowMap.MaxPhysicalPages; GroupStart += NUM_THREADS_PER_GROUP)
    {
        int ListIndex = GroupStart + GroupIndex;

        int PhysicalPageIndex = ListIndex < VirtualShadowMap.MaxPhysicalPages ?
            GetPhysicalPageListItem(PHYSICAL_PAGE_LIST_LRU, ListIndex) :
            INDEX_NONE;

        bool bListItemValid = PhysicalPageIndex != INDEX_NONE;

        int SumValue = bListItemValid ? 1 : 0;
        // NOTE: Cannot be under any divergent branching!
        int GroupCount = 0;
        int Offset = ThreadGroupPrefixSum(SumValue, GroupIndex, GroupCount);

        if (bListItemValid)
        {
            SetPhysicalPageListItem(PHYSICAL_PAGE_LIST_AVAILABLE, TotalCount + Offset, PhysicalPageIndex);
        }
        TotalCount += GroupCount;

        // This should already be accounted for internally by ThreadGroupPrefixSum, but putting one here
        // to be absolutely sure.
        GroupMemoryBarrierWithGroupSync();
    }

    // Set total number
    if (GroupIndex == 0)
    {
        SetPhysicalPageListCount(PHYSICAL_PAGE_LIST_AVAILABLE, TotalCount);
    }
}
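
// This is a standard order-preserving stream compaction: e.g., an LRU list of
// [7, -1, 3, -1, 5] (where -1 == INDEX_NONE marks pages moved to the REQUESTED or EMPTY
// lists) packs to an AVAILABLE list of [7, 3, 5]; the prefix sum gives each surviving item
// its output slot while keeping the relative LRU order intact.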

#undef NUM_THREADS_PER_GROUP

uint bAppendEmptyToAvailable;
// If true, simply updates the counts instead of copying items.
// This should be run with the same parameters right after the copy pass, with a single group.
uint bUpdateCounts;

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void AppendPhysicalPageLists(uint ThreadId : SV_DispatchThreadID)
{
    // We only need two variants currently, EMPTY->AVAILABLE and AVAILABLE->REQUESTED
    int InputList = bAppendEmptyToAvailable ? PHYSICAL_PAGE_LIST_EMPTY : PHYSICAL_PAGE_LIST_AVAILABLE;
    int OutputList = bAppendEmptyToAvailable ? PHYSICAL_PAGE_LIST_AVAILABLE : PHYSICAL_PAGE_LIST_REQUESTED;

    // NOTE: This needs to maintain order!
    // It also needs to be robust against physical page pool overflows, ensuring that we never "lose" any
    // items in the final LRU list for the next frame.
    int InputCount = GetPhysicalPageListCount(InputList);
    int OutputCount = GetPhysicalPageListCount(OutputList);
    int CopyCount = max(0, min(InputCount, int(VirtualShadowMap.MaxPhysicalPages) - OutputCount));

    if (bUpdateCounts)
    {
        // Update pass (after copy pass)
        if (ThreadId == 0)
        {
            int NewOutputCount = OutputCount + CopyCount;
            SetPhysicalPageListCount(OutputList, NewOutputCount);
            SetPhysicalPageListCount(InputList, 0);

            // The REQUESTED list specifically needs to end up with a single unique copy of each index, as
            // this becomes the LRU list for the next update. If we were to lose any indices or list entries
            // then we would also (permanently) lose actual physical pages. Thus we assert that at least the
            // list must end up as the right size after the final append!
            if (!bAppendEmptyToAvailable)
            {
                /*
                PLATFORM_ASSERT4(
                    NewOutputCount == VirtualShadowMap.MaxPhysicalPages,
                    0xCECC,
                    __LINE__,
                    InputCount,
                    OutputCount,
                    NewOutputCount);
                */
                checkSlow(NewOutputCount == VirtualShadowMap.MaxPhysicalPages);
            }
            else
            {
                /*
                // All pages should now be in PHYSICAL_PAGE_LIST_AVAILABLE or REQUESTED
                int AvailableCount = GetPhysicalPageListCount(PHYSICAL_PAGE_LIST_AVAILABLE);
                int RequestedCount = GetPhysicalPageListCount(PHYSICAL_PAGE_LIST_REQUESTED);
                int EmptyCount = GetPhysicalPageListCount(PHYSICAL_PAGE_LIST_EMPTY);
                int TotalPages = AvailableCount + RequestedCount;

                PLATFORM_ASSERT4(
                    TotalPages == VirtualShadowMap.MaxPhysicalPages,
                    0xCECC,
                    __LINE__,
                    AvailableCount,
                    RequestedCount,
                    EmptyCount);
                */
            }
        }
    }
    else
    {
        if (ThreadId < CopyCount)
        {
            int InputItem = GetPhysicalPageListItem(InputList, ThreadId);
            SetPhysicalPageListItem(OutputList, OutputCount + ThreadId, InputItem);
        }
    }
}
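
// The append is split into two dispatches on purpose: the wide copy pass moves items without
// touching the counters (so every thread reads a stable OutputCount), and only then does the
// single-group count-update pass commit the new sizes. Both passes recompute the same
// CopyCount from the unchanged counters, which is what makes the split safe.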

StructuredBuffer<FPhysicalPageMetaData> PhysicalPageMetaData;
RWTexture2DArray<uint> OutPhysicalPagePool;

// Helper function to merge static and dynamic depth.
void MergePhysicalPixel(uint2 PixelCoord)
{
    // 1:1 pixels so this is safe RMW
    OutPhysicalPagePool[uint3(PixelCoord, 0)] = max(
        OutPhysicalPagePool[uint3(PixelCoord, 0)],
        OutPhysicalPagePool[uint3(PixelCoord, GetVirtualShadowMapStaticArrayIndex())]);
}

RWBuffer<uint> OutIndirectArgsBuffer;
uint NumIndirectArgs;
uint IndirectArgStride;

// Set dispatch args to 0,1,1 (3D grid dim) ready for atomic adding on the GPU
[numthreads(64, 1, 1)]
void ClearIndirectDispatchArgs1DCS(uint IndirectArgIndex : SV_DispatchThreadID)
{
    if (IndirectArgIndex < NumIndirectArgs)
    {
        OutIndirectArgsBuffer[IndirectArgIndex * IndirectArgStride + 0] = 0;
        OutIndirectArgsBuffer[IndirectArgIndex * IndirectArgStride + 1] = 1;
        OutIndirectArgsBuffer[IndirectArgIndex * IndirectArgStride + 2] = 1;
    }
}

// Log2 2D dimension of the thread group size, 2^4 == 16
#define LOG2_TILE_THREAD_GROUP_SIZE_XY 4U
#define TILE_THREAD_GROUP_SIZE_XY (1U << LOG2_TILE_THREAD_GROUP_SIZE_XY)

// Each thread takes 2x2 samples to work with, so the tile size is 2x the thread group size
#define LOG2_TILE_SIZE_XY (LOG2_TILE_THREAD_GROUP_SIZE_XY + 1U)

#if VSM_LOG2_PAGE_SIZE < LOG2_TILE_SIZE_XY
#error "VSM_LOG2_PAGE_SIZE must be larger than LOG2_TILE_SIZE_XY; either increase one or reduce the other"
#endif

// Number of tiles (thread groups) in each dimension to cover the page
#define LOG2_TILES_PER_PAGE_XY ( VSM_LOG2_PAGE_SIZE - LOG2_TILE_SIZE_XY )
// Log2 1D tile count to cover the page (TILES_PER_PAGE_XY * TILES_PER_PAGE_XY tiles)
#define LOG2_TILES_PER_PAGE_1D ( 2U * LOG2_TILES_PER_PAGE_XY )
// 1D tile count to cover the page
#define TILES_PER_PAGE_1D ( 1U << LOG2_TILES_PER_PAGE_1D )

#define TILES_PER_PAGE_XY_MASK ( ( 1U << LOG2_TILES_PER_PAGE_XY ) - 1U )
#define TILES_PER_PAGE_1D_MASK ( ( 1U << LOG2_TILES_PER_PAGE_1D ) - 1U )
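
// Concrete numbers (illustrative; assumes VSM_LOG2_PAGE_SIZE == 7, i.e. 128x128 texel pages):
//   LOG2_TILE_SIZE_XY == 5      -> each 16x16 thread group covers a 32x32 texel tile
//   LOG2_TILES_PER_PAGE_XY == 2 -> 4x4 == 16 tiles (thread groups) per physical page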

RWBuffer<uint> OutInitializePagesIndirectArgsBuffer;
RWStructuredBuffer<uint> OutPhysicalPagesToInitialize;

void EmitPageToProcess(RWBuffer<uint> OutIndirectArgsBuffer, RWStructuredBuffer<uint> OutSelectedPhysicalIndexBuffer, uint PhysicalPageIndex)
{
    int GroupCount = 0;
    // Each page needs TILES_PER_PAGE_1D groups launched
    WaveInterlockedAddScalar_(OutIndirectArgsBuffer[0], TILES_PER_PAGE_1D, GroupCount);
    OutSelectedPhysicalIndexBuffer[GroupCount >> LOG2_TILES_PER_PAGE_1D] = PhysicalPageIndex;
}
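
// Indexing note: the atomic add returns the pre-add group count, which is always a multiple
// of TILES_PER_PAGE_1D, so (GroupCount >> LOG2_TILES_PER_PAGE_1D) is a dense, unique slot per
// emitted page. The consuming kernels invert this with (GroupIndex >> LOG2_TILES_PER_PAGE_1D)
// in GetTileOffset below.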

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void SelectPagesToInitializeCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
    if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
    {
        return;
    }

    FPhysicalPageMetaData MetaData = PhysicalPageMetaData[PhysicalPageIndex];

    bool bUnreferenced = (MetaData.Flags & VSM_PHYSICAL_FLAG_UNREFERENCED) != 0;
    bool bFullyCached = (MetaData.Flags & (VSM_DYNAMIC_UNCACHED_FLAG | VSM_STATIC_UNCACHED_FLAG)) == 0;
    bool bStaticUncached = (MetaData.Flags & VSM_STATIC_UNCACHED_FLAG) != 0;

    if ((MetaData.Flags & VSM_ALLOCATED_FLAG) == 0)
    {
        // Page not used, we're done
    }
    else if (bUnreferenced || bFullyCached)
    {
        // Page fully cached or unreferenced. Leave the data alone.
    }
    else
    {
        // TODO: In the relatively common case of static cached/dynamic uncached we could
        // copy the static data to initialize the dynamic data and avoid the merge later.
        // Before doing this we need to verify it works properly with geometry getting
        // "added on top" of cached pages though, which would break this optimization.

        // At least one of the pages is uncached.
        // NOTE: Dynamic cached/static uncached is currently an invalid state.
        // Since we merge the static data over the dynamic data after rendering, we can't
        // actually maintain separate dynamic cached pages when "only" the (theoretically)
        // static data moved. Thus if not fully cached, we always regenerate the dynamic page.
        EmitPageToProcess(OutInitializePagesIndirectArgsBuffer, OutPhysicalPagesToInitialize, PhysicalPageIndex);
        StatsBufferInterlockedInc(VSM_STAT_NUM_PAGES_TO_CLEAR);

        if (VirtualShadowMapShouldCacheStaticSeparately() &&
            bStaticUncached &&
            (MetaData.Flags & VSM_PHYSICAL_FLAG_VIEW_UNCACHED) == 0U)
        {
            EmitPageToProcess(OutInitializePagesIndirectArgsBuffer, OutPhysicalPagesToInitialize, PhysicalPageIndex + VirtualShadowMap.MaxPhysicalPages);
            StatsBufferInterlockedInc(VSM_STAT_NUM_PAGES_TO_CLEAR);
        }
    }
}
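
// Encoding note: emitting (PhysicalPageIndex + MaxPhysicalPages) is how a *static-slice*
// clear is requested; GetTileOffset below decodes any index >= MaxPhysicalPages back to the
// page index with ArrayIndex = 1 (the static array slice).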

uint3 GetTileOffset(uint GroupIndex, StructuredBuffer<uint> PageIndexBuffer, inout FPhysicalPageMetaData OutMetaData)
{
    const uint PageInputIndex = GroupIndex >> LOG2_TILES_PER_PAGE_1D;
    uint PageIndex = PageIndexBuffer[PageInputIndex];
    int ArrayIndex = 0;

    if (PageIndex >= VirtualShadowMap.MaxPhysicalPages)
    {
        // Request to clear the static page
        PageIndex -= VirtualShadowMap.MaxPhysicalPages;
        ArrayIndex = 1;
    }

    OutMetaData = PhysicalPageMetaData[PageIndex];

    // Each page has TILES_PER_PAGE_1D groups (aka tiles) assigned to work on it.
    const uint LocalTileIndex = GroupIndex & TILES_PER_PAGE_1D_MASK;
    // Unwrap to a 2D tile coord
    const uint2 LocalTile = uint2(LocalTileIndex & TILES_PER_PAGE_XY_MASK, LocalTileIndex >> LOG2_TILES_PER_PAGE_XY);

    uint2 PhysPageAddress = VSMPhysicalIndexToPageAddress(PageIndex);
    // Pixel address of the tile region for this thread group.
    const uint2 TileOffset = (PhysPageAddress << uint2(VSM_LOG2_PAGE_SIZE, VSM_LOG2_PAGE_SIZE)) + (LocalTile << uint2(LOG2_TILE_SIZE_XY, LOG2_TILE_SIZE_XY));

    return uint3(TileOffset, ArrayIndex);
}

uint3 GetTileBasePos(uint2 TileThreadID, uint GroupIndex, StructuredBuffer<uint> PageIndexBuffer, inout FPhysicalPageMetaData OutMetaData)
{
    // Pixel address of the tile region for this thread group.
    const uint3 TileOffset = GetTileOffset(GroupIndex, PageIndexBuffer, OutMetaData);
    // Pixel address of the 2x2 region to sample for this thread.
    const uint2 BasePos = TileOffset.xy + (TileThreadID.xy << 1u);

    return uint3(BasePos, TileOffset.z);
}

uint3 GetTileBasePos(uint2 TileThreadID, uint GroupIndex, StructuredBuffer<uint> PageIndexBuffer)
{
    FPhysicalPageMetaData TmpMetaData;
    return GetTileBasePos(TileThreadID, GroupIndex, PageIndexBuffer, TmpMetaData);
}

StructuredBuffer<uint> PhysicalPagesToInitialize;

[numthreads(TILE_THREAD_GROUP_SIZE_XY, TILE_THREAD_GROUP_SIZE_XY, 1)]
void InitializePhysicalPagesIndirectCS(uint2 TileThreadID : SV_GroupThreadID, uint GroupIndex : SV_GroupID)
{
    FPhysicalPageMetaData MetaData;
    uint3 BasePos = GetTileBasePos(TileThreadID, GroupIndex, PhysicalPagesToInitialize, MetaData);
    bool bStaticCached = (MetaData.Flags & VSM_STATIC_UNCACHED_FLAG) == 0U;

    if (bStaticCached && VirtualShadowMapShouldCacheStaticSeparately() && (MetaData.Flags & VSM_PHYSICAL_FLAG_VIEW_UNCACHED) == 0U)
    {
        // Initialize from the static page data
        checkSlow(BasePos.z == 0U);
        OutPhysicalPagePool[BasePos + uint3(0U, 0U, 0U)] = OutPhysicalPagePool[BasePos + uint3(0U, 0U, 1U)];
        OutPhysicalPagePool[BasePos + uint3(1U, 0U, 0U)] = OutPhysicalPagePool[BasePos + uint3(1U, 0U, 1U)];
        OutPhysicalPagePool[BasePos + uint3(0U, 1U, 0U)] = OutPhysicalPagePool[BasePos + uint3(0U, 1U, 1U)];
        OutPhysicalPagePool[BasePos + uint3(1U, 1U, 0U)] = OutPhysicalPagePool[BasePos + uint3(1U, 1U, 1U)];
    }
    else
    {
        // Clear the page to zero
        OutPhysicalPagePool[BasePos + uint3(0U, 0U, 0U)] = 0U;
        OutPhysicalPagePool[BasePos + uint3(1U, 0U, 0U)] = 0U;
        OutPhysicalPagePool[BasePos + uint3(0U, 1U, 0U)] = 0U;
        OutPhysicalPagePool[BasePos + uint3(1U, 1U, 0U)] = 0U;
    }
}

RWBuffer<uint> OutMergePagesIndirectArgsBuffer;
RWStructuredBuffer<uint> OutPhysicalPagesToMerge;

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void SelectPagesToMergeCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
    if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
    {
        return;
    }

    FPhysicalPageMetaData MetaData = PhysicalPageMetaData[PhysicalPageIndex];

    // An uncached view always renders exclusively into the dynamic pages, and thus requires no merging.
    if ((MetaData.Flags & VSM_ALLOCATED_FLAG) != 0U &&
        (MetaData.Flags & VSM_PHYSICAL_FLAG_VIEW_UNCACHED) == 0U &&
        (MetaData.Flags & VSM_PHYSICAL_FLAG_DIRTY) != 0U)
    {
        StatsBufferInterlockedInc(VSM_STAT_NUM_PAGES_TO_MERGE);
        EmitPageToProcess(OutMergePagesIndirectArgsBuffer, OutPhysicalPagesToMerge, PhysicalPageIndex);
    }
}

StructuredBuffer<uint> PhysicalPagesToMerge;

[numthreads(TILE_THREAD_GROUP_SIZE_XY, TILE_THREAD_GROUP_SIZE_XY, 1)]
void MergeStaticPhysicalPagesIndirectCS(uint2 TileThreadID : SV_GroupThreadID, uint GroupIndex : SV_GroupID)
{
    uint2 BasePos = GetTileBasePos(TileThreadID, GroupIndex, PhysicalPagesToMerge).xy;

    // 1:1 pixels so this is safe RMW
    MergePhysicalPixel(BasePos + uint2(0U, 0U));
    MergePhysicalPixel(BasePos + uint2(1U, 0U));
    MergePhysicalPixel(BasePos + uint2(0U, 1U));
    MergePhysicalPixel(BasePos + uint2(1U, 1U));
}


// Indirect HZB building:
RWStructuredBuffer<uint> DirtyPageFlagsInOut;
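
// Layout: three MaxPhysicalPages-sized sections, indexed per physical page (see below):
//   [0, Max)        page was rendered to (dirty)
//   [Max, 2*Max)    dynamic-page invalidation requests
//   [2*Max, 3*Max)  static-page invalidation requests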

bool UpdateAndClearDirtyFlags(uint PhysicalPageIndex, inout FPhysicalPageMetaData MetaData)
{
    bool bPageDirty = DirtyPageFlagsInOut[PhysicalPageIndex] != 0U;
    bool bInvalidatesDynamic = DirtyPageFlagsInOut[PhysicalPageIndex + VirtualShadowMap.MaxPhysicalPages] != 0U;
    bool bInvalidatesStatic = DirtyPageFlagsInOut[PhysicalPageIndex + 2U * VirtualShadowMap.MaxPhysicalPages] != 0U;

    // Clear the dirty/invalidation flags
    DirtyPageFlagsInOut[PhysicalPageIndex] = 0U;
    DirtyPageFlagsInOut[PhysicalPageIndex + VirtualShadowMap.MaxPhysicalPages] = 0U;
    DirtyPageFlagsInOut[PhysicalPageIndex + 2U * VirtualShadowMap.MaxPhysicalPages] = 0U;

    uint CacheFlags = (bInvalidatesStatic ? VSM_STATIC_UNCACHED_FLAG : 0U) | (bInvalidatesDynamic ? VSM_DYNAMIC_UNCACHED_FLAG : 0U);

    // Update the page metadata to mark the pages as uncached, which allows the page merging to pick them up.
    // Also mark bits for invalidation (these need their own set of bits to avoid being merged with the input state
    // at the start of the frame - e.g., fresh pages are already uncached).
    MetaData = OutPhysicalPageMetaData[PhysicalPageIndex];
    MetaData.Flags |= (bPageDirty ? VSM_PHYSICAL_FLAG_DIRTY : 0U) | (CacheFlags << VSM_PHYSICAL_PAGE_INVALIDATION_FLAGS_SHIFT);
    OutPhysicalPageMetaData[PhysicalPageIndex].Flags = MetaData.Flags;

    return bPageDirty;
}

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void UpdateAndClearDirtyFlagsCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
    if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
    {
        return;
    }
    FPhysicalPageMetaData MetaData;
    UpdateAndClearDirtyFlags(PhysicalPageIndex, MetaData);
}

RWBuffer<uint> OutPagesForHZBIndirectArgsBuffer;
RWStructuredBuffer<uint> OutPhysicalPagesForHZB;
uint bFirstBuildThisFrame;
uint bForceFullHZBUpdate;

[numthreads(VSM_DEFAULT_CS_GROUP_X, 1, 1)]
void SelectPagesForHZBAndUpdateDirtyFlagsCS(uint PhysicalPageIndex : SV_DispatchThreadID)
{
    if (PhysicalPageIndex >= VirtualShadowMap.MaxPhysicalPages)
    {
        return;
    }

    FPhysicalPageMetaData MetaData;
    bool bPageDirty = UpdateAndClearDirtyFlags(PhysicalPageIndex, MetaData);

    if ((MetaData.Flags & VSM_ALLOCATED_FLAG) != 0)
    {
        // Rebuild the HZB if the page was rendered to or is freshly allocated/cleared, but only the first time for a given frame.
        bool bStaticUncached = (MetaData.Flags & VSM_STATIC_UNCACHED_FLAG) != 0;
        if (bForceFullHZBUpdate || bPageDirty || (bFirstBuildThisFrame != 0 && bStaticUncached))
        {
            StatsBufferInterlockedInc(VSM_STAT_NUM_HZB_PAGES_BUILT);

            int GroupCount = 0;
            // Each page needs TILES_PER_PAGE_1D groups launched
            WaveInterlockedAddScalar_(OutPagesForHZBIndirectArgsBuffer[0], TILES_PER_PAGE_1D, GroupCount);
            OutPhysicalPagesForHZB[GroupCount >> LOG2_TILES_PER_PAGE_1D] = PhysicalPageIndex;

            // Each top-mip reduction needs only one group launched
            WaveInterlockedAddScalar_(OutPagesForHZBIndirectArgsBuffer[0 + 4], 1U, GroupCount);
        }
    }
}
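
// OutPagesForHZBIndirectArgsBuffer evidently holds two stacked dispatch-args entries
// (cleared via ClearIndirectDispatchArgs1DCS above): element 0 drives BuildHZBPerPageCS with
// TILES_PER_PAGE_1D groups per selected page, and element 4 drives BuildHZBPerPageTopCS with
// one group per page; both index into OutPhysicalPagesForHZB.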


SamplerState PhysicalPagePoolSampler;
Texture2DArray<uint> PhysicalPagePool;

float4 Gather4VisZ(uint2 PixelCoord, uint ArrayIndex)
{
#if COMPILER_SUPPORTS_GATHER_UINT
    // Offset to the 2x2 footprint center and scale to UV space
    float2 UV = float2(PixelCoord + uint2(1U, 1U)) * VirtualShadowMap.RecPhysicalPoolSize.xy;
    return asfloat(PhysicalPagePool.Gather(PhysicalPagePoolSampler, float3(UV, ArrayIndex), 0));
#else
    uint4 PixelRect = uint4(PixelCoord.xy, PixelCoord.xy + uint2(1U, 1U));
    uint4 UintDepths = uint4(
        PhysicalPagePool[uint3(PixelRect.xw, ArrayIndex)].r,  // (-, +)
        PhysicalPagePool[uint3(PixelRect.zw, ArrayIndex)].r,  // (+, +)
        PhysicalPagePool[uint3(PixelRect.zy, ArrayIndex)].r,  // (+, -)
        PhysicalPagePool[uint3(PixelRect.xy, ArrayIndex)].r   // (-, -)
    );
    return asfloat(UintDepths);
#endif
}

StructuredBuffer<uint> PhysicalPagesForHzb;
//                                          out     input                          output
RWTexture2D<float> FurthestHZBOutput_0;  // 64   // 1 Group: 32 (16 threads x2)    16
RWTexture2D<float> FurthestHZBOutput_1;  // 32   // 1           16                 8
RWTexture2D<float> FurthestHZBOutput_2;  // 16              8                      4
RWTexture2D<float> FurthestHZBOutput_3;  // 8               4                      2
RWTexture2D<float> FurthestHZBOutput_4;  // 4               2                      1


groupshared float SharedMinDeviceZ[TILE_THREAD_GROUP_SIZE_XY * TILE_THREAD_GROUP_SIZE_XY];
groupshared float SharedMaxDeviceZ[TILE_THREAD_GROUP_SIZE_XY * TILE_THREAD_GROUP_SIZE_XY];

#define DIM_FURTHEST 1
#define DIM_CLOSEST 0

void OutputMipLevel(uint MipLevel, uint2 OutputPixelPos, float FurthestDeviceZ, float ClosestDeviceZ)
{
#if DIM_FURTHEST
    #define COND_OUTPUT_LEVEL(_level_) \
        if (MipLevel == _level_) \
        { \
            FurthestHZBOutput_##_level_[OutputPixelPos] = FurthestDeviceZ; \
            return; \
        }
#endif
#if DIM_CLOSEST
    ClosestHZBOutput_1[OutputPixelPos] = ClosestDeviceZ;
#endif

    COND_OUTPUT_LEVEL(1)
    COND_OUTPUT_LEVEL(2)
    COND_OUTPUT_LEVEL(3)
    COND_OUTPUT_LEVEL(4)

#undef COND_OUTPUT_LEVEL
}

[numthreads(TILE_THREAD_GROUP_SIZE_XY, TILE_THREAD_GROUP_SIZE_XY, 1)]
void BuildHZBPerPageCS(uint GroupThreadIndex : SV_GroupIndex, uint GroupIndex : SV_GroupID)
{
    FPhysicalPageMetaData MetaData;
    uint2 SrcTileOffset = GetTileOffset(GroupIndex, PhysicalPagesForHzb, MetaData).xy;

    // Uncacheable views always go to the dynamic slice (slice 0)
    uint ArrayIndex = (MetaData.Flags & VSM_PHYSICAL_FLAG_VIEW_UNCACHED) != 0U ? 0U : GetVirtualShadowMapStaticArrayIndex();

    uint2 RemappedGroupThreadIndex = InitialTilePixelPositionForReduction2x2(LOG2_TILE_SIZE_XY - 1U, GroupThreadIndex);

    uint2 SrcPos = SrcTileOffset + (RemappedGroupThreadIndex << uint2(1U, 1U));
    // Sample a 2x2 footprint - the thread group covers a 32x32 area
    float4 DeviceZ = Gather4VisZ(SrcPos, ArrayIndex);
    float MinDeviceZ = min(min3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);

    float MaxDeviceZ = 0.0f;//max(max3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);
    //uint LinearGroupThreadID = RemappedGroupThreadIndex.y << LOG2_TILE_THREAD_GROUP_SIZE_XY + RemappedGroupThreadIndex.x;

    // Broadcast to all threads (16x16).
    SharedMinDeviceZ[GroupThreadIndex] = MinDeviceZ;
    // Write the base HZB level (half physical page size, e.g., 64x64)
    uint2 OutPixelPos = SrcPos >> 1U;
    FurthestHZBOutput_0[OutPixelPos] = MinDeviceZ;

    // Build the next 4 levels: 32, 16, 8, 4
    UNROLL
    for (uint MipLevel = 1U; MipLevel < LOG2_TILE_SIZE_XY; ++MipLevel)
    {
        // 8x8, 4x4, 2x2, 1x1
        const uint OutTileDim = uint(TILE_THREAD_GROUP_SIZE_XY) >> MipLevel;
        const uint ReduceBankSize = OutTileDim * OutTileDim;

        // LDS has been written before.
        GroupMemoryBarrierWithGroupSync();

        BRANCH
        if (GroupThreadIndex < ReduceBankSize)
        {
            float4 ParentMinDeviceZ;
            //float4 ParentMaxDeviceZ;
            ParentMinDeviceZ[0] = MinDeviceZ;
            //ParentMaxDeviceZ[0] = MaxDeviceZ;

            UNROLL
            for (uint i = 1; i < 4; i++)
            {
                uint LDSIndex = GroupThreadIndex + i * ReduceBankSize;
                ParentMinDeviceZ[i] = SharedMinDeviceZ[LDSIndex];
                //ParentMaxDeviceZ[i] = SharedMaxDeviceZ[LDSIndex];
            }

            MinDeviceZ = min(min3(ParentMinDeviceZ.x, ParentMinDeviceZ.y, ParentMinDeviceZ.z), ParentMinDeviceZ.w);
            //MaxDeviceZ = max(max3(ParentMaxDeviceZ.x, ParentMaxDeviceZ.y, ParentMaxDeviceZ.z), ParentMaxDeviceZ.w);

            OutPixelPos = OutPixelPos >> 1;
            OutputMipLevel(MipLevel, OutPixelPos, MinDeviceZ, MaxDeviceZ);

            SharedMinDeviceZ[GroupThreadIndex] = MinDeviceZ;
            //SharedMaxDeviceZ[GroupThreadIndex] = MaxDeviceZ;
        }
    }
}
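
// Reduction sketch: on each iteration the first ReduceBankSize threads combine their own
// value with three LDS entries strided by ReduceBankSize, halving the output each step;
// InitialTilePixelPositionForReduction2x2 swizzles threads so the four children of a parent
// texel land in consecutive banks. "Furthest" is the min DeviceZ, consistent with reversed-Z.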

float4 Gather4(Texture2D Texture, SamplerState TextureSampler, uint2 SrcPos, float2 InvSize)
{
    float2 SrcUV = float2(SrcPos) * InvSize;
#if COMPILER_GLSL || FEATURE_LEVEL < FEATURE_LEVEL_SM5
    float2 HalfTexelOffset = float2(0.5f, 0.5f) * InvSize;

    float4 Out;
    Out.x = Texture.SampleLevel(TextureSampler, SrcUV + float2(-HalfTexelOffset.x, -HalfTexelOffset.y), 0).r;
    Out.y = Texture.SampleLevel(TextureSampler, SrcUV + float2( HalfTexelOffset.x, -HalfTexelOffset.y), 0).r;
    Out.z = Texture.SampleLevel(TextureSampler, SrcUV + float2(-HalfTexelOffset.x,  HalfTexelOffset.y), 0).r;
    Out.w = Texture.SampleLevel(TextureSampler, SrcUV + float2( HalfTexelOffset.x,  HalfTexelOffset.y), 0).r;

    return Out;
#else
    return Texture.GatherRed(TextureSampler, SrcUV, 0);
#endif
}


Texture2D ParentTextureMip;
SamplerState ParentTextureMipSampler;

float2 InvHzbInputSize;

#define TOP_MIP_TILE_SIZE_XY 4
// Each thread fetches 2x2 using gather
#define TOP_MIP_TILE_THREAD_GROUP_SIZE_XY (TOP_MIP_TILE_SIZE_XY/2)

[numthreads(TOP_MIP_TILE_THREAD_GROUP_SIZE_XY, TOP_MIP_TILE_THREAD_GROUP_SIZE_XY, 1)]
void BuildHZBPerPageTopCS(uint2 GroupThreadId : SV_GroupThreadID, uint PageInputIndex : SV_GroupID)
{
    const uint PageIndex = PhysicalPagesForHzb[PageInputIndex];
    uint2 PhysPageAddress = VSMPhysicalIndexToPageAddress(PageIndex);

    // Pixel address of the tile region for this thread group.
    const uint2 SrcTileOffset = PhysPageAddress * uint2(TOP_MIP_TILE_SIZE_XY, TOP_MIP_TILE_SIZE_XY);

    uint2 SrcPos = SrcTileOffset + (GroupThreadId << uint2(1U, 1U));

    // Sample a 2x2 footprint - the thread group covers a 4x4 area
    float4 DeviceZ = Gather4(ParentTextureMip, ParentTextureMipSampler, SrcPos + uint2(1U, 1U), InvHzbInputSize);
    float MinDeviceZ = min(min3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);

    float MaxDeviceZ = 0.0f;//max(max3(DeviceZ.x, DeviceZ.y, DeviceZ.z), DeviceZ.w);

    // Broadcast to all threads.
    SharedMinDeviceZ[GroupThreadId.y * TOP_MIP_TILE_THREAD_GROUP_SIZE_XY + GroupThreadId.x] = MinDeviceZ;
    // Write the first HZB output level (half size)
    uint2 OutPixelPos = SrcPos >> 1U;
    FurthestHZBOutput_0[OutPixelPos] = MinDeviceZ;

    // Build the last level
    GroupMemoryBarrierWithGroupSync();

    BRANCH
    if (all(GroupThreadId.xy == uint2(0U, 0U)))
    {
        float4 ParentMinDeviceZ;
        //float4 ParentMaxDeviceZ;
        ParentMinDeviceZ[0] = MinDeviceZ;
        //ParentMaxDeviceZ[0] = MaxDeviceZ;

        UNROLL
        for (uint Index = 1; Index < 4; ++Index)
        {
            ParentMinDeviceZ[Index] = SharedMinDeviceZ[Index];
            //ParentMaxDeviceZ[Index] = SharedMaxDeviceZ[Index];
        }

        MinDeviceZ = min(min3(ParentMinDeviceZ.x, ParentMinDeviceZ.y, ParentMinDeviceZ.z), ParentMinDeviceZ.w);
        //MaxDeviceZ = max(max3(ParentMaxDeviceZ.x, ParentMaxDeviceZ.y, ParentMaxDeviceZ.z), ParentMaxDeviceZ.w);

        OutPixelPos = OutPixelPos >> 1;
        FurthestHZBOutput_1[OutPixelPos] = MinDeviceZ;
    }
}
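
// This top pass presumably runs with FurthestHZBOutput_0/_1 rebound to the next mip levels
// and ParentTextureMip bound to the 4x4-per-page result of BuildHZBPerPageCS: each group
// reduces one page's remaining 4x4 region down to the final 2x2 and 1x1 mips.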

uint StatusMessageId;
StructuredBuffer<int> PhysicalPageLists;

[numthreads(1, 1, 1)]
void FeedbackStatusCS()
{
    {
        FGPUMessageWriter Mw = GPUMessageBegin(StatusMessageId, 2U);

        // Write out how many pages are still available
        int CountIndex = GetPhysicalPageListStart(PHYSICAL_PAGE_LIST_AVAILABLE) + VirtualShadowMap.MaxPhysicalPages;
        GPUMessageWriteItem(Mw, PhysicalPageLists[CountIndex]);

        // Write out the resolution LOD bias from this frame
        GPUMessageWriteItem(Mw, VirtualShadowMap.GlobalResolutionLodBias);
    }
}


int PageListStatsRow = 0;

[numthreads(1, 1, 1)]
void LogPageListStatsCS()
{
    float TopMargin = 0.5f;
    float ItemX = 0.05f;
    FShaderPrintContext Ctx = InitShaderPrintContext(true, float2(ItemX, TopMargin));

    Ctx.Pos.y += PageListStatsRow * 0.02f;

    Print(Ctx, GetPhysicalPageListCount(0));
    for (int i = 1; i < PHYSICAL_PAGE_LIST_COUNT; ++i)
    {
        Print(Ctx, TEXT(", "));
        Print(Ctx, GetPhysicalPageListCount(i));
    }
}