You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
* Decreased Global Distance Field border from full voxel to half voxel and decreased page size from 16 to 8. This removes ~30% of pages, saving memory and improving update speed * Half texel border requires computing gradients using 8 page table fetches, but it didn't influence Lumen tracing performance on console * Smaller pages put some pressure on culling and other passes. Culling grid was switched from per page to per 4x4x4 pages. Overall full GDF update time in FortGPUTestBed decreased from 5.89ms to 4.57ms * Switched page table format to UInt32 in order to support larger Global Distance Field resolutions * Moved all GlobalDistanceField shaders into /DistanceField/* folder, in the future we should move other distance field shaders there * Moved GlobalDistanceField page stats from r.Lumen.Visualize.Stats in Lumen code to r.GlobalDistanceField.Debug in GlobalDistanceField code * Fixed Lumen normal visualization when only Global Distance Field tracing is enabled #preflight 62630427006fa20b683b405a [FYI] Tiago.Costa, Daniel.Wright #ROBOMERGE-AUTHOR: krzysztof.narkowicz #ROBOMERGE-SOURCE: CL 19873116 via CL 19873429 via CL 19874251 #ROBOMERGE-BOT: UE5 (Release-Engine-Staging -> Main) (v940-19807014) [CL 19878047 by krzysztof narkowicz in ue5-main branch]
417 lines
16 KiB
Plaintext
417 lines
16 KiB
Plaintext
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
DistanceFieldObjectCulling.usf
|
|
=============================================================================*/
|
|
|
|
#include "Common.ush"
|
|
#include "DeferredShadingCommon.ush"
|
|
#include "DistanceFieldLightingShared.ush"
|
|
#include "DistanceFieldAOShared.ush"
|
|
#include "DistanceField/GlobalDistanceFieldShared.ush"
|
|
|
|
/** Index count of the object bounding geometry; stored in slot 0 (IndexCount) of the indirect draw arguments. */
uint ObjectBoundingGeometryIndexCount;

/** Number of objects handled by the current thread group that passed culling. */
groupshared uint NumGroupObjects;

/** Value of RWObjectIndirectArguments[1] before this group's survivors were added; used as the group's write offset. */
groupshared uint GroupBaseIndex;

/** Scene object indices of this group's survivors, in the order the atomic counter handed out slots. */
groupshared uint GroupObjectIndices[UPDATEOBJECTS_THREADGROUP_SIZE];

/**
 * Culls scene distance field objects for the view, one object per thread.
 *
 * With USE_FRUSTUM_CULLING enabled an object survives when it is within AOMaxViewDistance (plus its radius)
 * of the camera, its sphere expanded by AOObjectMaxDistance intersects the view frustum, and the squared view
 * distance lies inside the object's MinMaxDrawDistance2 range. Survivors are gathered per thread group and
 * then written contiguously to RWCulledObjectIndices; their count is accumulated in RWObjectIndirectArguments[1].
 *
 * With USE_FRUSTUM_CULLING disabled every scene object is passed through unchanged.
 */
[numthreads(UPDATEOBJECTS_THREADGROUP_SIZE, 1, 1)]
void CullObjectsForViewCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint SceneObjectIndex = DispatchThreadId.x;

#define USE_FRUSTUM_CULLING 1
#if USE_FRUSTUM_CULLING
	const bool bFirstThreadInGroup = (GroupThreadId.x == 0);

	if (DispatchThreadId.x == 0)
	{
		// RWObjectIndirectArguments is zeroed by a clear before this shader runs, so only non-zero entries
		// need to be written here (and only entries this shader does not read, which would otherwise race).
		// Layout: IndexCount, NumInstances, StartIndex, BaseVertexIndex, FirstInstance
		RWObjectIndirectArguments[0] = ObjectBoundingGeometryIndexCount;
	}

	if (bFirstThreadInGroup)
	{
		NumGroupObjects = 0;
	}

	// Make the counter reset visible to the whole group before anyone increments it
	GroupMemoryBarrierWithGroupSync();

	if (SceneObjectIndex < NumSceneObjects)
	{
		FDFObjectBounds Bounds = LoadDFObjectBounds(SceneObjectIndex);
		const float3 TranslatedBoundsCenter = LWCToFloat(LWCAdd(Bounds.Center, PrimaryView.PreViewTranslation));

		const float3 CameraOffset = PrimaryView.TranslatedWorldCameraOrigin - TranslatedBoundsCenter;
		float ViewDistanceSq = dot(CameraOffset, CameraOffset);

		// Distance test first, then the frustum test on the sphere expanded by the AO influence distance
		if (ViewDistanceSq < Square(AOMaxViewDistance + Bounds.SphereRadius))
		{
			if (ViewFrustumIntersectSphere(TranslatedBoundsCenter, Bounds.SphereRadius + AOObjectMaxDistance))
			{
				FDFObjectData ObjectData = LoadDFObjectData(SceneObjectIndex);

				// A bound below 0.0001 is treated as "not set" and skips that side of the range test
				const bool bPassesMinDrawDistance = ObjectData.MinMaxDrawDistance2.x < 0.0001 || ViewDistanceSq > ObjectData.MinMaxDrawDistance2.x;
				const bool bPassesMaxDrawDistance = ObjectData.MinMaxDrawDistance2.y < 0.0001 || ViewDistanceSq < ObjectData.MinMaxDrawDistance2.y;

				if (bPassesMinDrawDistance && bPassesMaxDrawDistance)
				{
					uint GroupSlot;
					InterlockedAdd(NumGroupObjects, 1U, GroupSlot);
					GroupObjectIndices[GroupSlot] = SceneObjectIndex;
				}
			}
		}
	}

	// All survivors of this group must be recorded before the group reserves its output range
	GroupMemoryBarrierWithGroupSync();

	if (bFirstThreadInGroup)
	{
		// One global atomic per group: reserves NumGroupObjects entries and returns the start of the range
		InterlockedAdd(RWObjectIndirectArguments[1], NumGroupObjects, GroupBaseIndex);
	}

	// GroupBaseIndex must be visible to every thread before the survivors are copied out
	GroupMemoryBarrierWithGroupSync();

	if (GroupThreadId.x < NumGroupObjects)
	{
		RWCulledObjectIndices[GroupBaseIndex + GroupThreadId.x] = GroupObjectIndices[GroupThreadId.x];
	}

#else

	if (DispatchThreadId.x == 0)
	{
		// Layout: IndexCount, NumInstances, StartIndex, BaseVertexIndex, FirstInstance
		RWObjectIndirectArguments[0] = ObjectBoundingGeometryIndexCount;
		RWObjectIndirectArguments[1] = NumSceneObjects;
	}

	GroupMemoryBarrierWithGroupSync();

	if (SceneObjectIndex < NumSceneObjects)
	{
		// No culling: the culled list is the identity mapping
		RWCulledObjectIndices[SceneObjectIndex] = SceneObjectIndex;
	}

#endif
}
|
|
|
|
/** Min and Max depth for this tile, stored as the uint bit pattern of the float depth. */
groupshared uint IntegerTileMinZ;
groupshared uint IntegerTileMaxZ;

/** Inner Min and Max depth for this tile: the depths closest to the tile's mid depth from either side. */
groupshared uint IntegerTileMinZ2;
groupshared uint IntegerTileMaxZ2;

/** View rect min in xy, max in zw. */
uint4 ViewDimensions;
/** Number of tiles in x and y; also used as the row pitch when flattening a tile coordinate. */
float2 NumGroups;

RWStructuredBuffer<float4> RWTileConeAxisAndCos;
RWStructuredBuffer<float4> RWTileConeDepthRanges;

/**
 * Builds tile depth ranges and bounding cones, one thread group per tile and one thread per downsampled pixel.
 *
 * Outputs per tile:
 *   RWTileConeAxisAndCos   - xyz: cone axis, w: cosine of the cone angle
 *   RWTileConeDepthRanges  - xy: first depth range (tile min .. inner max), zw: second depth range (inner min .. tile max),
 *                            both expressed as distances along the cone axis
 * Tiles without any pixel closer than AOMaxViewDistance get axis (0,0,0), cosine 1 and zeroed depth ranges.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void BuildTileConesMain(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint FlatThreadIndex = GroupThreadId.y * THREADGROUP_SIZEX + GroupThreadId.x;
	const bool bIsFirstThread = (FlatThreadIndex == 0);

	float2 DownsampledScreenUV = (DispatchThreadId.xy + float2(.5f, .5f)) * DOWNSAMPLE_FACTOR * View.BufferSizeAndInvSize.zw;
	float PixelDepth = GetDownsampledDepth(DownsampledScreenUV);

	// Alternative kept from the original source: sample full resolution scene depth instead
	//float2 ScreenUV = (DispatchThreadId.xy * DOWNSAMPLE_FACTOR + View.ViewRectMin.xy + float2(.5f, .5f)) * View.BufferSizeAndInvSize.zw;
	//float SceneDepth = CalcSceneDepth(ScreenUV);

	// Reset the per-tile accumulators (0x7F7FFFFF is the bit pattern of the largest finite float)
	if (bIsFirstThread)
	{
		IntegerTileMinZ = 0x7F7FFFFF;
		IntegerTileMaxZ = 0;
		IntegerTileMinZ2 = 0x7F7FFFFF;
		IntegerTileMaxZ2 = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	// Shared memory atomics on the float bit patterns build the tile's depth bounds
	// NOTE(review): comparing asuint() values assumes depth is non-negative - confirm
	const bool bDepthInAORange = PixelDepth < AOMaxViewDistance;

	if (bDepthInAORange)
	{
		InterlockedMin(IntegerTileMinZ, asuint(PixelDepth));
		InterlockedMax(IntegerTileMaxZ, asuint(PixelDepth));
	}

	GroupMemoryBarrierWithGroupSync();

	float MinTileZ = asfloat(IntegerTileMinZ);
	float MaxTileZ = asfloat(IntegerTileMaxZ);

	float HalfZ = .5f * (MinTileZ + MaxTileZ);

	// Second pass: split the pixels at HalfZ so the tile ends up with two depth ranges,
	// [MinTileZ, MaxTileZ2] in front of the split and [MinTileZ2, MaxTileZ] behind it
	if (bDepthInAORange && PixelDepth >= HalfZ)
	{
		InterlockedMin(IntegerTileMinZ2, asuint(PixelDepth));
	}

	if (PixelDepth <= HalfZ)
	{
		InterlockedMax(IntegerTileMaxZ2, asuint(PixelDepth));
	}

	GroupMemoryBarrierWithGroupSync();

	float MinTileZ2 = asfloat(IntegerTileMinZ2);
	float MaxTileZ2 = asfloat(IntegerTileMaxZ2);

	// A single thread per tile computes and writes the cone
	if (bIsFirstThread)
	{
		float2 ViewSize = float2(1 / View.ViewToClip[0][0], 1 / View.ViewToClip[1][1]);

		// View space x/y of the tile edges on the z = 1 plane
		const float TileX0 = (GroupId.x + 0) / NumGroups.x * ViewSize.x * 2 - ViewSize.x;
		const float TileX1 = (GroupId.x + 1) / NumGroups.x * ViewSize.x * 2 - ViewSize.x;
		const float TileY0 = ViewSize.y - (GroupId.y + 0) / NumGroups.y * ViewSize.y * 2;
		const float TileY1 = ViewSize.y - (GroupId.y + 1) / NumGroups.y * ViewSize.y * 2;

		// Unit directions through the four tile corners
		float3 CornerDir00 = normalize(float3(TileX0, TileY0, 1));
		float3 CornerDir10 = normalize(float3(TileX1, TileY0, 1));
		float3 CornerDir01 = normalize(float3(TileX0, TileY1, 1));
		float3 CornerDir11 = normalize(float3(TileX1, TileY1, 1));

		// The cone axis is the normalized sum of the corner directions, its angle is measured to corner 00
		float3 TileConeAxis = normalize(CornerDir00 + CornerDir10 + CornerDir01 + CornerDir11);
		float TileConeAngleCos = dot(TileConeAxis, CornerDir00);
		float TileConeAngleSin = sqrt(1 - TileConeAngleCos * TileConeAngleCos);

		float TileConeAngleTan = TileConeAngleSin / TileConeAngleCos;
		float ConeExpandDistance = 0;
		float VertexPullbackLength = ConeExpandDistance / TileConeAngleTan;
		float DistanceToNearPlane = length(TileConeAxis / TileConeAxis.z * View.NearPlane);

		// 1 / cos(AngleBetweenTileCenterAndViewForward)
		float InvCosTileAngle = 1.0f / TileConeAxis.z;
		float ConeAxisDistanceMultiply = InvCosTileAngle;
		float ConeAxisDistanceAdd = VertexPullbackLength + DistanceToNearPlane;

		// Convert the four depths into distances along the cone axis
		float4 ConeAxisDepthRanges = float4(
			ConeAxisDistanceMultiply * (MinTileZ - ConeExpandDistance) + ConeAxisDistanceAdd,
			ConeAxisDistanceMultiply * (MaxTileZ2 + ConeExpandDistance) + ConeAxisDistanceAdd,
			ConeAxisDistanceMultiply * (MinTileZ2 - ConeExpandDistance) + ConeAxisDistanceAdd,
			ConeAxisDistanceMultiply * (MaxTileZ + ConeExpandDistance) + ConeAxisDistanceAdd);

		// Pull back cone vertex to contain potential samples (computed but not part of the outputs below)
		float3 TileConeVertex = float3(0, 0, 0) - TileConeAxis * VertexPullbackLength;

		uint TileIndex = GroupId.y * NumGroups.x + GroupId.x;

		if (IntegerTileMinZ <= IntegerTileMaxZ)
		{
			RWTileConeAxisAndCos[TileIndex] = float4(TileConeAxis, TileConeAngleCos);
			RWTileConeDepthRanges[TileIndex] = ConeAxisDepthRanges;
		}
		else
		{
			// IntegerTileMinZ was never updated: no pixel in this tile was within AOMaxViewDistance
			RWTileConeAxisAndCos[TileIndex] = float4(0, 0, 0, 1);
			RWTileConeDepthRanges[TileIndex] = 0;
		}
	}
}
|
|
|
|
|
|
/** Per-object data passed, without interpolation, from ObjectCullVS to ObjectCullPS. */
struct FObjectCullVertexOutput
{
	/** xyz: object bounds center in translated world space, w: object bounding sphere radius. */
	nointerpolation float4 TranslatedPositionAndRadius : TEXCOORD0;

	/** x: index of the object in the scene object buffers, y: instance index (index into the culled object list). */
	nointerpolation uint2 ObjectIndexInstanceIndex : TEXCOORD1;
};
|
|
|
|
/** Scale applied to the object's influence radius when sizing the rasterized bounding geometry. */
float ConservativeRadiusScale;

/**
 * Used when culling objects into screenspace tile lists.
 *
 * Draws one instance of the bounding geometry per culled object: InPosition is scaled by the object's
 * influence radius, moved to the object's center and projected, and the object's center, radius and
 * indices are forwarded to the pixel shader.
 */
void ObjectCullVS(
	float4 InPosition : ATTRIBUTE0,
	uint InstanceIndex : SV_InstanceID,
	out FObjectCullVertexOutput Output,
	out float4 OutPosition : SV_POSITION
	)
{
	// The instance index addresses the culled list, which in turn yields the scene object index
	const uint ObjectIndex = CulledObjectIndices[InstanceIndex];

	//@todo - implement ConservativelyBoundSphere
	FDFObjectBounds Bounds = LoadDFObjectBounds(ObjectIndex);
	const float3 TranslatedCenter = LWCToFloat(LWCAdd(Bounds.Center, PrimaryView.PreViewTranslation));

	// Forward the unexpanded sphere; the pixel shader applies its own expansion
	Output.TranslatedPositionAndRadius = float4(TranslatedCenter, Bounds.SphereRadius);
	Output.ObjectIndexInstanceIndex = uint2(ObjectIndex, InstanceIndex);

	//@todo - expand to handle conservative rasterization
	float InfluenceRadius = (Bounds.SphereRadius + AOObjectMaxDistance) * ConservativeRadiusScale;
	float3 TranslatedVertexPosition = InPosition.xyz * InfluenceRadius + TranslatedCenter;
	OutPosition = mul(float4(TranslatedVertexPosition, 1), PrimaryView.TranslatedWorldToClip);
}
|
|
|
|
/**
 * Used for object <-> tile culling.
 *
 * Tests one object against one depth range of a tile cone.
 *
 * @param TileConeVertex			Cone apex in view space
 * @param TileConeAxis				Cone axis in view space
 * @param TileConeAngleCos			Cosine of the cone angle
 * @param TileConeAngleSin			Sine of the cone angle
 * @param ConeDepthRange			x: start, y: end of the depth range, as distances along the cone axis
 * @param ConeAxisDistanceMinMax	Object extent along the cone axis; must satisfy x > ConeDepthRange.x and
 *									y < ConeDepthRange.y to overlap (ObjectCullPS passes x = far extent, y = near extent)
 * @param ObjectIndex				Index used to load the object's distance field data
 * @return true when the extents overlap and, with USE_DISTANCE_FIELD_FOR_OBJECT_CULLING, the object's distance
 *		   field surface lies within the tile's bounding sphere radius plus AOObjectMaxDistance
 */
bool IntersectObjectWithConeDepthRange(
	float3 TileConeVertex,
	float3 TileConeAxis,
	float TileConeAngleCos,
	float TileConeAngleSin,
	float2 ConeDepthRange,
	float2 ConeAxisDistanceMinMax,
	uint ObjectIndex)
{
	bool bIntersects = false;

	BRANCH
	if (ConeAxisDistanceMinMax.x > ConeDepthRange.x && ConeAxisDistanceMinMax.y < ConeDepthRange.y)
	{
#define USE_DISTANCE_FIELD_FOR_OBJECT_CULLING 1
#if USE_DISTANCE_FIELD_FOR_OBJECT_CULLING
		FDFObjectData ObjectData = LoadDFObjectData(ObjectIndex);
		float4x4 TranslatedWorldToVolume = LWCMultiplyTranslation(LWCNegate(PrimaryView.PreViewTranslation), ObjectData.WorldToVolume);

		// Bound this depth range of the tile with a sphere centered halfway between the range ends
		float3 ViewSphereCenter = TileConeVertex + TileConeAxis * (.5f * (ConeDepthRange.x + ConeDepthRange.y));
		float3 TranslatedWorldSphereCenter = mul(float4(ViewSphereCenter.xyz, 1), View.ViewToTranslatedWorld).xyz;

		// Radius reaches from the center to the cone edge at the far end of the range
		float HalfRangeLength = .5f * (ConeDepthRange.y - ConeDepthRange.x);
		float FarEdgeDistance = ConeDepthRange.y * TileConeAngleSin / TileConeAngleCos;
		float TileBoundingSphereRadius = sqrt(HalfRangeLength * HalfRangeLength + FarEdgeDistance * FarEdgeDistance);

		// Cheap rejection first: distance from the sphere center to the object's volume box
		float3 VolumeSphereCenter = mul(float4(TranslatedWorldSphereCenter, 1), TranslatedWorldToVolume).xyz;
		float BoxDistance = ComputeDistanceFromBoxToPoint(-ObjectData.VolumePositionExtent, ObjectData.VolumePositionExtent, VolumeSphereCenter) * ObjectData.VolumeScale;

		BRANCH
		if (BoxDistance < TileBoundingSphereRadius + AOObjectMaxDistance)
		{
			// Sample the distance field at the closest point inside the volume and add the distance to that point
			float3 ClampedVolumePosition = clamp(VolumeSphereCenter, -ObjectData.VolumePositionExtent, ObjectData.VolumePositionExtent);
			float DistanceOutsideVolume = length(VolumeSphereCenter - ClampedVolumePosition);
			float DistanceToOccluder = (DistanceToMeshSurfaceStandalone(ClampedVolumePosition, ObjectData) + DistanceOutsideVolume) * ObjectData.VolumeScale;

			BRANCH
			if (DistanceToOccluder < TileBoundingSphereRadius + AOObjectMaxDistance)
			{
				bIntersects = true;
			}
		}

#else
		bIntersects = true;
#endif
	}

	return bIntersects;
}
|
|
|
|
/** Per-tile cone axis (xyz) and cone angle cosine (w). */
StructuredBuffer<float4> TileConeAxisAndCos;
/** Per-tile depth ranges along the cone axis: xy is one range, zw the other. */
StructuredBuffer<float4> TileConeDepthRanges;

/** Per culled object: number of tiles found to intersect it. */
RWStructuredBuffer<uint> RWNumCulledTilesArray;
RWStructuredBuffer<uint> RWCulledTilesStartOffsetArray;
/** CULLED_TILE_DATA_STRIDE uints per entry: [0] tile index, [1] object index. */
RWBuffer<uint> RWCulledTileDataArray;

/**
 * Intersects a single object with the tile and adds to the intersection list if needed.
 *
 * Each pixel covers one tile. With SCATTER_CULLING_COUNT_PASS only the per-object tile count is incremented;
 * otherwise the (tile, object) pair is also written into the object's range of RWCulledTileDataArray.
 */
void ObjectCullPS(
	FObjectCullVertexOutput Input,
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : SV_Target0)
{
	OutColor = 0;

	// One pixel per tile: the pixel coordinate is the tile coordinate
	uint2 TileCoord = (uint2)SVPos.xy;
	uint TileIndex = TileCoord.y * NumGroups.x + TileCoord.x;

	float4 PackedAxisAndCos = TileConeAxisAndCos[TileIndex];
	float4 TileDepthRanges = TileConeDepthRanges[TileIndex];

	// The cone apex sits at the view origin
	float3 TileConeVertex = 0;
	float3 TileConeAxis = PackedAxisAndCos.xyz;
	float TileConeAngleCos = PackedAxisAndCos.w;
	float TileConeAngleSin = sqrt(1 - TileConeAngleCos * TileConeAngleCos);

	float3 TranslatedWorldSphereCenter = Input.TranslatedPositionAndRadius.xyz;
	float ObjectSphereRadius = Input.TranslatedPositionAndRadius.w;
	float3 ViewSpaceSphereCenter = mul(float4(TranslatedWorldSphereCenter, 1), View.TranslatedWorldToView).xyz;

	// A value of 1 is conservative, but has a huge impact on performance
	float RadiusScale = .5f;

	float4 SphereCenterAndRadius = float4(ViewSpaceSphereCenter, ObjectSphereRadius + RadiusScale * AOObjectMaxDistance);

	if (SphereIntersectCone(SphereCenterAndRadius, TileConeVertex, TileConeAxis, TileConeAngleCos, TileConeAngleSin))
	{
		// Project the sphere onto the cone axis. Despite the parameter name it is passed to,
		// x holds the far extent and y the near extent, which is what the overlap test expects.
		float AxisDistanceToCenter = dot(SphereCenterAndRadius.xyz - TileConeVertex, TileConeAxis);
		float2 AxisExtentFarNear = float2(AxisDistanceToCenter + SphereCenterAndRadius.w, AxisDistanceToCenter - SphereCenterAndRadius.w);

		const uint ObjectIndex = Input.ObjectIndexInstanceIndex.x;
		const uint InstanceIndex = Input.ObjectIndexInstanceIndex.y;

		// Try the first depth range, and only fall back to the second one when the first misses
		bool bTileIntersectsObject = IntersectObjectWithConeDepthRange(TileConeVertex, TileConeAxis, TileConeAngleCos, TileConeAngleSin, TileDepthRanges.xy, AxisExtentFarNear, ObjectIndex);

		if (!bTileIntersectsObject)
		{
			bTileIntersectsObject = IntersectObjectWithConeDepthRange(TileConeVertex, TileConeAxis, TileConeAngleCos, TileConeAngleSin, TileDepthRanges.zw, AxisExtentFarNear, ObjectIndex);
		}

		if (bTileIntersectsObject)
		{
#if SCATTER_CULLING_COUNT_PASS
			// Counting pass: only tally how many tiles this object touches
			InterlockedAdd(RWNumCulledTilesArray[InstanceIndex], 1);
#else
			// Scatter pass: claim the next slot in this object's range and record the pair
			uint SlotWithinObject;
			InterlockedAdd(RWNumCulledTilesArray[InstanceIndex], 1, SlotWithinObject);

			uint ObjectRangeStart = CulledTilesStartOffsetArray[InstanceIndex];
			uint EntryBase = (ObjectRangeStart + SlotWithinObject) * CULLED_TILE_DATA_STRIDE;

			RWCulledTileDataArray[EntryBase + 0] = TileIndex;
			RWCulledTileDataArray[EntryBase + 1] = ObjectIndex;
#endif
		}
	}
}
|
|
|
|
/** Indirect dispatch arguments for the per-object tile pass: [0] accumulates thread groups, [1] and [2] are set to 1. */
RWBuffer<uint> RWObjectTilesIndirectArguments;
/** Per culled object: number of intersecting tiles. */
StructuredBuffer<uint> NumCulledTilesArray;

#ifndef COMPUTE_START_OFFSET_GROUP_SIZE
#define COMPUTE_START_OFFSET_GROUP_SIZE 1
#endif

/**
 * Assigns every culled object its range in the culled tile data array, one object per thread.
 *
 * Each object's tile count is rounded up to whole groups of CONE_TRACE_TILES_PER_THREADGROUP. The groups are
 * reserved through an atomic add on RWObjectTilesIndirectArguments[0], the resulting tile offset is stored in
 * RWCulledTilesStartOffsetArray, and the unused tail of the object's last group is filled with INVALID_TILE_INDEX.
 */
[numthreads(COMPUTE_START_OFFSET_GROUP_SIZE, 1, 1)]
void ComputeCulledTilesStartOffsetCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint CulledObjectIndex = DispatchThreadId.x;
	uint CulledObjectCount = GetCulledNumObjects();

	if (CulledObjectIndex < CulledObjectCount)
	{
		uint TileCount = NumCulledTilesArray[CulledObjectIndex];

		// Round up to whole cone tracing thread groups
		uint GroupCount = (TileCount + CONE_TRACE_TILES_PER_THREADGROUP - 1) / CONE_TRACE_TILES_PER_THREADGROUP;
		uint PaddedTileCount = GroupCount * CONE_TRACE_TILES_PER_THREADGROUP;

		// Reserve the groups; the returned value is the number of groups reserved before this object
		uint FirstGroup;
		InterlockedAdd(RWObjectTilesIndirectArguments[0], GroupCount, FirstGroup);

		uint FirstTile = FirstGroup * CONE_TRACE_TILES_PER_THREADGROUP;
		RWCulledTilesStartOffsetArray[CulledObjectIndex] = FirstTile;

		// Pad remaining entries with INVALID_TILE_INDEX so we can skip computing them in the cone tracing pass
		for (uint PaddingSlot = TileCount; PaddingSlot < PaddedTileCount; PaddingSlot++)
		{
			uint EntryBase = (FirstTile + PaddingSlot) * CULLED_TILE_DATA_STRIDE;

			RWCulledTileDataArray[EntryBase + 0] = INVALID_TILE_INDEX;
			RWCulledTileDataArray[EntryBase + 1] = CulledObjectIndex;
		}
	}

	if (DispatchThreadId.x == 0)
	{
		// The dispatch is one dimensional: y and z group counts are fixed at 1
		RWObjectTilesIndirectArguments[1] = 1;
		RWObjectTilesIndirectArguments[2] = 1;
	}
}