Files
UnrealEngineUWP/Engine/Shaders/GlobalDistanceField.usf
Mark Satterthwaite 977542aeaa Merging general rendering & shader changes necessary to support desktop Metal.
reviewedby michael.trepka, rolando.caloca, lee.clark

[CL 2670859 by Mark Satterthwaite in Main branch]
2015-08-27 10:11:22 -04:00

299 lines
10 KiB
Plaintext

// Copyright 1998-2015 Epic Games, Inc. All Rights Reserved.
/*=============================================================================
GlobalDistanceField.usf
=============================================================================*/
#include "Common.usf"
#include "DeferredShadingCommon.usf"
#include "DistanceFieldLightingShared.usf"
#include "DistanceFieldAOShared.usf"
#include "GlobalDistanceFieldShared.usf"
// Output: one float4 of bounds per object that survives culling (written by CopyCulledObjectData).
RWBuffer<float4> RWCulledObjectBounds;
// Output: CULLED_OBJECT_DATA_STRIDE float4s of object data per surviving object (written by CopyCulledObjectData).
RWBuffer<float4> RWCulledObjectData;
// Thread group width of CullObjectsForVolumeCS; falls back to 1 when the compiling code does not define it.
#ifndef CULLOBJECTS_THREADGROUP_SIZE
#define CULLOBJECTS_THREADGROUP_SIZE 1
#endif
// Extra distance added to object radii in the culling tests in this file; also the initial value of the composited minimum distance.
float AOGlobalMaxSphereQueryRadius;
// Number of objects in the current thread group that passed the volume test.
groupshared uint NumGroupObjects;
// First output slot reserved for the current thread group in the culled object buffers.
groupshared uint GroupBaseIndex;
// Source indices of the objects that passed the volume test; sized so every thread in the group can append one entry.
groupshared uint GroupObjectIndices[CULLOBJECTS_THREADGROUP_SIZE];
/** Assembles the float4 at vector index SourceIndex from four consecutive elements of ObjectData. */
float4 FetchObjectDataFloat4(uint SourceIndex)
{
	uint ScalarBase = 4 * SourceIndex;

	float4 Result;
	Result.x = ObjectData[ScalarBase + 0];
	Result.y = ObjectData[ScalarBase + 1];
	Result.z = ObjectData[ScalarBase + 2];
	Result.w = ObjectData[ScalarBase + 3];
	return Result;
}
/**
 * Copies one object's bounds and the leading CULLED_OBJECT_DATA_STRIDE vectors of its data
 * from the source buffers into slot DestIndex of the culled output buffers.
 */
void CopyCulledObjectData(uint DestIndex, uint SourceIndex)
{
	uint BoundsBase = 4 * SourceIndex;
	float4 Bounds = float4(ObjectBounds[BoundsBase + 0], ObjectBounds[BoundsBase + 1], ObjectBounds[BoundsBase + 2], ObjectBounds[BoundsBase + 3]);
	RWCulledObjectBounds[DestIndex] = Bounds;

	uint SourceVectorBase = SourceIndex * OBJECT_DATA_STRIDE;
	uint DestVectorBase = DestIndex * CULLED_OBJECT_DATA_STRIDE;

	// Note: the source is laid out with OBJECT_DATA_STRIDE vectors per object, but only the first CULLED_OBJECT_DATA_STRIDE are kept
	UNROLL
	for (uint Offset = 0; Offset < CULLED_OBJECT_DATA_STRIDE; Offset++)
	{
		RWCulledObjectData[DestVectorBase + Offset] = FetchObjectDataFloat4(SourceVectorBase + Offset);
	}
}
// Sphere used by the volume test below: xyz is compared against object centers, w is added to the object radius.
float4 VolumeBounds;
/**
 * Culls scene objects against VolumeBounds, one object per thread (ObjectIndex = DispatchThreadId.x).
 * Surviving objects are compacted into RWCulledObjectBounds / RWCulledObjectData and their total is
 * accumulated into RWObjectIndirectArguments[1].
 */
[numthreads(CULLOBJECTS_THREADGROUP_SIZE, 1, 1)]
void CullObjectsForVolumeCS(
uint3 GroupId : SV_GroupID,
uint3 DispatchThreadId : SV_DispatchThreadID,
uint3 GroupThreadId : SV_GroupThreadID)
{
uint ObjectIndex = DispatchThreadId.x;
// Note: despite the name, the enabled path performs a sphere-vs-sphere distance test, not a frustum test
#define USE_FRUSTUM_CULLING 1
#if USE_FRUSTUM_CULLING
// Step 1: one thread resets the per-group counter
if (GroupThreadId.x == 0)
{
NumGroupObjects = 0;
}
GroupMemoryBarrierWithGroupSync();
// Step 2: each in-range thread tests its object and appends survivors to the group shared list
if (ObjectIndex < NumSceneObjects)
{
uint SourceIndex = ObjectIndex;
float4 ObjectBoundingSphere = float4(ObjectBounds[4 * SourceIndex + 0], ObjectBounds[4 * SourceIndex + 1], ObjectBounds[4 * SourceIndex + 2], ObjectBounds[4 * SourceIndex + 3]);
float Distance = length(VolumeBounds.xyz - ObjectBoundingSphere.xyz);
// Keep the object when the spheres overlap after padding by AOGlobalMaxSphereQueryRadius
if (Distance < VolumeBounds.w + ObjectBoundingSphere.w + AOGlobalMaxSphereQueryRadius)
{
uint DestIndex;
InterlockedAdd(NumGroupObjects, 1U, DestIndex);
// Cannot overflow: each thread appends at most once and the list has one slot per thread
GroupObjectIndices[DestIndex] = SourceIndex;
}
}
GroupMemoryBarrierWithGroupSync();
// Step 3: one thread reserves a contiguous output range for the whole group with a single atomic
if (GroupThreadId.x == 0)
{
InterlockedAdd(RWObjectIndirectArguments[1], NumGroupObjects, GroupBaseIndex);
}
GroupMemoryBarrierWithGroupSync();
// Step 4: the first NumGroupObjects threads each copy one surviving object into the reserved range
if (GroupThreadId.x < NumGroupObjects)
{
uint SourceIndex = GroupObjectIndices[GroupThreadId.x];
uint DestIndex = GroupBaseIndex + GroupThreadId.x;
CopyCulledObjectData(DestIndex, SourceIndex);
}
#else
// Culling disabled: every scene object is copied through unchanged
if (DispatchThreadId.x == 0)
{
// IndexCount, NumInstances, StartIndex, BaseVertexIndex, FirstInstance
RWObjectIndirectArguments[1] = NumSceneObjects;
}
GroupMemoryBarrierWithGroupSync();
if (ObjectIndex < NumSceneObjects)
{
uint SourceIndex = ObjectIndex;
uint DestIndex = ObjectIndex;
CopyCulledObjectData(DestIndex, SourceIndex);
}
#endif
}
// Index into GlobalVolumeCenterAndExtent selecting the clipmap being processed.
uint ClipmapIndex;
// Scaled by twice the clipmap extent to turn texel coordinates into position offsets.
float3 VolumeTexelSize;
// Position that texel and tile offsets of the update region are added to.
float3 UpdateRegionVolumeMin;
// Number of cull grid cells along each axis; thread groups outside this range do no work in CullObjectsToGridCS.
uint3 CullGridDimension;
// Per-cell object lists: each cell occupies MAX_GRID_CULLED_DF_OBJECTS + 1 uints, a count followed by object indices.
RWBuffer<uint> RWCulledObjectGrid;
// Edge length of one cull grid cell, in texels; falls back to 1 when not defined by the compiling code.
#ifndef CULL_GRID_TILE_SIZE
#define CULL_GRID_TILE_SIZE 1
#endif
// Capacity of each cell's object index list; falls back to 0 when not defined by the compiling code.
#ifndef MAX_GRID_CULLED_DF_OBJECTS
#define MAX_GRID_CULLED_DF_OBJECTS 0
#endif
// Thread count of CullObjectsToGridCS. Note: "THREADGORUP" is a misspelling of "THREADGROUP", kept because the macro is referenced by name below.
#define GRID_CULL_THREADGORUP_TOTALSIZE 256
/**
 * Returns the offset of a cull grid cell's record in the culled object grid buffer.
 * Cells are linearized with x fastest, then y, then z, and each record spans MAX_GRID_CULLED_DF_OBJECTS + 1 uints.
 */
uint ComputeGridBaseIndex(uint3 GridCoordinate)
{
	uint RowIndex = GridCoordinate.z * CullGridDimension.y + GridCoordinate.y;
	uint CellIndex = RowIndex * CullGridDimension.x + GridCoordinate.x;
	return CellIndex * (MAX_GRID_CULLED_DF_OBJECTS + 1);
}
/**
 * Builds the culled object list of one cull grid cell per thread group (cell coordinate = GroupId).
 * The group's threads stride over the culled objects and append every object whose bounding sphere,
 * padded by AOGlobalMaxSphereQueryRadius, touches the cell's box.
 *
 * Cell record layout in RWCulledObjectGrid: [count, index 0, index 1, ...].
 * The stored count keeps growing past MAX_GRID_CULLED_DF_OBJECTS when a cell overflows, while the
 * extra indices are dropped, so readers must clamp the count before walking the list.
 */
[numthreads(GRID_CULL_THREADGORUP_TOTALSIZE, 1, 1)]
void CullObjectsToGridCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ThreadIndex = GroupThreadId.x;
	uint GridBaseIndex = ComputeGridBaseIndex(GroupId);

	if (ThreadIndex == 0 && all(GroupId < CullGridDimension))
	{
		// Clear array num to 0 for all cull grid cells
		RWCulledObjectGrid[GridBaseIndex] = 0;
	}

	// The counter cleared above lives in a UAV, not in group shared memory, so a device memory barrier is
	// required to order the clear before the atomics issued by the other threads of the group.
	// GroupMemoryBarrierWithGroupSync only orders group shared accesses.
	DeviceMemoryBarrierWithGroupSync();

	if (all(GroupId < CullGridDimension))
	{
		uint NumCulledObjects = GetCulledNumObjects();
		float GlobalVolumeExtent = GlobalVolumeCenterAndExtent[ClipmapIndex].w;

		// Box covered by this cell, as center and half extent
		float3 TileBoxExtent = .5f * CULL_GRID_TILE_SIZE * VolumeTexelSize * GlobalVolumeExtent * 2;
		float3 TileBoxCenter = UpdateRegionVolumeMin + ((GroupId.xyz + .5f) * CULL_GRID_TILE_SIZE + .5f) * VolumeTexelSize * GlobalVolumeExtent * 2;

		for (uint ObjectIndex = ThreadIndex; ObjectIndex < NumCulledObjects; ObjectIndex += GRID_CULL_THREADGORUP_TOTALSIZE)
		{
			float4 ObjectPositionAndRadius = LoadObjectPositionAndRadius(ObjectIndex);
			//float BoxDistance = ComputeDistanceFromBoxToPoint(TileBoxCenter - TileBoxExtent, TileBoxCenter + TileBoxExtent, ObjectPositionAndRadius.xyz);
			float BoxDistanceSq = ComputeSquaredDistanceFromBoxToPoint(TileBoxCenter, TileBoxExtent, ObjectPositionAndRadius.xyz);
			float CombinedDistance = ObjectPositionAndRadius.w + AOGlobalMaxSphereQueryRadius;

			// Compare squared distances to avoid a square root
			if (BoxDistanceSq < CombinedDistance * CombinedDistance)
			{
				uint DestIndex;
				// Allocate space in the array for one more object
				InterlockedAdd(RWCulledObjectGrid[GridBaseIndex], 1U, DestIndex);

				if (DestIndex < MAX_GRID_CULLED_DF_OBJECTS)
				{
					// Write the intersecting object index into the array
					RWCulledObjectGrid[GridBaseIndex + 1 + DestIndex] = ObjectIndex;
				}
			}
		}
	}
}
// Composite threads whose DispatchThreadId falls outside this size do not write to the output texture.
uint3 UpdateRegionSize;
// Read-only per-cell object lists, consumed as a count followed by object indices.
// NOTE(review): presumably the same data CullObjectsToGridCS writes through RWCulledObjectGrid - confirm against the binding code.
Buffer<uint> CulledObjectGrid;
// Output volume that receives one composited distance per texel.
RWTexture3D<float> RWGlobalDistanceFieldTexture;
// Thread group edge length of CompositeObjectDistanceFieldsCS; falls back to 1 when not defined by the compiling code.
#ifndef COMPOSITE_THREADGROUP_SIZE
#define COMPOSITE_THREADGROUP_SIZE 1
#endif
// Total threads in one composite thread group (the group is a cube). Note: "THREADGORUP" is a misspelling kept because the macro is referenced by name below.
#define COMPOSITE_THREADGORUP_TOTALSIZE (COMPOSITE_THREADGROUP_SIZE * COMPOSITE_THREADGROUP_SIZE * COMPOSITE_THREADGROUP_SIZE)
// Capacity of the per-group object list held in group shared memory.
#define MAX_CULLED_DF_OBJECTS 512
// Indices of the objects that passed the per-tile test for the current thread group.
groupshared uint SharedCulledObjectList[MAX_CULLED_DF_OBJECTS];
// Number of objects that passed the per-tile test; the composite shader clamps it to MAX_CULLED_DF_OBJECTS before use.
groupshared uint NumTileCulledObjects;
// When 1, the composite shader starts from the per-cell lists in CulledObjectGrid instead of the full culled object list.
#define USE_CULL_GRID 1
/**
 * Composites object distance fields into the global distance field, one texel per thread.
 *
 * Phase 1 (tile culling): the thread group cooperatively gathers, into group shared memory, the objects
 * whose bounding sphere padded by AOGlobalMaxSphereQueryRadius touches the group's tile box.
 * Phase 2: every thread takes the minimum distance to those objects at its texel position, starting
 * from AOGlobalMaxSphereQueryRadius, and threads inside UpdateRegionSize write the result to
 * RWGlobalDistanceFieldTexture.
 */
[numthreads(COMPOSITE_THREADGROUP_SIZE, COMPOSITE_THREADGROUP_SIZE, COMPOSITE_THREADGROUP_SIZE)]
void CompositeObjectDistanceFieldsCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint NumCulledObjects = GetCulledNumObjects();
	float GlobalVolumeExtent = GlobalVolumeCenterAndExtent[ClipmapIndex].w;

#define USE_OBJECT_COMPOSITING_TILE_CULLING 1

#if USE_OBJECT_COMPOSITING_TILE_CULLING

	if (all(GroupThreadId == 0))
	{
		NumTileCulledObjects = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	// Flattened index of this thread within its group
	uint ThreadIndex = (GroupThreadId.z * COMPOSITE_THREADGROUP_SIZE + GroupThreadId.y) * COMPOSITE_THREADGROUP_SIZE + GroupThreadId.x;

	// Box covered by this thread group's texels, as center and half extent
	float3 TileBoxExtent = .5f * COMPOSITE_THREADGROUP_SIZE * VolumeTexelSize * GlobalVolumeExtent * 2;
	float3 TileBoxCenter = UpdateRegionVolumeMin + ((GroupId.xyz + .5f) * COMPOSITE_THREADGROUP_SIZE + .5f) * VolumeTexelSize * GlobalVolumeExtent * 2;

#if USE_CULL_GRID
	uint GridBaseIndex = ComputeGridBaseIndex(DispatchThreadId / CULL_GRID_TILE_SIZE);
	// The stored count is clamped because it can exceed the number of indices stored for the cell
	uint NumGridCulledObjects = min(CulledObjectGrid[GridBaseIndex], (uint)MAX_GRID_CULLED_DF_OBJECTS);
	NumCulledObjects = NumGridCulledObjects;
#endif

	for (uint ListObjectIndex = ThreadIndex; ListObjectIndex < NumCulledObjects; ListObjectIndex += COMPOSITE_THREADGORUP_TOTALSIZE)
	{
#if USE_CULL_GRID
		uint ObjectIndex = CulledObjectGrid[GridBaseIndex + 1 + ListObjectIndex];
#else
		uint ObjectIndex = ListObjectIndex;
#endif

		float4 ObjectPositionAndRadius = LoadObjectPositionAndRadius(ObjectIndex);
		//float BoxDistance = ComputeDistanceFromBoxToPoint(TileBoxCenter - TileBoxExtent, TileBoxCenter + TileBoxExtent, ObjectPositionAndRadius.xyz);
		// ComputeSquaredDistanceFromBoxToPoint is correct but adds a lot to the final cost
		float BoxDistanceSq = ComputeSquaredDistanceFromBoxToPoint(TileBoxCenter, TileBoxExtent, ObjectPositionAndRadius.xyz);
		float CombinedDistance = ObjectPositionAndRadius.w + AOGlobalMaxSphereQueryRadius;

		if (BoxDistanceSq < CombinedDistance * CombinedDistance)
		{
			uint DestIndex;
			InterlockedAdd(NumTileCulledObjects, 1U, DestIndex);

			// More objects can pass the test than the list can hold. Drop the overflow instead of
			// writing past the end of group shared memory; the count is clamped after the barrier below.
			if (DestIndex < (uint)MAX_CULLED_DF_OBJECTS)
			{
				SharedCulledObjectList[DestIndex] = ObjectIndex;
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	NumCulledObjects = min(NumTileCulledObjects, (uint)MAX_CULLED_DF_OBJECTS);

#endif

	// Position of this thread's texel center
	float3 WorldPosition = UpdateRegionVolumeMin + (DispatchThreadId.xyz + .5f) * VolumeTexelSize * GlobalVolumeExtent * 2;

	// Distances are capped at the maximum query radius
	float MinDistance = AOGlobalMaxSphereQueryRadius;

	LOOP
	for (uint ListObjectIndex = 0; ListObjectIndex < NumCulledObjects; ListObjectIndex++)
	{
#if USE_OBJECT_COMPOSITING_TILE_CULLING
		uint ObjectIndex = SharedCulledObjectList[ListObjectIndex];
#else
		uint ObjectIndex = ListObjectIndex;
#endif

		float3 LocalPositionExtent = LoadObjectLocalPositionExtent(ObjectIndex);
		float4x4 WorldToVolume = LoadObjectWorldToVolume(ObjectIndex);
		bool bGeneratedAsTwoSided;
		float4 UVScaleAndVolumeScale = LoadObjectUVScale(ObjectIndex, bGeneratedAsTwoSided);

		float3 VolumePosition = mul(float4(WorldPosition, 1), WorldToVolume).xyz;
		float BoxDistance = ComputeDistanceFromBoxToPoint(-LocalPositionExtent, LocalPositionExtent, VolumePosition) * UVScaleAndVolumeScale.w;

		// Skip the texture sample when the object's box is already beyond the maximum distance
		BRANCH
		if (BoxDistance < AOGlobalMaxSphereQueryRadius)
		{
			float3 UVAdd = LoadObjectUVAdd(ObjectIndex);

			// Sample at the closest point inside the object's box and add the distance travelled to reach it
			float3 ClampedSamplePosition = clamp(VolumePosition, -LocalPositionExtent, LocalPositionExtent);
			float DistanceToClamped = length(VolumePosition - ClampedSamplePosition);

			float3 StepVolumeUV = DistanceFieldVolumePositionToUV(ClampedSamplePosition, UVScaleAndVolumeScale.xyz, UVAdd);
			float DistanceToOccluder = (Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, StepVolumeUV, 0).x + DistanceToClamped) * UVScaleAndVolumeScale.w;

			MinDistance = min(MinDistance, DistanceToOccluder);
		}
	}

	// Threads past the update region only helped with tile culling and must not write
	if (all(DispatchThreadId.xyz < UpdateRegionSize))
	{
		float3 GlobalUV = ComputeGlobalUV(WorldPosition, ClipmapIndex);
		// frac wraps the UV so the write coordinate stays inside the volume
		uint3 WriteCoordinate = (uint3)(frac(GlobalUV) * GlobalVolumeDimension);
		RWGlobalDistanceFieldTexture[WriteCoordinate] = MinDistance;
	}
}