You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
302 lines
9.9 KiB
Plaintext
302 lines
9.9 KiB
Plaintext
// Copyright 1998-2015 Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
DistanceFieldAOShared.usf
|
|
=============================================================================*/
|
|
|
|
// Fallback values for compile-time settings, used only when they have not
// already been defined before this point.
#ifndef SORTTHREADGROUP_SIZEX
	#define SORTTHREADGROUP_SIZEX 1
#endif

#ifndef SORTTHREADGROUP_SIZEY
	#define SORTTHREADGROUP_SIZEY 1
#endif

// -1 is the fallback when no cone trace phase has been selected.
#ifndef CONE_TRACE_PHASE
	#define CONE_TRACE_PHASE -1
#endif
|
|
|
|
// Each of the three constants below must match its C++ counterpart.

// Must match C++
#define NUM_CONE_STEPS 10

// Must match C++ (also the number of visibility entries stored per record, see ComputeBentNormal)
#define NUM_CONE_DIRECTIONS 9

// Must match C++
#define AO_DOWNSAMPLE_FACTOR 2
|
|
|
|
// Max occlusion distance used for per-object culling (see IntersectObjectWithConeDepthRange)
float AOObjectMaxDistance;
// Linear scale applied to the exponential step offset (see GetStepOffset)
float AOStepScale;
// Scale applied to the step index inside the exponent (see GetStepOffset)
float AOStepExponentScale;
// NOTE(review): not referenced in this part of the file; presumably a view distance limit for AO - confirm against callers
float AOMaxViewDistance;

// Max occlusion distance for the global distance field path (see GetAOMaxDistance)
float AOGlobalMaxOcclusionDistance;
|
|
|
|
/**
 * Gives the max occlusion distance regardless of whether the global distance field is in use.
 * This is the larger of the per-object and the global max occlusion distances.
 */
float GetAOMaxDistance()
{
	float MaxOcclusionDistance = max(AOObjectMaxDistance, AOGlobalMaxOcclusionDistance);
	return MaxOcclusionDistance;
}
|
|
|
|
// Selects how many culled object lists exist per tile (see NUM_CULLED_OBJECT_LISTS below).
#define USE_DEPTH_RANGE_LISTS 1

#if USE_DEPTH_RANGE_LISTS
	// One for near, one for far
	#define NUM_CULLED_OBJECT_LISTS 2
#else
	// Objects are culled into one of 3 lists based on depth from the tile
	#define NUM_CULLED_OBJECT_LISTS 3
#endif
|
|
|
|
/**
 * Maps a cone trace phase to a range of indices.
 *   Phase 0      -> [0, 4]
 *   Phase 1      -> [5, 7]
 *   Any other    -> [8, 9]
 * NOTE(review): the ranges are contiguous and cover 0..9 if EndIndex is inclusive (NUM_CONE_STEPS is 10) - confirm against callers.
 */
void GetPhaseParameters(uint Phase, out uint StartIndex, out uint EndIndex)
{
	// Start from the range used by every phase other than 0 and 1
	StartIndex = 8;
	EndIndex = 9;

	if (Phase == 0)
	{
		StartIndex = 0;
		EndIndex = 4;
	}
	else if (Phase == 1)
	{
		StartIndex = 5;
		EndIndex = 7;
	}
}
|
|
|
|
/** Offset of the given step: AOStepScale * 2^(AOStepExponentScale * StepIndex), so offsets grow exponentially with the step index. */
float GetStepOffset(float StepIndex)
{
	float StepExponent = AOStepExponentScale * StepIndex;
	return AOStepScale * exp2(StepExponent);
}
|
|
|
|
// Per-tile header, 4 uints per tile (read by GetTileHead)
Buffer<uint> TileHeadDataUnpacked;
// Per-tile object index lists
// NOTE(review): presumably the read-only view of what IntersectObjectWithConeDepthRange writes through RWTileArrayData - confirm
Buffer<uint> TileArrayData;
// Only .x is read in this part of the file: it is the row pitch, in tiles, used to linearize a 2d tile coordinate
uint2 TileListGroupSize;
|
|
|
|
/**
 * Reads the 4 uint header of the tile at TileCoordinate.
 * The first component is returned as stored; the remaining three are clamped to MAX_OBJECTS_PER_TILE.
 * NOTE(review): IntersectObjectWithConeDepthRange increments the counters at [TileIndex * 4 + 1 + ListIndex]
 * even after a list is full, which is presumably why the values are clamped here - confirm both buffers alias the same data.
 */
uint4 GetTileHead(uint2 TileCoordinate)
{
	// Tiles are laid out row by row, each owning 4 consecutive uints
	uint HeadOffset = (TileCoordinate.y * TileListGroupSize.x + TileCoordinate.x) * 4;

	uint4 TileHead;
	TileHead.x = TileHeadDataUnpacked[HeadOffset + 0];
	TileHead.y = min(TileHeadDataUnpacked[HeadOffset + 1], (uint)MAX_OBJECTS_PER_TILE);
	TileHead.z = min(TileHeadDataUnpacked[HeadOffset + 2], (uint)MAX_OBJECTS_PER_TILE);
	TileHead.w = min(TileHeadDataUnpacked[HeadOffset + 3], (uint)MAX_OBJECTS_PER_TILE);
	return TileHead;
}
|
|
|
|
// Downsampled GBuffer: normal, depth and representation flags, decoded by GetDownsampledGBuffer / GetDownsampledDepth
Texture2D DistanceFieldNormalTexture;
SamplerState DistanceFieldNormalSampler;
|
|
|
|
/**
 * Packs the parts of the GBuffer needed by distance field AO into a float4:
 *   xy - world normal xy
 *   z  - magnitude .5 when the world normal z is positive, 1 otherwise; positive when the pixel has a heightfield representation, negative otherwise
 *   w  - scene depth; positive when the pixel has a distance field representation, negative otherwise
 * GetDownsampledGBuffer performs the matching decode.
 */
float4 EncodeDownsampledGBuffer(FGBufferData GBufferData, float SceneDepth)
{
	// Only the sign of the normal's z is stored, its magnitude is reconstructed from xy when decoding
	float NormalZCode = GBufferData.WorldNormal.z > 0 ? .5f : 1;

	float4 Encoded;
	Encoded.xy = GBufferData.WorldNormal.xy;
	Encoded.z = GBufferData.HasHeightfieldRepresentation > 0 ? NormalZCode : -NormalZCode;
	Encoded.w = GBufferData.HasDistanceFieldRepresentation > 0 ? SceneDepth : -SceneDepth;
	return Encoded;
}
|
|
|
|
/**
 * Samples the downsampled GBuffer at ScreenUV and undoes the packing done by EncodeDownsampledGBuffer.
 *
 * @param ScreenUV								UV used to sample DistanceFieldNormalTexture (mip 0)
 * @param OutNormal								xy as stored, z reconstructed from xy with its sign taken from the magnitude of the stored z
 * @param OutDepth								Absolute value of the stored w
 * @param bHasDistanceFieldRepresentation		True when the stored w is positive
 * @param bHasHeightfieldRepresentation			True when the stored z is positive
 */
void GetDownsampledGBuffer(float2 ScreenUV, out float3 OutNormal, out float OutDepth, out bool bHasDistanceFieldRepresentation, out bool bHasHeightfieldRepresentation)
{
	float4 TextureValue = Texture2DSampleLevel(DistanceFieldNormalTexture, DistanceFieldNormalSampler, ScreenUV, 0);

	// The signs of w and z carry the representation flags
	bHasDistanceFieldRepresentation = TextureValue.w > 0;
	bHasHeightfieldRepresentation = TextureValue.z > 0;
	OutDepth = abs(TextureValue.w);

	// The stored xy can end up with a squared length slightly above 1 (storage precision, filtering),
	// which would make the radicand negative and the reconstructed z NaN. Clamp it to keep the normal finite.
	float NormalZSquared = saturate(1 - dot(TextureValue.xy, TextureValue.xy));
	float NormalZ = sqrt(NormalZSquared);

	// The encoder writes a magnitude of 1 for a non-positive z and .5 for a positive z
	OutNormal = float3(TextureValue.xy, abs(TextureValue.z) > .75f ? -NormalZ : NormalZ);
}
|
|
|
|
/** Depth only variant of GetDownsampledGBuffer: the absolute value of the w channel of the downsampled GBuffer at ScreenUV. */
float GetDownsampledDepth(float2 ScreenUV)
{
	float4 EncodedValue = Texture2DSampleLevel(DistanceFieldNormalTexture, DistanceFieldNormalSampler, ScreenUV, 0);
	// The sign of w only carries the distance field representation flag
	return abs(EncodedValue.w);
}
|
|
|
|
// NOTE(review): CurrentLevelDownsampleFactor, AOBufferSize and DownsampleFactorToBaseLevel are not referenced
// in this part of the file; their meaning has to be confirmed against the shaders that use them.
uint CurrentLevelDownsampleFactor;
float2 AOBufferSize;

uint DownsampleFactorToBaseLevel;
// Multiplied with a base level pixel position to get a UV (see GetBaseLevelScreenUVFromScreenGrid)
float2 BaseLevelTexelSize;

// Not sampled in this part of the file
Texture2D BentNormalAOTexture;
SamplerState BentNormalAOSampler;

// Not sampled in this part of the file
Texture2D IrradianceTexture;
SamplerState IrradianceSampler;
|
|
|
|
/**
 * Tests a sphere against an infinite cone.
 *
 * @param SphereCenterAndRadius		xyz = center, w = radius
 * @param ConeVertex				Apex of the cone
 * @param ConeAxis					Axis of the cone (NOTE(review): the math assumes a unit vector - confirm callers normalize)
 * @param ConeAngleCos				Cosine of the cone angle
 * @param ConeAngleSin				Sine of the cone angle, must be non-zero as it is used as a divisor
 * @return							True when the sphere and the cone overlap
 */
bool SphereIntersectCone(float4 SphereCenterAndRadius, float3 ConeVertex, float3 ConeAxis, float ConeAngleCos, float ConeAngleSin)
{
	float3 SphereCenter = SphereCenterAndRadius.xyz;
	float SphereRadius = SphereCenterAndRadius.w;

	// Pull the vertex back along the axis so that the cone grows by the sphere radius, then test the sphere center against it
	float3 ExpandedVertex = ConeVertex - (SphereRadius / ConeAngleSin) * ConeAxis;
	float3 ExpandedVertexToCenter = SphereCenter - ExpandedVertex;
	float ExpandedDistanceSq = dot(ExpandedVertexToCenter, ExpandedVertexToCenter);
	float ExpandedAxisDistance = dot(ConeAxis, ExpandedVertexToCenter);

	bool bCenterInExpandedCone = ExpandedAxisDistance > 0
		&& ExpandedAxisDistance * ExpandedAxisDistance >= ExpandedDistanceSq * ConeAngleCos * ConeAngleCos;

	if (!bCenterInExpandedCone)
	{
		return false;
	}

	// The center is inside the expanded cone. The only remaining way to miss is for the center to lie
	// in the region behind the real vertex, where it has to be within one radius of the vertex itself.
	float3 VertexToCenter = SphereCenter - ConeVertex;
	float VertexDistanceSq = dot(VertexToCenter, VertexToCenter);
	float DistanceBehindVertex = -dot(ConeAxis, VertexToCenter);

	bool bCenterBehindVertex = DistanceBehindVertex > 0
		&& DistanceBehindVertex * DistanceBehindVertex >= VertexDistanceSq * ConeAngleSin * ConeAngleSin;

	return !bCenterBehindVertex || VertexDistanceSq <= SphereRadius * SphereRadius;
}
|
|
|
|
/**
 * Same as SphereIntersectCone, but the sphere additionally has to overlap at least one of two depth ranges
 * measured along the cone axis from the cone vertex.
 *
 * @param ConeAxisDepthRanges		xy = (start, end) of the first range, zw = (start, end) of the second range
 */
bool SphereIntersectConeWithDepthRanges(float4 SphereCenterAndRadius, float3 ConeVertex, float3 ConeAxis, float ConeAngleCos, float ConeAngleSin, float4 ConeAxisDepthRanges)
{
	if (!SphereIntersectCone(SphereCenterAndRadius, ConeVertex, ConeAxis, ConeAngleCos, ConeAngleSin))
	{
		return false;
	}

	// Extent of the sphere projected onto the cone axis
	float CenterAxisDistance = dot(SphereCenterAndRadius.xyz - ConeVertex, ConeAxis);
	float FarthestAxisDistance = CenterAxisDistance + SphereCenterAndRadius.w;
	float NearestAxisDistance = CenterAxisDistance - SphereCenterAndRadius.w;

	// Interval overlap: the sphere has to end after the range starts and start before the range ends
	bool bOverlapsFirstRange = (FarthestAxisDistance > ConeAxisDepthRanges.x) && (NearestAxisDistance < ConeAxisDepthRanges.y);
	bool bOverlapsSecondRange = (FarthestAxisDistance > ConeAxisDepthRanges.z) && (NearestAxisDistance < ConeAxisDepthRanges.w);

	return bOverlapsFirstRange || bOverlapsSecondRange;
}
|
|
|
|
// Visibility of each cone, NUM_CONE_DIRECTIONS consecutive floats per record (see ComputeBentNormal)
Buffer<float> RecordConeVisibility;
// Scale applied to the averaged bent normal (see ComputeBentNormal)
float BentNormalNormalizeFactor;
|
|
|
|
/**
 * Computes the bent normal of a record from its per-cone visibility.
 * Every sample direction is rotated into a basis built around RecordWorldNormal, weighted by the visibility of its cone
 * and accumulated; the sum is then averaged over the cone count and scaled by BentNormalNormalizeFactor.
 *
 * @param RecordWorldNormal		Normal the sample directions are oriented around
 * @param RelativeRecordIndex	Index of the record inside RecordConeVisibility
 * @return						Visibility weighted average direction, not renormalized
 */
float3 ComputeBentNormal(float3 RecordWorldNormal, uint RelativeRecordIndex)
{
	float3 BasisX;
	float3 BasisY;
	FindBestAxisVectors2(RecordWorldNormal, BasisX, BasisY);

	float3 WeightedDirectionSum = 0;

	for (uint SampleIndex = 0; SampleIndex < NUM_CONE_DIRECTIONS; SampleIndex++)
	{
		// Sample directions are stored relative to the normal, bring them into world space
		float3 LocalDirection = AOSamples2.SampleDirections[SampleIndex].xyz;
		float3 WorldDirection = LocalDirection.x * BasisX + LocalDirection.y * BasisY + LocalDirection.z * RecordWorldNormal;

		float Visibility = RecordConeVisibility[RelativeRecordIndex * NUM_CONE_DIRECTIONS + SampleIndex];
		WeightedDirectionSum += Visibility * WorldDirection;
	}

	float AverageWeight = 1.0f / (float)NUM_CONE_DIRECTIONS;
	return WeightedDirectionSum * (BentNormalNormalizeFactor * AverageWeight);
}
|
|
|
|
// Divisor that converts a base level pixel coordinate into a culling tile coordinate
// (see ComputeTileCoordinateFromScreenGrid); 4 unless already defined.
#ifndef CULLED_TILE_SIZEX
	#define CULLED_TILE_SIZEX 4
#endif

// Scale from a screen grid coordinate to a base level pixel coordinate; 1 unless already defined.
#ifndef TRACE_DOWNSAMPLE_FACTOR
	#define TRACE_DOWNSAMPLE_FACTOR 1
#endif

// NOTE(review): not referenced in this part of the file; presumably the dimensions of the screen grid - confirm
uint2 ScreenGridConeVisibilitySize;
// Offset added to the scaled screen grid coordinate before it is converted to a UV
float2 JitterOffset;
|
|
|
|
/** Culling tile containing the given screen grid cell: the cell is scaled by TRACE_DOWNSAMPLE_FACTOR, then divided by CULLED_TILE_SIZEX. */
uint2 ComputeTileCoordinateFromScreenGrid(uint2 OutputCoordinate)
{
	return OutputCoordinate * TRACE_DOWNSAMPLE_FACTOR / CULLED_TILE_SIZEX;
}
|
|
|
|
/** UV of the given screen grid cell, obtained by scaling the jittered, half texel offset base level position by BaseLevelTexelSize. */
float2 GetBaseLevelScreenUVFromScreenGrid(uint2 OutputCoordinate)
{
	// The half texel offset targets the texel center
	float2 BaseLevelPosition = OutputCoordinate * TRACE_DOWNSAMPLE_FACTOR + JitterOffset + float2(.5f, .5f);
	return BaseLevelPosition * BaseLevelTexelSize;
}
|
|
|
|
/**
 * UV of the given screen grid cell: the jittered base level position is scaled by AO_DOWNSAMPLE_FACTOR,
 * offset by the view rect origin and half a pixel, then multiplied by View.BufferSizeAndInvSize.zw.
 */
float2 GetScreenUVFromScreenGrid(uint2 OutputCoordinate)
{
	float2 JitteredBaseLevelPosition = OutputCoordinate * TRACE_DOWNSAMPLE_FACTOR + JitterOffset;
	float2 PixelPosition = JitteredBaseLevelPosition * AO_DOWNSAMPLE_FACTOR + View.ViewRectMin.xy + float2(.5f, .5f);
	return PixelPosition * View.BufferSizeAndInvSize.zw;
}
|
|
|
|
// Writable tile header: IntersectObjectWithConeDepthRange atomically increments the entry at [TileIndex * 4 + 1 + ListIndex]
RWBuffer<uint> RWTileHeadDataUnpacked;
// Writable tile object lists: receives the index of every object that passes culling
RWBuffer<uint> RWTileArrayData;
|
|
|
|
// When non-zero, objects overlapping a tile depth range are additionally tested against their distance field
// before being added to the tile. Can be overridden by defining it before this point.
#ifndef USE_DISTANCE_FIELD_FOR_OBJECT_CULLING
#define USE_DISTANCE_FIELD_FOR_OBJECT_CULLING 1
#endif

/**
 * Used for object <-> tile culling.
 * Appends ObjectIndex to list ListIndex of tile TileIndex when the object overlaps the depth range of the tile cone and,
 * with USE_DISTANCE_FIELD_FOR_OBJECT_CULLING, when its distance field comes close enough to the bounding sphere of that depth range.
 *
 * @param TileConeVertex			Vertex of the tile cone (transformed with View.ViewToTranslatedWorld below, so expected in view space)
 * @param TileConeAxis				Axis of the tile cone
 * @param TileConeAngleCos			Cosine of the tile cone angle
 * @param TileConeAngleSin			Sine of the tile cone angle
 * @param ConeDepthRange			x = start, y = end of the depth range along the cone axis
 * @param ConeAxisDistanceMinMax	Extent of the object along the cone axis; compared as x against the range start and y against the range end
 * @param TileIndex					Linear index of the tile
 * @param ListIndex					Which of the tile's culled object lists receives the object
 * @param ObjectIndex				Object being tested, this is the value written to the list
 * @param TotalNumGroups			Stride between consecutive array entries, so that neighboring tiles write to neighboring addresses
 */
void IntersectObjectWithConeDepthRange(
	float3 TileConeVertex,
	float3 TileConeAxis,
	float TileConeAngleCos,
	float TileConeAngleSin,
	float2 ConeDepthRange,
	float2 ConeAxisDistanceMinMax,
	uint TileIndex,
	uint ListIndex,
	uint ObjectIndex,
	uint TotalNumGroups)
{
	BRANCH
	if (ConeAxisDistanceMinMax.x > ConeDepthRange.x && ConeAxisDistanceMinMax.y < ConeDepthRange.y)
	{
#if USE_DISTANCE_FIELD_FOR_OBJECT_CULLING
		// Use the position halfway between the depth ranges as the center for the bounding sphere of this tile depth range
		float3 ViewTileBoundingSphereCenter = TileConeVertex + TileConeAxis * (.5f * (ConeDepthRange.x + ConeDepthRange.y));
		float3 WorldTileBoundingSphereCenter = mul(float4(ViewTileBoundingSphereCenter.xyz, 1), View.ViewToTranslatedWorld).xyz - View.PreViewTranslation.xyz;
		// Half the length of the depth range; the sign does not matter as the value is only used squared
		float DistanceAlongAxis = .5f * (ConeDepthRange.x - ConeDepthRange.y);
		// Radius of the cone cross section at the far end of the range
		float FarDepthDistanceToEdgeOfCone = ConeDepthRange.y * TileConeAngleSin / TileConeAngleCos;
		float TileBoundingSphereRadius = sqrt(DistanceAlongAxis * DistanceAlongAxis + FarDepthDistanceToEdgeOfCone * FarDepthDistanceToEdgeOfCone);

		float3 LocalPositionExtent = LoadObjectLocalPositionExtent(ObjectIndex);
		float4x4 WorldToVolume = LoadObjectWorldToVolume(ObjectIndex);
		bool bGeneratedAsTwoSided;
		float4 UVScaleAndVolumeScale = LoadObjectUVScale(ObjectIndex, bGeneratedAsTwoSided);
		float3 VolumeTileBoundingSphereCenter = mul(float4(WorldTileBoundingSphereCenter, 1), WorldToVolume).xyz;
		// NOTE(review): the .w scale presumably converts volume space distances back to world space - confirm against LoadObjectUVScale
		float BoxDistance = ComputeDistanceFromBoxToPoint(-LocalPositionExtent, LocalPositionExtent, VolumeTileBoundingSphereCenter) * UVScaleAndVolumeScale.w;

		// Cheap rejection against the object's box before sampling its distance field
		BRANCH
		if (BoxDistance < TileBoundingSphereRadius + AOObjectMaxDistance)
		{
			float3 UVAdd = LoadObjectUVAdd(ObjectIndex);

			// The distance field is only sampled inside the box, the distance from the sphere center to the clamped position is added on top
			float3 ClampedSamplePosition = clamp(VolumeTileBoundingSphereCenter, -LocalPositionExtent, LocalPositionExtent);
			float DistanceToClamped = length(VolumeTileBoundingSphereCenter - ClampedSamplePosition);

			float3 StepVolumeUV = DistanceFieldVolumePositionToUV(ClampedSamplePosition, UVScaleAndVolumeScale.xyz, UVAdd);
			float DistanceToOccluder = (Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, StepVolumeUV, 0).x + DistanceToClamped) * UVScaleAndVolumeScale.w;

			BRANCH
			if (DistanceToOccluder < TileBoundingSphereRadius + AOObjectMaxDistance)
			{
				// The counter is incremented even when the list is full, readers have to clamp it (see GetTileHead)
				uint ArrayIndex;
				InterlockedAdd(RWTileHeadDataUnpacked[TileIndex * 4 + 1 + ListIndex], 1U, ArrayIndex);

				if (ArrayIndex < MAX_OBJECTS_PER_TILE)
				{
					// Note: indexing so that threads are writing to RWTileArrayData coherently, has a huge impact on speed, even though the array data for one record is no longer dense
					uint DataIndex = (ArrayIndex * TotalNumGroups + TileIndex) * NUM_CULLED_OBJECT_LISTS + ListIndex;

					RWTileArrayData[DataIndex] = ObjectIndex;
				}
			}
		}
#else
		// No distance field refinement: every object overlapping the depth range goes into the requested list.
		// (The ListIndex parameter is used as is; a local redeclaration here used to force every object into list 0.)
		uint ArrayIndex;
		InterlockedAdd(RWTileHeadDataUnpacked[TileIndex * 4 + 1 + ListIndex], 1U, ArrayIndex);

		if (ArrayIndex < MAX_OBJECTS_PER_TILE)
		{
			// Note: indexing so that threads are writing to RWTileArrayData coherently, has a huge impact on speed, even though the array data for one record is no longer dense
			uint DataIndex = (ArrayIndex * TotalNumGroups + TileIndex) * NUM_CULLED_OBJECT_LISTS + ListIndex;

			RWTileArrayData[DataIndex] = ObjectIndex;
		}
#endif
	}
}