Files
UnrealEngineUWP/Engine/Shaders/DistanceFieldAOShared.usf
Guillaume Abadie bf8c47df64 Exposes the view property's inverse and the TanHalfFieldOfView view property in the material editor
#code_review: Martin.Mittring

[CL 2597659 by Guillaume Abadie in Main branch]
2015-06-23 15:19:21 -04:00

302 lines
9.9 KiB
Plaintext

// Copyright 1998-2015 Epic Games, Inc. All Rights Reserved.
/*=============================================================================
DistanceFieldAOShared.usf
=============================================================================*/
// Fallback values for compile-time settings; each one only applies when the macro has not already been defined before this file is included
#ifndef SORTTHREADGROUP_SIZEX
#define SORTTHREADGROUP_SIZEX 1
#endif
#ifndef SORTTHREADGROUP_SIZEY
#define SORTTHREADGROUP_SIZEY 1
#endif
// Defaults to -1 when no phase has been defined (see GetPhaseParameters for the phase -> index range mapping)
#ifndef CONE_TRACE_PHASE
#define CONE_TRACE_PHASE -1
#endif
// Must match C++
#define NUM_CONE_STEPS 10
// Must match C++
// Also the number of visibility entries stored per record in RecordConeVisibility (see ComputeBentNormal)
#define NUM_CONE_DIRECTIONS 9
// Must match C++
// Multiplier applied when converting screen grid coordinates to full resolution pixels (see GetScreenUVFromScreenGrid)
#define AO_DOWNSAMPLE_FACTOR 2
// Shader parameters; their values are provided from outside this file
// Added to the tile bounding sphere radius in the object culling tests of IntersectObjectWithConeDepthRange, and one of the two inputs of GetAOMaxDistance
float AOObjectMaxDistance;
// Linear scale of the step offset returned by GetStepOffset
float AOStepScale;
// Scale applied to the step index inside the exp2 of GetStepOffset
float AOStepExponentScale;
// Not referenced in the visible part of this file
float AOMaxViewDistance;
// Second input of GetAOMaxDistance
float AOGlobalMaxOcclusionDistance;
// Returns the larger of the object and global max occlusion distances,
// so the result can be used whether or not the global distance field is in use
float GetAOMaxDistance()
{
	float MaxOcclusionDistance = max(AOObjectMaxDistance, AOGlobalMaxOcclusionDistance);
	return MaxOcclusionDistance;
}
// Selects how many culled object lists exist per tile; NUM_CULLED_OBJECT_LISTS is also the stride used when indexing RWTileArrayData
#define USE_DEPTH_RANGE_LISTS 1
#if USE_DEPTH_RANGE_LISTS
// One for near, one for far
#define NUM_CULLED_OBJECT_LISTS 2
#else
// Objects are culled into one of 3 lists based on depth from the tile
#define NUM_CULLED_OBJECT_LISTS 3
#endif
/**
 * Maps a cone trace phase to the pair of indices associated with it.
 *
 * Phase 0 -> [0, 4], phase 1 -> [5, 7], any other phase -> [8, 9].
 * NOTE(review): the indices presumably address cone steps (NUM_CONE_STEPS is 10) with EndIndex inclusive - confirm against the callers.
 *
 * @param Phase - phase selector; every value other than 0 and 1 selects the last range
 * @param StartIndex - receives the first index of the phase
 * @param EndIndex - receives the last index of the phase
 */
void GetPhaseParameters(uint Phase, out uint StartIndex, out uint EndIndex)
{
	bool bFirstPhase = Phase == 0;
	bool bSecondPhase = Phase == 1;

	StartIndex = bFirstPhase ? 0 : (bSecondPhase ? 5 : 8);
	EndIndex = bFirstPhase ? 4 : (bSecondPhase ? 7 : 9);
}
/**
 * Computes the offset for a step index: AOStepScale * 2^(AOStepExponentScale * StepIndex).
 *
 * @param StepIndex - step index, passed as a float
 * @return the exponentially growing offset for that step
 */
float GetStepOffset(float StepIndex)
{
	float Exponent = AOStepExponentScale * StepIndex;
	return AOStepScale * exp2(Exponent);
}
// Per-tile head data, 4 uints per tile; read by GetTileHead
Buffer<uint> TileHeadDataUnpacked;
// Not read in the visible part of this file
// NOTE(review): presumably the read-only view of RWTileArrayData (object indices per tile) - confirm
Buffer<uint> TileArrayData;
// x is the number of tiles per row used to linearize a 2d tile coordinate in GetTileHead
uint2 TileListGroupSize;
/**
 * Loads the 4 head values stored for a tile in TileHeadDataUnpacked.
 *
 * @param TileCoordinate - 2d tile coordinate, linearized with TileListGroupSize.x as the row pitch
 * @return x is the first head value as stored; y, z and w are the next three values, each clamped to MAX_OBJECTS_PER_TILE
 */
uint4 GetTileHead(uint2 TileCoordinate)
{
	// Each tile owns 4 consecutive uints
	uint HeadBase = (TileCoordinate.y * TileListGroupSize.x + TileCoordinate.x) * 4;
	uint MaxCount = (uint)MAX_OBJECTS_PER_TILE;

	uint4 TileHead;
	TileHead.x = TileHeadDataUnpacked[HeadBase + 0];
	TileHead.y = min(TileHeadDataUnpacked[HeadBase + 1], MaxCount);
	TileHead.z = min(TileHeadDataUnpacked[HeadBase + 2], MaxCount);
	TileHead.w = min(TileHeadDataUnpacked[HeadBase + 3], MaxCount);
	return TileHead;
}
// Sampled by GetDownsampledGBuffer and GetDownsampledDepth, which decode the layout produced by EncodeDownsampledGBuffer
Texture2D DistanceFieldNormalTexture;
SamplerState DistanceFieldNormalSampler;
/**
 * Packs normal, depth and two representation flags into a float4.
 *
 * xy: world normal xy
 * z:  magnitude is .5 when the world normal z is positive and 1 otherwise; the sign is positive when HasHeightfieldRepresentation > 0
 * w:  SceneDepth, negated when HasDistanceFieldRepresentation is not > 0
 */
float4 EncodeDownsampledGBuffer(FGBufferData GBufferData, float SceneDepth)
{
	float4 Encoded;
	Encoded.xy = GBufferData.WorldNormal.xy;

	// The magnitude carries the sign of the normal's z, the sign carries the heightfield flag
	float ZMagnitude = GBufferData.WorldNormal.z > 0 ? .5f : 1;
	Encoded.z = GBufferData.HasHeightfieldRepresentation > 0 ? ZMagnitude : -ZMagnitude;

	// The sign of the depth carries the distance field flag
	Encoded.w = GBufferData.HasDistanceFieldRepresentation > 0 ? SceneDepth : -SceneDepth;
	return Encoded;
}
/**
 * Samples DistanceFieldNormalTexture at mip 0 and decodes the layout written by EncodeDownsampledGBuffer.
 *
 * @param ScreenUV - UV used to sample the texture
 * @param OutNormal - xy taken from the texture, z reconstructed as sqrt(1 - x^2 - y^2) with its sign recovered from the magnitude of the z channel
 * @param OutDepth - absolute value of the w channel
 * @param bHasDistanceFieldRepresentation - true when the w channel is positive
 * @param bHasHeightfieldRepresentation - true when the z channel is positive
 */
void GetDownsampledGBuffer(float2 ScreenUV, out float3 OutNormal, out float OutDepth, out bool bHasDistanceFieldRepresentation, out bool bHasHeightfieldRepresentation)
{
	float4 TextureValue = Texture2DSampleLevel(DistanceFieldNormalTexture, DistanceFieldNormalSampler, ScreenUV, 0);

	// The signs of w and z carry the two representation flags
	bHasDistanceFieldRepresentation = TextureValue.w > 0;
	bHasHeightfieldRepresentation = TextureValue.z > 0;
	OutDepth = abs(TextureValue.w);

	// Clamp before the sqrt: if the stored xy ends up with a squared length slightly above 1,
	// 1 - dot() goes negative and sqrt would return NaN, which then propagates through everything that uses the normal
	float NormalZSquared = saturate(1 - dot(TextureValue.xy, TextureValue.xy));
	OutNormal = float3(TextureValue.xy, sqrt(NormalZSquared));

	// The encoder stores a z magnitude of 1 for normals whose z is not positive and .5 otherwise
	OutNormal.z *= abs(TextureValue.z) > .75f ? -1 : 1;
}
/**
 * Samples DistanceFieldNormalTexture at mip 0 and returns only the depth.
 * The depth is stored in w with a flag in its sign, so the absolute value is returned.
 */
float GetDownsampledDepth(float2 ScreenUV)
{
	float4 EncodedValue = Texture2DSampleLevel(DistanceFieldNormalTexture, DistanceFieldNormalSampler, ScreenUV, 0);
	return abs(EncodedValue.w);
}
// Shader parameters and resources; apart from BaseLevelTexelSize none of these are referenced in the visible part of this file
uint CurrentLevelDownsampleFactor;
float2 AOBufferSize;
uint DownsampleFactorToBaseLevel;
// Multiplied with a base level pixel position to produce a UV in GetBaseLevelScreenUVFromScreenGrid
float2 BaseLevelTexelSize;
Texture2D BentNormalAOTexture;
SamplerState BentNormalAOSampler;
Texture2D IrradianceTexture;
SamplerState IrradianceSampler;
/**
 * Tests a sphere against a cone.
 *
 * @param SphereCenterAndRadius - xyz is the sphere center, w the radius
 * @param ConeVertex - apex of the cone
 * @param ConeAxis - axis of the cone; NOTE(review): the math assumes a unit length vector - confirm at the call sites
 * @param ConeAngleCos - NOTE(review): presumably the cosine of the cone half angle - confirm
 * @param ConeAngleSin - matching sine; used as a divisor, so it must not be 0
 * @return true when the sphere is considered to intersect the cone
 */
bool SphereIntersectCone(float4 SphereCenterAndRadius, float3 ConeVertex, float3 ConeAxis, float ConeAngleCos, float ConeAngleSin)
{
	float3 SphereCenter = SphereCenterAndRadius.xyz;
	float SphereRadius = SphereCenterAndRadius.w;

	// First test: sphere center against the cone whose vertex has been moved backwards along the axis by Radius / Sin
	float3 ShiftedVertex = ConeVertex - (SphereRadius / ConeAngleSin) * ConeAxis;
	float3 FromShiftedVertex = SphereCenter - ShiftedVertex;
	float ShiftedDistanceSq = dot(FromShiftedVertex, FromShiftedVertex);
	float ShiftedAxisDistance = dot(ConeAxis, FromShiftedVertex);

	bool bCenterInShiftedCone = ShiftedAxisDistance > 0
		&& ShiftedAxisDistance * ShiftedAxisDistance >= ShiftedDistanceSq * ConeAngleCos * ConeAngleCos;

	if (!bCenterInShiftedCone)
	{
		return false;
	}

	// Second test: sphere center relative to the real vertex, measured along the reversed axis
	float3 FromVertex = SphereCenter - ConeVertex;
	float VertexDistanceSq = dot(FromVertex, FromVertex);
	float ReverseAxisDistance = -dot(ConeAxis, FromVertex);

	bool bCenterBehindVertex = ReverseAxisDistance > 0
		&& ReverseAxisDistance * ReverseAxisDistance >= VertexDistanceSq * ConeAngleSin * ConeAngleSin;

	if (bCenterBehindVertex)
	{
		// In this region only a sphere that reaches the vertex itself counts as intersecting
		return VertexDistanceSq <= SphereRadius * SphereRadius;
	}

	return true;
}
/**
 * SphereIntersectCone with an additional requirement that the sphere's extent along the cone axis overlaps at least one of two depth ranges.
 *
 * @param SphereCenterAndRadius - xyz is the sphere center, w the radius
 * @param ConeVertex, ConeAxis, ConeAngleCos, ConeAngleSin - forwarded unchanged to SphereIntersectCone
 * @param ConeAxisDepthRanges - xy is the (min, max) of the first range, zw the (min, max) of the second, both measured along ConeAxis from ConeVertex
 * @return true when the sphere intersects the cone and overlaps either range
 */
bool SphereIntersectConeWithDepthRanges(float4 SphereCenterAndRadius, float3 ConeVertex, float3 ConeAxis, float ConeAngleCos, float ConeAngleSin, float4 ConeAxisDepthRanges)
{
	if (!SphereIntersectCone(SphereCenterAndRadius, ConeVertex, ConeAxis, ConeAngleCos, ConeAngleSin))
	{
		return false;
	}

	// Interval covered by the sphere along the cone axis
	float CenterAxisDistance = dot(SphereCenterAndRadius.xyz - ConeVertex, ConeAxis);
	float SphereAxisMax = CenterAxisDistance + SphereCenterAndRadius.w;
	float SphereAxisMin = CenterAxisDistance - SphereCenterAndRadius.w;

	bool bOverlapsFirstRange = SphereAxisMax > ConeAxisDepthRanges.x && SphereAxisMin < ConeAxisDepthRanges.y;
	bool bOverlapsSecondRange = SphereAxisMax > ConeAxisDepthRanges.z && SphereAxisMin < ConeAxisDepthRanges.w;

	return bOverlapsFirstRange || bOverlapsSecondRange;
}
// Visibility per cone, NUM_CONE_DIRECTIONS consecutive floats per record; read by ComputeBentNormal
Buffer<float> RecordConeVisibility;
// Extra scale applied to the averaged direction in ComputeBentNormal
float BentNormalNormalizeFactor;
/**
 * Builds a bent normal for a record from its per-cone visibility.
 *
 * Each sample direction is rotated into the basis formed around RecordWorldNormal, weighted by the cone's visibility and accumulated.
 * The sum is then scaled by BentNormalNormalizeFactor / NUM_CONE_DIRECTIONS; the result is not renormalized.
 *
 * @param RecordWorldNormal - normal of the record, used as the third axis of the basis
 * @param RelativeRecordIndex - record index into RecordConeVisibility, which holds NUM_CONE_DIRECTIONS entries per record
 */
float3 ComputeBentNormal(float3 RecordWorldNormal, uint RelativeRecordIndex)
{
	float3 BasisX;
	float3 BasisY;
	FindBestAxisVectors2(RecordWorldNormal, BasisX, BasisY);

	uint FirstVisibilityIndex = RelativeRecordIndex * NUM_CONE_DIRECTIONS;
	float3 VisibilityWeightedSum = 0;

	for (uint SampleIndex = 0; SampleIndex < NUM_CONE_DIRECTIONS; SampleIndex++)
	{
		float3 SampleDirection = AOSamples2.SampleDirections[SampleIndex].xyz;
		float3 RotatedDirection = SampleDirection.x * BasisX + SampleDirection.y * BasisY + SampleDirection.z * RecordWorldNormal;
		VisibilityWeightedSum += RecordConeVisibility[FirstVisibilityIndex + SampleIndex] * RotatedDirection;
	}

	float AverageScale = BentNormalNormalizeFactor * (1.0f / (float)NUM_CONE_DIRECTIONS);
	return VisibilityWeightedSum * AverageScale;
}
// Fallback used when the macro has not been defined before this file is included; divisor that converts base level pixels to tile coordinates in ComputeTileCoordinateFromScreenGrid
#ifndef CULLED_TILE_SIZEX
#define CULLED_TILE_SIZEX 4
#endif
// Fallback used when the macro has not been defined before this file is included; multiplier that converts screen grid coordinates to base level pixels
#ifndef TRACE_DOWNSAMPLE_FACTOR
#define TRACE_DOWNSAMPLE_FACTOR 1
#endif
// Not referenced in the visible part of this file
uint2 ScreenGridConeVisibilitySize;
// Offset in base level pixels added to the screen grid position before it is converted to a UV
float2 JitterOffset;
/**
 * Converts a screen grid coordinate to the coordinate of the culling tile containing it.
 * The coordinate is scaled by TRACE_DOWNSAMPLE_FACTOR and then integer-divided by CULLED_TILE_SIZEX on both axes.
 */
uint2 ComputeTileCoordinateFromScreenGrid(uint2 OutputCoordinate)
{
	return OutputCoordinate * TRACE_DOWNSAMPLE_FACTOR / CULLED_TILE_SIZEX;
}
/**
 * Converts a screen grid coordinate to a UV by scaling it with TRACE_DOWNSAMPLE_FACTOR,
 * adding JitterOffset and a half pixel offset, and multiplying by BaseLevelTexelSize.
 */
float2 GetBaseLevelScreenUVFromScreenGrid(uint2 OutputCoordinate)
{
	float2 JitteredPixelCenter = OutputCoordinate * TRACE_DOWNSAMPLE_FACTOR + JitterOffset + float2(.5f, .5f);
	return JitteredPixelCenter * BaseLevelTexelSize;
}
/**
 * Converts a screen grid coordinate to a UV using the view's buffer size.
 * The jittered position is scaled by AO_DOWNSAMPLE_FACTOR, offset by View.ViewRectMin and a half pixel,
 * then multiplied by View.BufferSizeAndInvSize.zw.
 */
float2 GetScreenUVFromScreenGrid(uint2 OutputCoordinate)
{
	float2 JitteredPosition = OutputCoordinate * TRACE_DOWNSAMPLE_FACTOR + JitterOffset;
	float2 PixelCenter = JitteredPosition * AO_DOWNSAMPLE_FACTOR + View.ViewRectMin.xy + float2(.5f, .5f);
	return PixelCenter * View.BufferSizeAndInvSize.zw;
}
// Per-tile head data, 4 uints per tile; entries 1..3 are incremented atomically as per-list counters by IntersectObjectWithConeDepthRange
RWBuffer<uint> RWTileHeadDataUnpacked;
// Object indices appended by IntersectObjectWithConeDepthRange, interleaved by tile and list
RWBuffer<uint> RWTileArrayData;
/**
 * Used for object <-> tile culling.
 *
 * Appends ObjectIndex to one of the tile's culled object lists when the object's extent along the tile cone axis overlaps ConeDepthRange
 * and, with USE_DISTANCE_FIELD_FOR_OBJECT_CULLING enabled, the object's distance field is within AOObjectMaxDistance of a sphere bounding that depth range of the tile cone.
 *
 * @param TileConeVertex, TileConeAxis - cone of the tile; treated as view space, since positions built from them are transformed by View.ViewToTranslatedWorld
 * @param TileConeAngleCos, TileConeAngleSin - only their ratio Sin / Cos is used here, to size the bounding sphere at the far depth
 * @param ConeDepthRange - x and y are the bounds of the depth range along the cone axis; y is treated as the far bound when sizing the sphere
 * @param ConeAxisDistanceMinMax - extent of the object along the cone axis; NOTE(review): x is compared like a max and y like a min, matching SphereIntersectConeWithDepthRanges - confirm against the caller
 * @param TileIndex - linear tile index
 * @param ListIndex - which of the tile's lists receives the object
 * @param ObjectIndex - object to test and append
 * @param TotalNumGroups - total number of tiles, used as a stride when indexing RWTileArrayData
 */
void IntersectObjectWithConeDepthRange(
float3 TileConeVertex,
float3 TileConeAxis,
float TileConeAngleCos,
float TileConeAngleSin,
float2 ConeDepthRange,
float2 ConeAxisDistanceMinMax,
uint TileIndex,
uint ListIndex,
uint ObjectIndex,
uint TotalNumGroups)
{
BRANCH
// Interval overlap test between the object's extent and the depth range along the cone axis
if (ConeAxisDistanceMinMax.x > ConeDepthRange.x && ConeAxisDistanceMinMax.y < ConeDepthRange.y)
{
// Note: this macro is defined inside the function body and stays defined for the rest of the translation unit
#define USE_DISTANCE_FIELD_FOR_OBJECT_CULLING 1
#if USE_DISTANCE_FIELD_FOR_OBJECT_CULLING
// Use the position halfway between the depth ranges as the center for the bounding sphere of this tile depth range
float3 ViewTileBoundingSphereCenter = TileConeVertex + TileConeAxis * (.5f * (ConeDepthRange.x + ConeDepthRange.y));
float3 WorldTileBoundingSphereCenter = mul(float4(ViewTileBoundingSphereCenter.xyz, 1), View.ViewToTranslatedWorld).xyz - View.PreViewTranslation.xyz;
// Half the length of the depth range; the sign does not matter as the value is only used squared
float DistanceAlongAxis = .5f * (ConeDepthRange.x - ConeDepthRange.y);
// Radius of the cone cross section at the far depth (depth * tan)
float FarDepthDistanceToEdgeOfCone = ConeDepthRange.y * TileConeAngleSin / TileConeAngleCos;
// Distance from the sphere center to the rim of the cone at the far depth
float TileBoundingSphereRadius = sqrt(DistanceAlongAxis * DistanceAlongAxis + FarDepthDistanceToEdgeOfCone * FarDepthDistanceToEdgeOfCone);
float3 LocalPositionExtent = LoadObjectLocalPositionExtent(ObjectIndex);
float4x4 WorldToVolume = LoadObjectWorldToVolume(ObjectIndex);
bool bGeneratedAsTwoSided;
float4 UVScaleAndVolumeScale = LoadObjectUVScale(ObjectIndex, bGeneratedAsTwoSided);
float3 VolumeTileBoundingSphereCenter = mul(float4(WorldTileBoundingSphereCenter, 1), WorldToVolume).xyz;
// Cheap rejection against the object's volume box before sampling the distance field; UVScaleAndVolumeScale.w rescales the volume space distance
float BoxDistance = ComputeDistanceFromBoxToPoint(-LocalPositionExtent, LocalPositionExtent, VolumeTileBoundingSphereCenter) * UVScaleAndVolumeScale.w;
BRANCH
if (BoxDistance < TileBoundingSphereRadius + AOObjectMaxDistance)
{
float3 UVAdd = LoadObjectUVAdd(ObjectIndex);
// The distance field is sampled at the closest point inside the volume, then the distance from the sphere center to that point is added
float3 ClampedSamplePosition = clamp(VolumeTileBoundingSphereCenter, -LocalPositionExtent, LocalPositionExtent);
float DistanceToClamped = length(VolumeTileBoundingSphereCenter - ClampedSamplePosition);
float3 StepVolumeUV = DistanceFieldVolumePositionToUV(ClampedSamplePosition, UVScaleAndVolumeScale.xyz, UVAdd);
float DistanceToOccluder = (Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, StepVolumeUV, 0).x + DistanceToClamped) * UVScaleAndVolumeScale.w;
BRANCH
if (DistanceToOccluder < TileBoundingSphereRadius + AOObjectMaxDistance)
{
// The counter is incremented even when the list is already full; only the write below is skipped. GetTileHead clamps the counts it reads to MAX_OBJECTS_PER_TILE.
uint ArrayIndex;
InterlockedAdd(RWTileHeadDataUnpacked[TileIndex * 4 + 1 + ListIndex], 1U, ArrayIndex);
if (ArrayIndex < MAX_OBJECTS_PER_TILE)
{
// Note: indexing so that threads are writing to RWTileArrayData coherently, has a huge impact on speed, even though the array data for one record is no longer dense
uint DataIndex = (ArrayIndex * TotalNumGroups + TileIndex) * NUM_CULLED_OBJECT_LISTS + ListIndex;
RWTileArrayData[DataIndex] = ObjectIndex;
}
}
}
#else
// NOTE(review): this path is compiled out while USE_DISTANCE_FIELD_FOR_OBJECT_CULLING is 1. The local below shadows the ListIndex parameter, so every object would be appended to list 0 - confirm that is intended before enabling it.
uint ListIndex = 0;
uint ArrayIndex;
InterlockedAdd(RWTileHeadDataUnpacked[TileIndex * 4 + 1 + ListIndex], 1U, ArrayIndex);
if (ArrayIndex < MAX_OBJECTS_PER_TILE)
{
// Note: indexing so that threads are writing to RWTileArrayData coherently, has a huge impact on speed, even though the array data for one record is no longer dense
uint DataIndex = (ArrayIndex * TotalNumGroups + TileIndex) * NUM_CULLED_OBJECT_LISTS + ListIndex;
RWTileArrayData[DataIndex] = ObjectIndex;
}
#endif
}
}