Files
UnrealEngineUWP/Engine/Shaders/DistanceFieldScreenGridLighting.usf
Daniel Wright a973772ec9 GPU particle collision with distance fields
Global distance field can be accessed via new DistanceToNearestSurface and DistanceFieldGradient material nodes

[CL 2555411 by Daniel Wright in Main branch]
2015-05-18 13:21:23 -04:00

589 lines
24 KiB
Plaintext

// Copyright 1998-2015 Epic Games, Inc. All Rights Reserved.
/*=============================================================================
DistanceFieldScreenGridLighting.usf
=============================================================================*/
#include "Common.usf"
#include "DeferredShadingCommon.usf"
#include "DistanceFieldLightingShared.usf"
#include "DistanceFieldAOShared.usf"
#include "GlobalDistanceFieldShared.usf"
// Number of threads that cooperate on one screen grid record.
// ConeTraceObjectOcclusionCS packs these threads along the thread group's Y axis.
#define NUM_THREADS_PER_RECORD 4
// Thread group width of the object cone trace, measured in screen grid records
#define CONE_TRACE_DISPATCH_SIZEX (CULLED_TILE_SIZEX / TRACE_DOWNSAMPLE_FACTOR)
// Thread group height: as many record rows as columns, times the threads assigned to each record
#define CONE_TRACE_DISPATCH_SIZEY (CONE_TRACE_DISPATCH_SIZEX * NUM_THREADS_PER_RECORD)
// Number of shared visibility entries: one per record in the group, per cone direction
#define SHARED_VISIBILITY_DIM (CONE_TRACE_DISPATCH_SIZEX * CONE_TRACE_DISPATCH_SIZEX * NUM_CONE_DIRECTIONS)
// Minimum visibility per cone for every record in the group.
// Values are float bit patterns (asuint / asfloat) so that they can be combined with InterlockedMin.
groupshared uint SharedConeVisibility[SHARED_VISIBILITY_DIM];
// Minimum raw (pre distance fade) visibility per cone and per visibility step, same encoding as above.
// The first dimension is the same expression as SHARED_VISIBILITY_DIM.
// In the code visible in this file it is only accessed when SUPPORT_IRRADIANCE is enabled.
groupshared uint SharedConeRawVisibility[CONE_TRACE_DISPATCH_SIZEX * CONE_TRACE_DISPATCH_SIZEX * NUM_CONE_DIRECTIONS][NUM_VISIBILITY_STEPS];
// Per-tile depth ranges, indexed by tile index; .y is compared against SceneDepth to choose a culled object list when USE_DEPTH_RANGE_LISTS is enabled
Buffer<float4> TileConeDepthRanges;
// Multiplied by the distance along the cone to get the sphere radius used for the occlusion estimate at that distance
float TanConeHalfAngle;
// Output: one visibility value per screen grid cell per cone direction
RWBuffer<float> RWScreenGridConeVisibility;
// Output: NUM_VISIBILITY_STEPS values per screen grid cell per cone direction (written when SUPPORT_IRRADIANCE is enabled)
RWBuffer<float> RWConeDepthVisibilityFunction;
// When non-zero, ConeTraceObjectOcclusionCS traces against the tile culled objects,
// and the global distance field trace starts where the object trace range ends
#define CONE_TRACE_AGAINST_OBJECTS 1
/**
 * Traces every cone of the hemisphere around WorldNormal against the distance fields of the objects
 * culled to the tile that contains OutputCoordinate, and merges the per-cone minimum visibility into
 * SharedConeVisibility (and SharedConeRawVisibility when SUPPORT_IRRADIANCE is enabled) with InterlockedMin.
 *
 * NUM_THREADS_PER_RECORD threads call this for the same record; each one handles the culled objects
 * ThreadIndex, ThreadIndex + NUM_THREADS_PER_RECORD, ...
 *
 * @param OutputCoordinate - screen grid coordinate being shaded, used to find the culled object tile
 * @param SharedVisibilityBaseIndex - index of this record's first cone in the shared visibility arrays
 * @param ThreadIndex - index of the calling thread among the threads that share the record
 * @param WorldShadingPosition - world space position the cones start from
 * @param SceneDepth - depth of the shading point, used to pick an object list when USE_DEPTH_RANGE_LISTS is enabled
 * @param WorldNormal, TangentX, TangentY - basis that the sample cone directions are rotated into
 */
void HemisphereConeTraceAgainstTileCulledObjects(uint2 OutputCoordinate, uint SharedVisibilityBaseIndex, uint ThreadIndex, float3 WorldShadingPosition, float SceneDepth, float3 WorldNormal, float3 TangentX, float3 TangentY)
{
uint2 TileCoordinate = ComputeTileCoordinateFromScreenGrid(OutputCoordinate);
// TileHead.x is used below as the tile's base offset into TileArrayData, .yzw as the object counts of the three culled lists
uint4 TileHead = GetTileHead(TileCoordinate);
#if USE_DEPTH_RANGE_LISTS
// Only one list is traversed: list 0 when the shading point is nearer than ConeAxisDepthRanges.y, list 1 otherwise
uint TileIndex = TileCoordinate.y * TileListGroupSize.x + TileCoordinate.x;
float4 ConeAxisDepthRanges = TileConeDepthRanges.Load(TileIndex);
uint ListIndex = SceneDepth < ConeAxisDepthRanges.y ? 0 : 1;
uint NumObjectsAffectingTile = SceneDepth < ConeAxisDepthRanges.y ? TileHead.y : TileHead.z;
#else
// All three lists are traversed back to back as one virtual list
uint NumObjectsAffectingTile = TileHead.y + TileHead.z + TileHead.w;
#endif
uint NumCulledObjects = NumObjectsAffectingTile;
// Cones are never marched further than the offset of the step past the last one
float MaxWorldStepOffset = GetStepOffset(NUM_CONE_STEPS);
float InvMaxOcclusionDistance = 1.0f / AOObjectMaxDistance;
#if USE_GLOBAL_DISTANCE_FIELD
InvMaxOcclusionDistance = 1.0f / AOGlobalMaxOcclusionDistance;
#endif
// Objects are interleaved across the threads that share this record
LOOP
for (uint ListObjectIndex = ThreadIndex; ListObjectIndex < NumCulledObjects; ListObjectIndex += NUM_THREADS_PER_RECORD)
{
#if USE_DEPTH_RANGE_LISTS
uint ArrayIndex = ListObjectIndex;
#else
// Map the index in the virtual list back to (list, index within that list)
uint ListIndex = 0;
uint ArrayIndex = ListObjectIndex;
FLATTEN
if (ListObjectIndex >= TileHead.y + TileHead.z)
{
ListIndex = 2;
ArrayIndex = ListObjectIndex - TileHead.y - TileHead.z;
}
else if (ListObjectIndex >= TileHead.y)
{
ListIndex = 1;
ArrayIndex = ListObjectIndex - TileHead.y;
}
#endif
uint ObjectIndex = TileArrayData.Load((ArrayIndex * TileListGroupSize.x * TileListGroupSize.y + TileHead.x) * NUM_CULLED_OBJECT_LISTS + ListIndex);
float4 ObjectPositionAndRadius = LoadObjectPositionAndRadius(ObjectIndex);
float ObjectDistanceSq = dot(ObjectPositionAndRadius.xyz - WorldShadingPosition, ObjectPositionAndRadius.xyz - WorldShadingPosition);
BRANCH
// Skip tracing objects with a small projected angle
// (only objects with Radius / Distance above .25 are traced; the comparison is done on squared values)
if (ObjectPositionAndRadius.w * ObjectPositionAndRadius.w / ObjectDistanceSq > Square(.25f))
{
float3 LocalPositionExtent = LoadObjectLocalPositionExtent(ObjectIndex);
float4x4 WorldToVolume = LoadObjectWorldToVolume(ObjectIndex);
bool bGeneratedAsTwoSided;
float4 UVScaleAndVolumeScale = LoadObjectUVScale(ObjectIndex, bGeneratedAsTwoSided);
float3 VolumeShadingPosition = mul(float4(WorldShadingPosition, 1), WorldToVolume).xyz;
// Distance from the shading point to the object's volume box, scaled by UVScaleAndVolumeScale.w before the world space comparison below
float BoxDistance = ComputeDistanceFromBoxToPoint(-LocalPositionExtent, LocalPositionExtent, VolumeShadingPosition) * UVScaleAndVolumeScale.w;
BRANCH
if (BoxDistance < AOObjectMaxDistance)
{
float3 UVAdd = LoadObjectUVAdd(ObjectIndex);
uint StartStepIndex = 0;
#if !USE_DEPTH_RANGE_LISTS
// Objects from the second and third lists start marching at a later step (5 and 8 respectively)
FLATTEN
if (ListObjectIndex >= TileHead.y + TileHead.z)
{
StartStepIndex = 8;
}
else if (ListObjectIndex >= TileHead.y)
{
StartStepIndex = 5;
}
#endif
// Radius used to limit how much this object may occlude a cone step, see the max() against Visibility below
float ObjectOccluderRadius = length(LocalPositionExtent) * .5f * UVScaleAndVolumeScale.w;
LOOP
for (uint ConeIndex = 0; ConeIndex < NUM_CONE_DIRECTIONS; ConeIndex++)
{
float3 ConeDirection = AOSamples2.SampleDirections[ConeIndex].xyz;
// Rotate the sample direction into the (TangentX, TangentY, WorldNormal) basis
float3 RotatedConeDirection = ConeDirection.x * TangentX + ConeDirection.y * TangentY + ConeDirection.z * WorldNormal;
float MinVisibility = 1;
// Only initialized and used when SUPPORT_IRRADIANCE is enabled
float MinRawVisibility[NUM_VISIBILITY_STEPS];
#if SUPPORT_IRRADIANCE
UNROLL
for (uint i = 0; i < NUM_VISIBILITY_STEPS; i++)
{
MinRawVisibility[i] = 1;
}
#endif
float WorldStepOffset = GetStepOffset(StartStepIndex);
#if USE_GLOBAL_DISTANCE_FIELD
WorldStepOffset += 10;
#endif
// March along the cone; the loop ends after NUM_CONE_STEPS steps or once the offset reaches MaxWorldStepOffset
LOOP
for (uint StepIndex = StartStepIndex; StepIndex < NUM_CONE_STEPS && WorldStepOffset < MaxWorldStepOffset; StepIndex++)
{
float3 WorldSamplePosition = WorldShadingPosition + RotatedConeDirection * WorldStepOffset;
float3 StepSamplePosition = mul(float4(WorldSamplePosition, 1), WorldToVolume).xyz;
// Sample positions outside of the volume are clamped to its bounds, and the clamp distance is added to the sampled distance
float3 ClampedSamplePosition = clamp(StepSamplePosition, -LocalPositionExtent, LocalPositionExtent);
float DistanceToClamped = length(StepSamplePosition - ClampedSamplePosition);
float3 StepVolumeUV = DistanceFieldVolumePositionToUV(ClampedSamplePosition, UVScaleAndVolumeScale.xyz, UVAdd);
float DistanceToOccluder = (Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, StepVolumeUV, 0).x + DistanceToClamped) * UVScaleAndVolumeScale.w;
// Radius of the cone at this distance from the shading point
float SphereRadius = WorldStepOffset * TanConeHalfAngle;
//@todo - have to bias away from surface further for this to work
float ShadingSphereRadius = SphereRadius * 1.0f;
// Derive visibility from 1d intersection
float Visibility = saturate(DistanceToOccluder / ShadingSphereRadius);
// Don't allow small objects to fully occlude a cone step
Visibility = max(Visibility, 1 - saturate(ObjectOccluderRadius / SphereRadius));
#if SUPPORT_IRRADIANCE
// Bucket for this step, from the fraction of the max occlusion distance that has been marched.
// NOTE(review): the index is not clamped to NUM_VISIBILITY_STEPS - 1 - confirm that WorldStepOffset * InvMaxOcclusionDistance always stays below 1
uint VisibilityIndex = NUM_VISIBILITY_STEPS * WorldStepOffset * InvMaxOcclusionDistance;
// Less GI occlusion for two sided meshes, which can't separate self-occlusion
//@todo - expose
float TwoSidedVisibilityScale = bGeneratedAsTwoSided ? 100 : 1;
// Track raw visibility before the distance fade for GI shadowing
MinRawVisibility[VisibilityIndex] = min(MinRawVisibility[VisibilityIndex], TwoSidedVisibilityScale * Visibility);
#endif
float OccluderDistanceFraction = (WorldStepOffset + DistanceToOccluder) * InvMaxOcclusionDistance;
// Fade out occlusion based on distance to occluder to avoid a discontinuity at the max AO distance
Visibility = max(Visibility, saturate(OccluderDistanceFraction * OccluderDistanceFraction * .6f));
MinVisibility = min(MinVisibility, Visibility);
// Advance by the distance to the nearest occluder, but never by less than a fraction of the nominal step spacing
float MinStepScale = .6f;
#if USE_GLOBAL_DISTANCE_FIELD
MinStepScale = 2;
#endif
float MinStepSize = MinStepScale * (GetStepOffset(StepIndex + 1) - GetStepOffset(StepIndex));
WorldStepOffset += max(DistanceToOccluder, MinStepSize);
}
// The visibility values are never negative, so comparing their bit patterns as uints orders them the same way as comparing the floats
InterlockedMin(SharedConeVisibility[SharedVisibilityBaseIndex + ConeIndex], asuint(MinVisibility));
#if SUPPORT_IRRADIANCE
UNROLL
for (uint i = 0; i < NUM_VISIBILITY_STEPS; i++)
{
InterlockedMin(SharedConeRawVisibility[SharedVisibilityBaseIndex + ConeIndex][i], asuint(MinRawVisibility[i]));
}
#endif
}
}
}
}
}
/**
 * Traces cones of a hemisphere against intersecting object distance fields.
 *
 * Each screen grid record is processed by NUM_THREADS_PER_RECORD threads that are adjacent along the group's Y axis.
 * The threads of a record split the culled objects between them, merge their results through groupshared memory,
 * and then split the cones between them when writing the final values out.
 */
[numthreads(CONE_TRACE_DISPATCH_SIZEX, CONE_TRACE_DISPATCH_SIZEY, 1)]
void ConeTraceObjectOcclusionCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	// Which record this thread works on, globally and within the thread group
	const uint2 OutputCoordinate = uint2(DispatchThreadId.x, DispatchThreadId.y / NUM_THREADS_PER_RECORD);
	const uint2 RecordInGroup = uint2(GroupThreadId.x, GroupThreadId.y / NUM_THREADS_PER_RECORD);
	const uint SharedVisibilityBaseIndex = (RecordInGroup.y * CONE_TRACE_DISPATCH_SIZEX + RecordInGroup.x) * NUM_CONE_DIRECTIONS;
	// Index of this thread among the threads sharing the record
	const uint ThreadIndex = GroupThreadId.y % NUM_THREADS_PER_RECORD;

	// Reset the shared accumulators to fully visible, spreading the entries over all threads of the group
	const uint EncodedOne = asuint(1.0f);
	const uint FlatThreadIndex = GroupThreadId.y * CONE_TRACE_DISPATCH_SIZEX + GroupThreadId.x;
	const uint ThreadsPerGroup = CONE_TRACE_DISPATCH_SIZEX * CONE_TRACE_DISPATCH_SIZEY;

	for (uint SlotIndex = FlatThreadIndex; SlotIndex < SHARED_VISIBILITY_DIM; SlotIndex += ThreadsPerGroup)
	{
		SharedConeVisibility[SlotIndex] = EncodedOne;

#if SUPPORT_IRRADIANCE
		UNROLL
		for (uint RawStep = 0; RawStep < NUM_VISIBILITY_STEPS; RawStep++)
		{
			SharedConeRawVisibility[SlotIndex][RawStep] = EncodedOne;
		}
#endif
	}

	GroupMemoryBarrierWithGroupSync();

	// Fetch the surface being shaded
	float3 WorldNormal;
	float SceneDepth;
	bool bHasDistanceFieldRepresentation;
	bool bHasHeightfieldRepresentation;
	float2 BaseLevelScreenUV = GetBaseLevelScreenUVFromScreenGrid(OutputCoordinate);
	GetDownsampledGBuffer(BaseLevelScreenUV, WorldNormal, SceneDepth, bHasDistanceFieldRepresentation, bHasHeightfieldRepresentation);

	float3 TangentX;
	float3 TangentY;
	FindBestAxisVectors2(WorldNormal, TangentX, TangentY);

	// Reconstruct the world space shading position from the screen position and depth
	float2 ScreenUV = GetScreenUVFromScreenGrid(OutputCoordinate);
	float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
	float4 HomogeneousWorldPosition = mul(float4(ScreenPosition * SceneDepth, SceneDepth, 1), View.ScreenToWorld);
	float3 WorldShadingPosition = HomogeneousWorldPosition.xyz / HomogeneousWorldPosition.w;

#if CONE_TRACE_AGAINST_OBJECTS
	HemisphereConeTraceAgainstTileCulledObjects(OutputCoordinate, SharedVisibilityBaseIndex, ThreadIndex, WorldShadingPosition, SceneDepth, WorldNormal, TangentX, TangentY);
#endif

	// All threads of the record must have merged their results before anything is read back
	GroupMemoryBarrierWithGroupSync();

	if (all(OutputCoordinate < ScreenGridConeVisibilitySize))
	{
		const uint OutputBaseIndex = (OutputCoordinate.y * ScreenGridConeVisibilitySize.x + OutputCoordinate.x) * NUM_CONE_DIRECTIONS;

		// The threads of the record interleave the cones between them
		for (uint ConeIndex = ThreadIndex; ConeIndex < NUM_CONE_DIRECTIONS; ConeIndex += NUM_THREADS_PER_RECORD)
		{
			const uint SharedConeIndex = SharedVisibilityBaseIndex + ConeIndex;
			const uint OutputConeIndex = OutputBaseIndex + ConeIndex;

			RWScreenGridConeVisibility[OutputConeIndex] = asfloat(SharedConeVisibility[SharedConeIndex]);

#if SUPPORT_IRRADIANCE
			float RunningMinVisibility = 1;

			UNROLL
			for (uint StepIndex = 0; StepIndex < NUM_VISIBILITY_STEPS; StepIndex++)
			{
				// Propagate min visibility down the cone
				RunningMinVisibility = min(RunningMinVisibility, asfloat(SharedConeRawVisibility[SharedConeIndex][StepIndex]));
				RWConeDepthVisibilityFunction[OutputConeIndex * NUM_VISIBILITY_STEPS + StepIndex] = RunningMinVisibility;
			}
#endif
		}
	}
}
/**
 * Traces every cone of the hemisphere around WorldNormal against one clipmap of the global distance field,
 * and combines the result with the value already stored in RWScreenGridConeVisibility using min().
 *
 * @param ClipmapIndex - which global distance field clipmap to sample
 * @param OutputBaseIndex - index of this screen grid cell's first cone in RWScreenGridConeVisibility
 * @param WorldShadingPosition - world space position the cones start from
 * @param SceneDepth - unused by this function
 * @param WorldNormal, TangentX, TangentY - basis that the sample cone directions are rotated into
 */
void HemisphereConeTraceAgainstGlobalDistanceFieldClipmap(
	uniform uint ClipmapIndex,
	uint OutputBaseIndex,
	float3 WorldShadingPosition,
	float SceneDepth,
	float3 WorldNormal,
	float3 TangentX,
	float3 TangentY)
{
	// When objects are traced separately, start where the object trace range ends
	const float ObjectTraceEndOffset = GetStepOffset(NUM_CONE_STEPS);
	const float InitialStepOffset = CONE_TRACE_AGAINST_OBJECTS ? ObjectTraceEndOffset : 20;
	// Smallest allowed advance per step, derived from the clipmap's extent
	const float MinStepSize = GlobalVolumeCenterAndExtent[ClipmapIndex].w * 2 / 300.0f;

	LOOP
	for (uint ConeIndex = 0; ConeIndex < NUM_CONE_DIRECTIONS; ConeIndex++)
	{
		const float3 LocalConeDirection = AOSamples2.SampleDirections[ConeIndex].xyz;
		const float3 WorldConeDirection = LocalConeDirection.x * TangentX + LocalConeDirection.y * TangentY + LocalConeDirection.z * WorldNormal;

		float ConeVisibility = 1;
		float StepOffset = InitialStepOffset;

		LOOP
		for (uint StepIndex = 0; StepIndex < NUM_CONE_STEPS && StepOffset < AOGlobalMaxOcclusionDistance; StepIndex++)
		{
			const float3 SamplePosition = WorldShadingPosition + WorldConeDirection * StepOffset;
			const float3 ClipmapUV = ComputeGlobalUV(SamplePosition, ClipmapIndex);
			const float DistanceToOccluder = Texture3DSampleLevel(GlobalDistanceFieldTexture[ClipmapIndex], GlobalDistanceFieldSampler[ClipmapIndex], ClipmapUV, 0).x;

			// Radius of the cone at this distance from the shading point
			const float SphereRadius = StepOffset * TanConeHalfAngle;
			//@todo - have to bias away from surface further for this to work
			const float ShadingSphereRadius = SphereRadius * 1.0f;

			// Derive visibility from 1d intersection
			float StepVisibility = saturate(DistanceToOccluder / ShadingSphereRadius);

			// Fade out occlusion based on distance to occluder to avoid a discontinuity at the max AO distance
			const float OccluderDistanceFraction = (StepOffset + DistanceToOccluder) / AOGlobalMaxOcclusionDistance;
			StepVisibility = max(StepVisibility, saturate(OccluderDistanceFraction * OccluderDistanceFraction * .6f));

			ConeVisibility = min(ConeVisibility, StepVisibility);
			StepOffset += max(DistanceToOccluder, MinStepSize);
		}

		// Don't have to use atomics here, each cone visibility value will be modified by one thread
		const uint OutputIndex = OutputBaseIndex + ConeIndex;
		RWScreenGridConeVisibility[OutputIndex] = min(ConeVisibility, RWScreenGridConeVisibility[OutputIndex]);
	}
}
/**
 * Traces the hemisphere of cones against the global distance field.
 *
 * With USE_GLOBAL_CLIPMAPS enabled, clipmaps 0 to 3 are tested in order and the trace runs against the first one
 * for which ComputeDistanceFromBoxToPointInside returns more than AOGlobalMaxOcclusionDistance for the shading position
 * (presumably: the point is far enough inside the clipmap for a full length cone to stay within it - confirm against the helper).
 * If no clipmap passes the test, nothing is traced and the stored visibility is left untouched.
 * Each call uses a literal clipmap index, which is why the selection is written as nested branches instead of a loop.
 *
 * @param OutputBaseIndex - index of this screen grid cell's first cone in RWScreenGridConeVisibility
 * @param WorldShadingPosition - world space position the cones start from
 * @param SceneDepth - forwarded to the clipmap trace
 * @param WorldNormal, TangentX, TangentY - basis that the sample cone directions are rotated into
 */
void HemisphereConeTraceAgainstGlobalDistanceField(uint OutputBaseIndex, float3 WorldShadingPosition, float SceneDepth, float3 WorldNormal, float3 TangentX, float3 TangentY)
{
#define USE_GLOBAL_CLIPMAPS 1
#if USE_GLOBAL_CLIPMAPS
	float DistanceFromClipmap = ComputeDistanceFromBoxToPointInside(GlobalVolumeCenterAndExtent[0].xyz, GlobalVolumeCenterAndExtent[0].www, WorldShadingPosition);

	BRANCH
	if (DistanceFromClipmap > AOGlobalMaxOcclusionDistance)
	{
		HemisphereConeTraceAgainstGlobalDistanceFieldClipmap(0, OutputBaseIndex, WorldShadingPosition, SceneDepth, WorldNormal, TangentX, TangentY);
	}
	else
	{
		DistanceFromClipmap = ComputeDistanceFromBoxToPointInside(GlobalVolumeCenterAndExtent[1].xyz, GlobalVolumeCenterAndExtent[1].www, WorldShadingPosition);

		BRANCH
		if (DistanceFromClipmap > AOGlobalMaxOcclusionDistance)
		{
			HemisphereConeTraceAgainstGlobalDistanceFieldClipmap(1, OutputBaseIndex, WorldShadingPosition, SceneDepth, WorldNormal, TangentX, TangentY);
		}
		else
		{
			DistanceFromClipmap = ComputeDistanceFromBoxToPointInside(GlobalVolumeCenterAndExtent[2].xyz, GlobalVolumeCenterAndExtent[2].www, WorldShadingPosition);
			float DistanceFromLastClipmap = ComputeDistanceFromBoxToPointInside(GlobalVolumeCenterAndExtent[3].xyz, GlobalVolumeCenterAndExtent[3].www, WorldShadingPosition);

			BRANCH
			if (DistanceFromClipmap > AOGlobalMaxOcclusionDistance)
			{
				HemisphereConeTraceAgainstGlobalDistanceFieldClipmap(2, OutputBaseIndex, WorldShadingPosition, SceneDepth, WorldNormal, TangentX, TangentY);
			}
			else if (DistanceFromLastClipmap > AOGlobalMaxOcclusionDistance)
			{
				HemisphereConeTraceAgainstGlobalDistanceFieldClipmap(3, OutputBaseIndex, WorldShadingPosition, SceneDepth, WorldNormal, TangentX, TangentY);
			}
		}
	}
#else
	// Fallback without clipmap selection: always trace against the first clipmap.
	// The argument list matches the clipmap trace signature so that this path compiles when USE_GLOBAL_CLIPMAPS is set to 0.
	HemisphereConeTraceAgainstGlobalDistanceFieldClipmap(0, OutputBaseIndex, WorldShadingPosition, SceneDepth, WorldNormal, TangentX, TangentY);
#endif
}
// Thread group size of ConeTraceGlobalOcclusionCS along X and Y; defaults to 1 when not already defined
#ifndef CONE_TRACE_GLOBAL_DISPATCH_SIZEX
#define CONE_TRACE_GLOBAL_DISPATCH_SIZEX 1
#endif
/**
 * Traces cones of a hemisphere against the global distance field, one thread per screen grid cell.
 * The results are combined into RWScreenGridConeVisibility by the trace function.
 */
[numthreads(CONE_TRACE_GLOBAL_DISPATCH_SIZEX, CONE_TRACE_GLOBAL_DISPATCH_SIZEX, 1)]
void ConeTraceGlobalOcclusionCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint2 OutputCoordinate = DispatchThreadId.xy;

	// Threads that fall outside of the screen grid have nothing to do
	if (!all(OutputCoordinate < ScreenGridConeVisibilitySize))
	{
		return;
	}

	// Fetch the surface being shaded
	float3 WorldNormal;
	float SceneDepth;
	bool bHasDistanceFieldRepresentation;
	bool bHasHeightfieldRepresentation;
	GetDownsampledGBuffer(GetBaseLevelScreenUVFromScreenGrid(OutputCoordinate), WorldNormal, SceneDepth, bHasDistanceFieldRepresentation, bHasHeightfieldRepresentation);

	float3 TangentX;
	float3 TangentY;
	FindBestAxisVectors2(WorldNormal, TangentX, TangentY);

	// Reconstruct the world space shading position from the screen position and depth
	const float2 ScreenUV = GetScreenUVFromScreenGrid(OutputCoordinate);
	const float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
	const float4 HomogeneousWorldPosition = mul(float4(ScreenPosition * SceneDepth, SceneDepth, 1), View.ScreenToWorld);
	const float3 WorldShadingPosition = HomogeneousWorldPosition.xyz / HomogeneousWorldPosition.w;

	const uint OutputBaseIndex = (OutputCoordinate.y * ScreenGridConeVisibilitySize.x + OutputCoordinate.x) * NUM_CONE_DIRECTIONS;
	HemisphereConeTraceAgainstGlobalDistanceField(OutputBaseIndex, WorldShadingPosition, SceneDepth, WorldNormal, TangentX, TangentY);
}
// Input: one visibility value per screen grid cell per cone direction
Buffer<float> ScreenGridConeVisibility;
// Output: bent normal in xyz, scene depth in w
RWTexture2D<float4> RWDistanceFieldBentNormal;
// Thread group size of CombineConeVisibilityCS along X and Y; defaults to 1 when not already defined
#ifndef COMBINE_CONES_SIZEX
#define COMBINE_CONES_SIZEX 1
#endif
/**
 * Combines the per-cone visibility of each screen grid cell into a bent normal:
 * the visibility weighted sum of the world space cone directions, scaled by BentNormalNormalizeFactor / NUM_CONE_DIRECTIONS.
 * NOTE(review): unlike ConeTraceGlobalOcclusionCS, there is no bounds check against ScreenGridConeVisibilitySize here -
 * confirm that the dispatch never overshoots the grid, or that out of range accesses are benign on all target platforms.
 */
[numthreads(COMBINE_CONES_SIZEX, COMBINE_CONES_SIZEX, 1)]
void CombineConeVisibilityCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint2 OutputCoordinate = DispatchThreadId.xy;

	// Fetch the surface being shaded
	float3 WorldNormal;
	float SceneDepth;
	bool bHasDistanceFieldRepresentation;
	bool bHasHeightfieldRepresentation;
	GetDownsampledGBuffer(GetBaseLevelScreenUVFromScreenGrid(OutputCoordinate), WorldNormal, SceneDepth, bHasDistanceFieldRepresentation, bHasHeightfieldRepresentation);

	float3 TangentX;
	float3 TangentY;
	FindBestAxisVectors2(WorldNormal, TangentX, TangentY);

	const uint FirstConeIndex = (OutputCoordinate.y * ScreenGridConeVisibilitySize.x + OutputCoordinate.x) * NUM_CONE_DIRECTIONS;

	// Accumulate the cone directions, each weighted by how visible that cone is
	float3 WeightedDirectionSum = 0;

	for (uint ConeIndex = 0; ConeIndex < NUM_CONE_DIRECTIONS; ConeIndex++)
	{
		const float3 LocalConeDirection = AOSamples2.SampleDirections[ConeIndex].xyz;
		const float3 WorldConeDirection = LocalConeDirection.x * TangentX + LocalConeDirection.y * TangentY + LocalConeDirection.z * WorldNormal;
		WeightedDirectionSum += ScreenGridConeVisibility[FirstConeIndex + ConeIndex] * WorldConeDirection;
	}

	const float InvNumCones = 1.0f / (float)NUM_CONE_DIRECTIONS;
	RWDistanceFieldBentNormal[OutputCoordinate] = float4(WeightedDirectionSum * (BentNormalNormalizeFactor * InvNumCones), SceneDepth);
}
// Size of one texel of the downsampled GBuffer, in UV units
float2 DistanceFieldGBufferTexelSize;
// xy = size of the low resolution bent normal buffer, zw = size of one of its texels in UV units
float4 BentNormalBufferAndTexelSize;
float MinDownsampleFactorToBaseLevel;
/**
 * Computes how well the normals at the four corners of a low resolution texel quad agree with WorldNormal.
 *
 * @param Corner00UV - UV of the quad's first corner
 * @param LowResTexelSize - UV offset to the neighboring corners along X and Y
 * @param WorldNormal - normal of the pixel being upsampled
 * @return dot(corner normal, WorldNormal) for the corners at offsets (0,0), (1,0), (0,1) and (1,1) in xyzw, clamped to at least .0001
 */
float4 GetNormalWeights(float2 Corner00UV, float2 LowResTexelSize, float3 WorldNormal)
{
	// Only the normal is needed from the GBuffer lookups; the remaining outputs are discarded
	float IgnoredDepth;
	bool bIgnoredDistanceField;
	bool bIgnoredHeightfield;

	float3 Normal00;
	float3 Normal10;
	float3 Normal01;
	float3 Normal11;

	GetDownsampledGBuffer(Corner00UV, Normal00, IgnoredDepth, bIgnoredDistanceField, bIgnoredHeightfield);
	GetDownsampledGBuffer(Corner00UV + float2(LowResTexelSize.x, 0), Normal10, IgnoredDepth, bIgnoredDistanceField, bIgnoredHeightfield);
	GetDownsampledGBuffer(Corner00UV + float2(0, LowResTexelSize.y), Normal01, IgnoredDepth, bIgnoredDistanceField, bIgnoredHeightfield);
	GetDownsampledGBuffer(Corner00UV + LowResTexelSize, Normal11, IgnoredDepth, bIgnoredDistanceField, bIgnoredHeightfield);

	const float4 Alignment = float4(
		dot(Normal00, WorldNormal),
		dot(Normal10, WorldNormal),
		dot(Normal01, WorldNormal),
		dot(Normal11, WorldNormal));

	// Keep every weight positive so that corners facing away still contribute a little
	return max(Alignment, .0001f);
}
// 1 / ((1 - FadeDistanceFraction) * AOMaxViewDistance)
float DistanceFadeScale;
/**
 * Upsamples the low resolution bent normal (and irradiance when SUPPORT_IRRADIANCE is enabled) to the resolution of this pass.
 * The four surrounding low resolution texels are blended with bilinear weights that are further scaled by
 * how closely each texel's depth and normal match the pixel being shaded, so that values do not leak across geometry edges.
 *
 * @param UVAndScreenPos - xy is the UV used to look up the GBuffer and to locate the low resolution texels
 * @param OutBentNormal - xyz = upsampled bent normal, faded toward unoccluded in the distance; w = scene depth
 * @param OutIrradiance - xyz = upsampled irradiance, w = 0 (SUPPORT_IRRADIANCE only)
 */
void GeometryAwareUpsamplePS(
in float4 UVAndScreenPos : TEXCOORD0
,out float4 OutBentNormal : SV_Target0
#if SUPPORT_IRRADIANCE
,out float4 OutIrradiance : SV_Target1
#endif
)
{
float3 WorldNormal;
float SceneDepth;
bool bHasDistanceFieldRepresentation;
bool bHasHeightfieldRepresentation;
GetDownsampledGBuffer(UVAndScreenPos.xy, WorldNormal, SceneDepth, bHasDistanceFieldRepresentation, bHasHeightfieldRepresentation);
float2 LowResBufferSize = BentNormalBufferAndTexelSize.xy;
float2 LowResTexelSize = BentNormalBufferAndTexelSize.zw;
// Snap to the corner of the low resolution texel quad containing this pixel, after removing the jitter offset
float2 Corner00UV = floor((UVAndScreenPos.xy - JitterOffset * DistanceFieldGBufferTexelSize) * LowResBufferSize) * LowResTexelSize;
// Position of this pixel relative to that corner, in low resolution texels
float2 BilinearWeights = (UVAndScreenPos.xy - (Corner00UV + JitterOffset * DistanceFieldGBufferTexelSize)) * LowResBufferSize;
// Sample at texel centers so that each fetch returns a single low resolution texel
float2 LowResCorner00UV = Corner00UV + .5f * LowResTexelSize;
float4 TextureValues00 = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, LowResCorner00UV, 0);
float4 TextureValues10 = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, LowResCorner00UV + float2(LowResTexelSize.x, 0), 0);
float4 TextureValues01 = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, LowResCorner00UV + float2(0, LowResTexelSize.y), 0);
float4 TextureValues11 = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, LowResCorner00UV + LowResTexelSize, 0);
// Standard bilinear weights, in the order 00, 10, 01, 11
float4 CornerWeights = float4(
(1 - BilinearWeights.y) * (1 - BilinearWeights.x),
(1 - BilinearWeights.y) * BilinearWeights.x,
BilinearWeights.y * (1 - BilinearWeights.x),
BilinearWeights.y * BilinearWeights.x);
// The w channel of the low resolution samples is treated as depth; its sign is discarded
float4 CornerDepths = abs(float4(TextureValues00.w, TextureValues10.w, TextureValues01.w, TextureValues11.w));
//float4 DepthWeights = max(exp2(-abs(CornerDepths - SceneDepth.xxxx) * .01f), .001f);
float Epsilon = .0001f;
// Full weight while the depth difference is within about 10 units, then falling off as 10 / difference
float4 DepthWeights = min(10.0f / (abs(CornerDepths - SceneDepth.xxxx) + Epsilon), 1);
float2 FullResCorner00UV = Corner00UV + (JitterOffset + .5f) * DistanceFieldGBufferTexelSize;
float4 NormalWeights = GetNormalWeights(FullResCorner00UV, LowResTexelSize, WorldNormal);
float4 FinalWeights = CornerWeights * DepthWeights * NormalWeights;
// Guard against a division by zero when all four weights vanish
float InvSafeWeight = 1.0f / max(dot(FinalWeights, 1), .00001f);
float3 AverageBentNormal =
(FinalWeights.x * TextureValues00.xyz
+ FinalWeights.y * TextureValues10.xyz
+ FinalWeights.z * TextureValues01.xyz
+ FinalWeights.w * TextureValues11.xyz)
* InvSafeWeight;
float BentNormalLength = length(AverageBentNormal);
// Weighted average of the individual vector lengths, used below to restore the length lost by averaging differing directions
float AverageLength =
(FinalWeights.x * length(TextureValues00.xyz)
+ FinalWeights.y * length(TextureValues10.xyz)
+ FinalWeights.z * length(TextureValues01.xyz)
+ FinalWeights.w * length(TextureValues11.xyz))
* InvSafeWeight;
if (BentNormalLength < AverageLength && BentNormalLength > 0)
{
// Fixup normal shortening due to weighted average of vectors
AverageBentNormal = AverageBentNormal / BentNormalLength * AverageLength;
}
OutBentNormal = float4(AverageBentNormal, SceneDepth);
// Fade to unoccluded in the distance
//@todo - box distance from largest cascade
BentNormalLength = length(OutBentNormal.rgb);
// FadeAlpha is 1 near the camera and reaches 0 at AOMaxViewDistance, where the bent normal becomes unit length
float FadeAlpha = saturate((AOMaxViewDistance - SceneDepth) * DistanceFadeScale);
float3 NormalizedBentNormal = OutBentNormal.rgb / max(BentNormalLength, .0001f);
OutBentNormal.rgb = NormalizedBentNormal * lerp(1, BentNormalLength, FadeAlpha);
//OutBentNormal = float4(WorldNormal, SceneDepth);
#if SUPPORT_IRRADIANCE
// Irradiance is filtered with the same weights as the bent normal
float4 IrradianceValues00 = Texture2DSampleLevel(IrradianceTexture, IrradianceSampler, LowResCorner00UV, 0);
float4 IrradianceValues10 = Texture2DSampleLevel(IrradianceTexture, IrradianceSampler, LowResCorner00UV + float2(LowResTexelSize.x, 0), 0);
float4 IrradianceValues01 = Texture2DSampleLevel(IrradianceTexture, IrradianceSampler, LowResCorner00UV + float2(0, LowResTexelSize.y), 0);
float4 IrradianceValues11 = Texture2DSampleLevel(IrradianceTexture, IrradianceSampler, LowResCorner00UV + LowResTexelSize, 0);
float3 AverageIrradiance =
(FinalWeights.x * IrradianceValues00.xyz
+ FinalWeights.y * IrradianceValues10.xyz
+ FinalWeights.z * IrradianceValues01.xyz
+ FinalWeights.w * IrradianceValues11.xyz)
* InvSafeWeight;
OutIrradiance = float4(AverageIrradiance, 0);
#endif
}