You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
a7375cd95c
Fixed skylight crash when building lighting with no specified cubemap #PR 491 [CL 2329568 by Daniel Wright in Main branch]
1073 lines
42 KiB
Plaintext
1073 lines
42 KiB
Plaintext
// Copyright 1998-2014 Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
DistanceFieldSurfaceCacheLighting.usf
|
|
=============================================================================*/
|
|
|
|
#include "Common.usf"
|
|
#include "DeferredShadingCommon.usf"
|
|
#include "DistanceFieldLightingShared.usf"
|
|
#include "DistanceFieldAOShared.usf"
|
|
|
|
/** Interpolants produced by ObjectCullVS and consumed by ObjectCullPS. */
struct FObjectCullVertexOutput
{
	/** Clip space position of the bounding geometry vertex. */
	float4 Position : SV_POSITION;

	/** Value returned by LoadObjectPositionAndRadius for the object (xyz = position, w = radius), not interpolated. */
	nointerpolation float4 PositionAndRadius : TEXCOORD0;

	/** Index of the object being culled, taken from the instance id, not interpolated. */
	nointerpolation uint ObjectIndex : TEXCOORD1;
};
|
|
|
|
/** Multiplier applied to the culling radius (object radius + AOMaxDistance) in ObjectCullVS. */
float ConservativeRadiusScale;

/**
 * Vertex shader used when culling objects into screenspace tile lists.
 * One instance is drawn per object; the incoming vertex is scaled by the object's expanded radius
 * and offset to the object's position before being projected.
 *
 * @param InPosition - vertex of the bounding geometry, only xyz is used
 * @param ObjectIndex - instance id, used as the index of the object to load
 * @param Output - interpolants for ObjectCullPS
 */
void ObjectCullVS(
	float4 InPosition : ATTRIBUTE0,
	uint ObjectIndex : SV_InstanceID,
	out FObjectCullVertexOutput Output
	)
{
	//@todo - implement ConservativelyBoundSphere
	const float4 BoundingSphere = LoadObjectPositionAndRadius(ObjectIndex);

	// The pixel shader gets the unexpanded sphere and the object index
	Output.PositionAndRadius = BoundingSphere;
	Output.ObjectIndex = ObjectIndex;

	//@todo - expand to handle conservative rasterization
	const float ScaledRadius = (BoundingSphere.w + AOMaxDistance) * ConservativeRadiusScale;
	const float3 VertexWorldPosition = InPosition.xyz * ScaledRadius + BoundingSphere.xyz;

	Output.Position = mul(float4(VertexWorldPosition, 1), View.WorldToClip);
}
|
|
|
|
/** Per-tile head data. ObjectCullPS atomically increments element [TileIndex * 4 + 1 + GroupIndex] to reserve a list slot. */
RWBuffer<uint> RWTileHeadDataUnpacked;
/** Receives the index of every object that passed the per-tile intersection test. */
RWBuffer<uint> RWTileArrayData;

/** Per-tile bounding cone, xyz is read as the cone axis and w as the cosine of the cone angle. */
Buffer<float4> TileConeAxisAndCos;
/** Per-tile depth ranges, handed unchanged to SphereIntersectConeWithDepthRanges. */
Buffer<float4> TileConeDepthRanges;

/** Tile grid dimensions. x is the row pitch used to flatten a tile position, x * y is used as the total tile count. */
float2 NumGroups;

/**
 * Intersects a single object with the tile and adds to the intersection list if needed.
 * Each pixel corresponds to one tile; the object is tested against the tile's cone once per culled object list,
 * and is appended under the lowest group index whose test passed.
 *
 * @param Input - object bounds and index from ObjectCullVS
 * @param SVPos - pixel position, truncated to get the tile position
 * @param OutColor - always written as 0, results go out through the UAVs
 */
void ObjectCullPS(
	FObjectCullVertexOutput Input,
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : COLOR0)
{
	OutColor = 0;

	const uint2 TileCoord = (uint2)SVPos.xy;
	const uint TileIndex = TileCoord.y * NumGroups.x + TileCoord.x;

	// Bounding cone of the tile, its vertex sits at the view space origin
	const float4 AxisAndCos = TileConeAxisAndCos.Load(TileIndex);
	const float4 DepthRanges = TileConeDepthRanges.Load(TileIndex);
	const float3 ConeVertex = 0;
	const float3 ConeAxis = AxisAndCos.xyz;
	const float ConeCos = AxisAndCos.w;
	const float ConeSin = sqrt(1 - ConeCos * ConeCos);

	// A value of 1 is conservative, but has a huge impact on performance
	const float SampleRadiusScale = .5f;

	const float4 ObjectSphere = Input.PositionAndRadius;
	const float3 SphereCenterInView = mul(float4(ObjectSphere.xyz + View.PreViewTranslation.xyz, 1), View.TranslatedWorldToView).xyz;

	int LowestIntersectingGroup = -1;

	// Walk the groups from last to first so that the final assignment is the lowest intersecting group
	UNROLL
	for (int GroupIndex = NUM_CULLED_OBJECT_LISTS - 1; GroupIndex >= 0; GroupIndex--)
	{
		uint PhaseStart;
		uint PhaseEnd;
		GetPhaseParameters(GroupIndex, PhaseStart, PhaseEnd);

		const float GroupSampleRadius = GetStepOffset(PhaseEnd) * 2 * SampleRadiusScale;
		const float4 ExpandedSphere = float4(SphereCenterInView, ObjectSphere.w + GroupSampleRadius);

		BRANCH
		if (SphereIntersectConeWithDepthRanges(ExpandedSphere, ConeVertex, ConeAxis, ConeCos, ConeSin, DepthRanges))
		{
			LowestIntersectingGroup = GroupIndex;
		}
	}

	if (LowestIntersectingGroup >= 0)
	{
		// Reserve a slot in this tile's list for the group
		uint SlotIndex;
		InterlockedAdd(RWTileHeadDataUnpacked[TileIndex * 4 + 1 + LowestIntersectingGroup], 1U, SlotIndex);

		// Objects that don't fit are dropped, the counter keeps incrementing regardless
		if (SlotIndex < MAX_OBJECTS_PER_TILE)
		{
#if COHERENT_OBJECT_LIST_WRITES
			const uint TotalTiles = (uint)(NumGroups.x * NumGroups.y + .5f);
			const uint WriteIndex = (SlotIndex * TotalTiles + TileIndex) * NUM_CULLED_OBJECT_LISTS + LowestIntersectingGroup;
#else
			const uint TileListStart = TileIndex * MAX_OBJECTS_PER_TILE;
			const uint WriteIndex = TileListStart + SlotIndex;
#endif

			RWTileArrayData[WriteIndex] = Input.ObjectIndex;
		}
	}
}
|
|
|
|
/**
 * Replaces WorldNormal with a normal derived from the distance field of the closest culled object, when one can be computed.
 * The closest object is found by sampling the distance field of every object in the tile's list whose volume contains the pixel.
 * WorldNormal is left untouched when the pixel has no distance field representation, when no object contains the pixel,
 * when the closest object is flagged two sided, or when the sampled gradient is zero.
 *
 * @param TileCoordinate - tile whose object list is searched, passed to GetTileHead
 * @param ScreenUV - screen UV of the pixel
 * @param SceneDepth - depth of the pixel, used together with ScreenUV to reconstruct its world position
 * @param HasDistanceFieldRepresentation - when 0 the normal is never replaced
 * @param WorldNormal - in: fallback normal, out: distance field normal if one was computed
 */
void ComputeDistanceFieldNormal(uint2 TileCoordinate, float2 ScreenUV, float SceneDepth, uint HasDistanceFieldRepresentation, inout float3 WorldNormal)
{
	uint4 TileHead = GetTileHead(TileCoordinate);

	float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;

	float4 HomogeneousWorldPosition = mul(float4(ScreenPosition * SceneDepth, SceneDepth, 1), View.ScreenToWorld);
	float3 OpaqueWorldPosition = HomogeneousWorldPosition.xyz / HomogeneousWorldPosition.w;

	int ClosestDistanceFieldObject = -1;
	float ClosestDistanceFieldSurface = 10000;

	// Iterate over objects whose surface we might be on
	LOOP
	for (uint ListObjectIndex = 0; ListObjectIndex < TileHead.y; ListObjectIndex += 1)
	{
#if COHERENT_OBJECT_LIST_WRITES
		uint ListIndex = 0;
		uint ObjectIndex = TileArrayData.Load((ListObjectIndex * TileListGroupSize.x * TileListGroupSize.y + TileHead.x) * NUM_CULLED_OBJECT_LISTS + ListIndex);
#else
		uint ObjectIndex = TileArrayData.Load(TileHead.x + ListObjectIndex);
#endif
		float3 LocalPositionExtent = LoadObjectLocalPositionExtent(ObjectIndex);
		float4x4 WorldToVolume = LoadObjectWorldToVolume(ObjectIndex);
		float4 UVScaleAndVolumeScale = LoadObjectUVScale(ObjectIndex);
		float3 UVAdd = LoadObjectUVAdd(ObjectIndex);
		float3 VolumeShadingPosition = mul(float4(OpaqueWorldPosition, 1), WorldToVolume).xyz;

		// Only objects whose volume strictly contains the pixel are candidates
		BRANCH
		if (all(VolumeShadingPosition > -LocalPositionExtent) && all(VolumeShadingPosition < LocalPositionExtent))
		{
			float3 LocalShadingUV = DistanceFieldVolumePositionToUV(VolumeShadingPosition, UVScaleAndVolumeScale.xyz, UVAdd);
			float DistanceFromSurface = Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, LocalShadingUV, 0).x;
			float WorldSpaceDistanceFromSurface = DistanceFromSurface * UVScaleAndVolumeScale.w;

			if (WorldSpaceDistanceFromSurface < ClosestDistanceFieldSurface)
			{
				ClosestDistanceFieldSurface = WorldSpaceDistanceFromSurface;
				ClosestDistanceFieldObject = ObjectIndex;
			}
		}
	}

	BRANCH
	if (HasDistanceFieldRepresentation > 0 && ClosestDistanceFieldObject >= 0)
	{
		uint ObjectIndex = ClosestDistanceFieldObject;

		float4 LocalPositionExtentAndTwoSided = LoadObjectLocalPositionExtentAndTwoSided(ObjectIndex);

		// Can't compute a distance field normal if all faces were treated as frontfaces,
		// Because that creates a non-linearity in the distance field at surface intersections, which does not interpolate properly
		// Fall back to GBuffer normal, which may have normalmap information or just bogus imported normals that cause surface cache artifacts
		BRANCH
		if (LocalPositionExtentAndTwoSided.w == 0)
		{
			float4x4 WorldToVolume = LoadObjectWorldToVolume(ObjectIndex);
			float4 UVScaleAndVolumeScale = LoadObjectUVScale(ObjectIndex);
			float3 UVAdd = LoadObjectUVAdd(ObjectIndex);

			float4x4 VolumeToWorld = LoadObjectVolumeToWorld(ObjectIndex);

			float3 VolumeShadingPosition = mul(float4(OpaqueWorldPosition, 1), WorldToVolume).xyz;
			float3 LocalShadingUV = DistanceFieldVolumePositionToUV(VolumeShadingPosition, UVScaleAndVolumeScale.xyz, UVAdd);

			// Used to clamp UVs inside valid space of this object's distance field
			float3 UVMin = DistanceFieldVolumePositionToUV(-LocalPositionExtentAndTwoSided.xyz, UVScaleAndVolumeScale.xyz, UVAdd);
			float3 UVMax = DistanceFieldVolumePositionToUV(LocalPositionExtentAndTwoSided.xyz, UVScaleAndVolumeScale.xyz, UVAdd);

			// Central differences, one texel to either side along each axis
			float R = Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, float3(min(LocalShadingUV.x + DistanceFieldAtlasTexelSize.x, UVMax.x), LocalShadingUV.y, LocalShadingUV.z), 0).x;
			float L = Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, float3(max(LocalShadingUV.x - DistanceFieldAtlasTexelSize.x, UVMin.x), LocalShadingUV.y, LocalShadingUV.z), 0).x;
			float F = Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, float3(LocalShadingUV.x, min(LocalShadingUV.y + DistanceFieldAtlasTexelSize.y, UVMax.y), LocalShadingUV.z), 0).x;
			float B = Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, float3(LocalShadingUV.x, max(LocalShadingUV.y - DistanceFieldAtlasTexelSize.y, UVMin.y), LocalShadingUV.z), 0).x;
			float U = Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, float3(LocalShadingUV.x, LocalShadingUV.y, min(LocalShadingUV.z + DistanceFieldAtlasTexelSize.z, UVMax.z)), 0).x;
			float D = Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, float3(LocalShadingUV.x, LocalShadingUV.y, max(LocalShadingUV.z - DistanceFieldAtlasTexelSize.z, UVMin.z)), 0).x;

			float3 Gradient = .5f * float3(R - L, F - B, U - D);

			// A zero gradient can't be normalized (it would produce a NaN normal), keep the incoming normal in that case
			if (dot(Gradient, Gradient) > 0)
			{
				float3 LocalNormal = normalize(Gradient);
				WorldNormal = normalize(mul(LocalNormal, (float3x3)VolumeToWorld));
			}
		}
	}
}
|
|
|
|
// Using the distance field normal prevents incorrect self-intersection but adds seams between modular objects and has a high cost
#define USE_DISTANCE_FIELD_NORMAL 0

/**
 * Writes the normal and depth used by the surface cache passes.
 * The GBuffer normal is used, unless USE_DISTANCE_FIELD_NORMAL is enabled, in which case a search through all the nearby
 * objects is done to find the closest one, whose distance field normal is used.
 *
 * @param UVAndScreenPos - not read by this shader
 * @param SVPos - pixel position in the downsampled target
 * @param OutColor - xyz = encoded normal, w = scene depth, negated when the pixel has no distance field representation
 */
void ComputeDistanceFieldNormalPS(
	in float4 UVAndScreenPos : TEXCOORD0,
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : SV_Target0)
{
	// UV of the full resolution pixel that this downsampled pixel maps to
	float2 ScreenUV = float2((((uint2)SVPos.xy) * DOWNSAMPLE_FACTOR + View.ViewRectMin.xy + .5f) * View.ViewSizeAndSceneTexelSize.zw);
	float SceneDepth = CalcSceneDepth(ScreenUV);
	FGBufferData GBufferData = GetGBufferData(ScreenUV);

	float3 WorldNormal = GBufferData.WorldNormal;

#if USE_DISTANCE_FIELD_NORMAL

	// Take xy explicitly instead of relying on an implicit float4 -> uint2 truncation
	uint2 TileCoordinate = (uint2)SVPos.xy / uint2(THREADGROUP_SIZEX, THREADGROUP_SIZEY);
	ComputeDistanceFieldNormal(TileCoordinate, ScreenUV, SceneDepth, GBufferData.HasDistanceFieldRepresentation, WorldNormal);

#endif

	float DepthSign = GBufferData.HasDistanceFieldRepresentation > 0 ? 1 : -1;
	// Note: Encoding whether the pixel has a distance field representation in the sign bit of the alpha
	OutColor = float4(EncodeNormalForAO(WorldNormal), DepthSign * SceneDepth);
}
|
|
|
|
/** Target of ComputeDistanceFieldNormalCS: xyz = encoded normal, w = signed scene depth. */
RWTexture2D<float4> RWDistanceFieldNormal;

/**
 * Compute shader variant of the normal / depth pass, one thread per downsampled pixel.
 * Writes the encoded normal in xyz and the scene depth in w, negated when the pixel has no distance field representation.
 *
 * @param GroupId - not read by this shader
 * @param DispatchThreadId - xy is the downsampled pixel being written
 * @param GroupThreadId - not read by this shader
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void ComputeDistanceFieldNormalCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	// UV of the full resolution pixel that this thread maps to
	const float2 FullResUV = float2((DispatchThreadId.xy * DOWNSAMPLE_FACTOR + View.ViewRectMin.xy + .5f) * View.ViewSizeAndSceneTexelSize.zw);
	const float PixelDepth = CalcSceneDepth(FullResUV);
	FGBufferData GBuffer = GetGBufferData(FullResUV);

	float3 Normal = GBuffer.WorldNormal;

#if USE_DISTANCE_FIELD_NORMAL

	const uint2 TileCoordinate = DispatchThreadId.xy / uint2(THREADGROUP_SIZEX, THREADGROUP_SIZEY);
	ComputeDistanceFieldNormal(TileCoordinate, FullResUV, PixelDepth, GBuffer.HasDistanceFieldRepresentation, Normal);

#endif

	// The sign of the stored depth carries whether the pixel has a distance field representation
	const float SignForDepth = GBuffer.HasDistanceFieldRepresentation > 0 ? 1 : -1;
	RWDistanceFieldNormal[DispatchThreadId.xy] = float4(EncodeNormalForAO(Normal), SignForDepth * PixelDepth);
}
|
|
|
|
/**
 * Outputs the depth stored in the w channel of DistanceFieldNormalTexture as hardware depth.
 *
 * @param UVAndScreenPos - xy is the UV used to sample DistanceFieldNormalTexture
 * @param OutColor - always written as 0
 * @param OutDepth - the sampled depth converted with ConvertToDeviceZ
 */
void WriteDownsampledDepthPS(
	in float4 UVAndScreenPos : TEXCOORD0,
	out float4 OutColor : SV_Target0,
	out float OutDepth : SV_Depth)
{
	// NOTE(review): the normal pass in this file negates w for pixels with no distance field representation,
	// and the other readers take abs() of it - confirm that converting the signed value here is intended
	const float StoredDepth = Texture2DSampleLevel(DistanceFieldNormalTexture, DistanceFieldNormalSampler, UVAndScreenPos.xy, 0).w;

	OutColor = 0;
	OutDepth = ConvertToDeviceZ(StoredDepth);
}
|
|
|
|
// For some reason gives inaccurate results at lower resolutions, so only enabled for the final interpolation pass
#define USE_SCREENVECTOR_WORLD_POSITION FINAL_INTERPOLATION_PASS

/** Interpolants produced by IrradianceCacheSplatVS and consumed by IrradianceCacheSplatPS. */
struct FIrradianceCacheSplatVertexOutput
{
	/** Clip space position of the splat corner. */
	float4 Position : SV_POSITION;

	/** Record position in xyz and the radius it is interpolated over in w, not interpolated. */
	nointerpolation float4 PositionRadius : TEXCOORD0;

	/** Decoded normal of the record, not interpolated. */
	nointerpolation float3 Normal : TEXCOORD1;

	/** Decoded bent normal of the record, not interpolated. */
	nointerpolation float3 BentNormal : TEXCOORD2;

#if USE_SCREENVECTOR_WORLD_POSITION
	/** Vector that the pixel shader scales by scene depth and adds to the view origin to get the world position. */
	float3 ScreenVector : TEXCOORD4;
#endif
};
|
|
|
|
/** Not referenced by the shaders visible in this part of the file. */
Buffer<uint> CompactedRedirect;
/** Not referenced by the shaders visible in this part of the file. */
Buffer<uint> IrradianceCacheUsed;
/** Per-record position (xyz) and radius (w). */
Buffer<float4> IrradianceCachePositionRadius;
/** Per-record occluder radius, used as an upper bound on the record radius. */
Buffer<float> IrradianceCacheOccluderRadius;
/** Per-record normal, decoded with DecodeNormalForAO. */
Buffer<float4> IrradianceCacheNormal;
/** Per-record bent normal, decoded with DecodeNormalForAO. */
Buffer<float4> IrradianceCacheBentNormal;
/** Scale applied to the radius that a record is interpolated over. */
float InterpolationRadiusScale;
/** Added to the clip space xy of splat vertices, scaled by w. */
float2 NormalizedOffsetToPixelCenter;
/** Extra splat radius per unit of view distance, applied in non-final interpolation passes. */
float HackExpand;

/**
 * Expands a screen-facing polygon to cover a surface cache record for splatting.
 *
 * @param InPosition - not read by this shader
 * @param InUV - corner offset in units of the splat radius, along the view right (x) and up (y) axes
 * @param JobIndex - instance id, used as the index of the record to load
 * @param Output - interpolants for IrradianceCacheSplatPS
 */
void IrradianceCacheSplatVS(
	float2 InPosition : ATTRIBUTE0,
	float2 InUV : ATTRIBUTE1,
	uint JobIndex : SV_InstanceID,
	out FIrradianceCacheSplatVertexOutput Output
	)
{
	float4 Record = IrradianceCachePositionRadius.Load(JobIndex);

	// The record is never interpolated further than its occluder radius
	Record.w = min(Record.w, IrradianceCacheOccluderRadius.Load(JobIndex));

	const float3 ViewToCenter = Record.xyz - View.ViewOrigin.xyz;
	const float ViewToCenterLength = length(ViewToCenter);
	const float3 ViewToCenterDirection = ViewToCenter / ViewToCenterLength;

	// Only allow full interpolation expanding for small samples, it won't be noticeable for large samples but adds a lot of splatting cost
	const float RadiusScaleForSample = lerp(InterpolationRadiusScale, 1, max(saturate(Record.w / 80), .2f));
	// The radius is also kept above 1% of the distance to the viewer
	Record.w = max(Record.w, ViewToCenterLength * .01f) * RadiusScaleForSample;

	// +1 = behind volume (safe from near plane clipping without depth testing),
	// -1 = in front of volume (required for conservative results with depth testing)
	//@todo - using -1 currently causes artifacts
	const float BoundingDirection = +1;
	float CenterOffset = BoundingDirection * Record.w;

	// Distance from the camera position that ViewToCenterDirection will intersect the near plane
	const float NearPlaneDistance = View.NearPlane / dot(View.ViewForward, ViewToCenterDirection);

	// Don't move closer than the near plane to avoid getting clipped
	// Clamping at the vertex level only works because it's a screen-aligned triangle
	// Small bias to push just off the near plane
	if (ViewToCenterLength + CenterOffset < NearPlaneDistance + .001f)
	{
		CenterOffset = -ViewToCenterLength + NearPlaneDistance + .001f;
	}

	// Construct a virtual sample position that won't be near plane clipped and will have a positive z after projection
	const float3 SplatCenter = Record.xyz + CenterOffset * ViewToCenterDirection;

	// Clipping the edge of the circle a bit can save perf, but introduces artifacts in the interpolation
	const float RadiusSlack = 1.0f;

	// Compute new radius since we moved the center away from the camera, approximate as similar triangles
	// This is really incorrect because it's not taking into account perspective correction on a sphere - the widest part in screenspace is not at the world space center
	float SplatRadius = RadiusSlack * Record.w * (ViewToCenterLength + CenterOffset) / ViewToCenterLength;

#if !FINAL_INTERPOLATION_PASS
	// Combat the mysterious bug where samples from last frame do not cover this frame's pixel even with no camera movement, seems to be a precision issue
	SplatRadius += HackExpand * ViewToCenterLength * 4;
#endif

	// Push the corner out along the camera axes
	const float3 CornerWorldPosition = SplatCenter + (View.ViewRight * InUV.x + View.ViewUp * InUV.y) * SplatRadius;

	Output.Position = mul(float4(CornerWorldPosition.xyz, 1), View.WorldToClip);

#if USE_SCREENVECTOR_WORLD_POSITION
	// Has to be derived before the pixel center offset below is applied
	Output.ScreenVector = mul(float4(Output.Position.xy / Output.Position.w, 1, 0), View.ScreenToWorld).xyz;
#endif

	// Move the vertex over to compensate for the difference between the low res pixel center and the high res pixel center where shading is done
	Output.Position.xy += NormalizedOffsetToPixelCenter * Output.Position.w;

	Output.PositionRadius = Record;
	Output.Normal = DecodeNormalForAO(IrradianceCacheNormal.Load(JobIndex).xyz);
	Output.BentNormal = DecodeNormalForAO(IrradianceCacheBentNormal.Load(JobIndex).xyz);
}
|
|
|
|
/** Scale applied to sqrt(1 - dot(pixel normal, record normal)) to form the normal error. */
float InterpolationAngleNormalization;
/** Scale applied to the cosine between the record normal and the direction to the pixel to form the behind-plane error. */
float InvMinCosPointBehindPlane;

/**
 * Computes surface cache weighting between the current pixel and the record being splatted.
 * The weight falls off with the largest of three errors: distance to the record relative to its radius,
 * difference between the normals, and how far the pixel is in front of the record's plane.
 *
 * @param Input - record data from IrradianceCacheSplatVS
 * @param SVPos - pixel position in the current interpolation level
 * @param OutColor - rgb = record bent normal * weight, a = weight; 0 when the record doesn't apply
 */
void IrradianceCacheSplatPS(
	FIrradianceCacheSplatVertexOutput Input,
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : COLOR0)
{
	OutColor = 0;

	const uint2 PixelCoord = (uint2)SVPos.xy;

	const float2 BaseLevelScreenUV = float2((PixelCoord * DownsampleFactorToBaseLevel + .5f) * BaseLevelTexelSize);
	const float4 DistanceFieldNormalValue = Texture2DSampleLevel(DistanceFieldNormalTexture, DistanceFieldNormalSampler, BaseLevelScreenUV, 0);

#if USE_SCREENVECTOR_WORLD_POSITION

	// Depth comes from the normal texture, where the sign is used as a flag
	float SceneW = abs(DistanceFieldNormalValue.w);
	float3 OpaqueWorldPosition = View.ViewOrigin.xyz + Input.ScreenVector * SceneW;

#else

	// Reconstruct the world position from the scene depth at the full resolution pixel
	float2 ScreenUV = float2((PixelCoord * CurrentLevelDownsampleFactor + View.ViewRectMin.xy + float2(.5f, .5f)) * View.ViewSizeAndSceneTexelSize.zw);
	float SceneW = CalcSceneDepth(ScreenUV);
	float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
	float4 HomogeneousWorldPosition = mul(float4(ScreenPosition.xy * SceneW, SceneW, 1), View.ScreenToWorld);
	float3 OpaqueWorldPosition = HomogeneousWorldPosition.xyz / HomogeneousWorldPosition.w;

#endif

	const float3 RecordPosition = Input.PositionRadius.xyz;
	const float RecordRadius = Input.PositionRadius.w;
	const float3 RecordNormal = Input.Normal.xyz;

	float Distance = length(OpaqueWorldPosition - RecordPosition);
	float DistanceError = saturate(Distance / RecordRadius);
	float Weight = 0;

	BRANCH
	if (DistanceError < 1 && SceneW < AOMaxViewDistance)
	{
		const float3 PixelNormal = DecodeNormalForAO(DistanceFieldNormalValue.xyz);
		const float NormalError = InterpolationAngleNormalization * sqrt(saturate(1 - dot(PixelNormal, RecordNormal)));

		// Don't use a lighting record if it's in front of the query point.
		// Query points behind the lighting record may have nearby occluders that the lighting record does not see.
		// Offset the comparison point along the negative normal to prevent self-occlusion
		const float3 RecordToPixel = OpaqueWorldPosition - (RecordPosition - 1 * RecordNormal);
		const float RecordToPixelLength = length(RecordToPixel);
		const float PlaneCos = dot(RecordNormal, RecordToPixel) / RecordToPixelLength;

		// Setup an error metric that goes from 0 if the points are coplanar, to 1 if the point being shaded is at the angle corresponding to MinCosPointBehindPlane behind the plane
		const float PointBehindPlaneError = min(max(PlaneCos * InvMinCosPointBehindPlane, 0.0f), RecordToPixelLength / 3.0f);

		// The record is weighted by its worst error
		const float PrecisionScale = .1f;
		const float LargestError = max(DistanceError, max(NormalError, PointBehindPlaneError));
		Weight = saturate(PrecisionScale * (1 - LargestError));

		// Not consumed below
		float VisualizePlacement = Distance < .1f * RecordRadius;

		OutColor.rgb = Input.BentNormal * Weight;
		OutColor.a = Weight;
	}

	#define VISUALIZE_SPLAT_OVERDRAW 0
	#if VISUALIZE_SPLAT_OVERDRAW && FINAL_INTERPOLATION_PASS
		// A constant amount per splat, plus more where the splat actually contributed
		OutColor.rgb = 0;
		//OutColor.a = Input.Position.w <= SceneW ? .01f : 0.0f;
		OutColor.a = .005f + (Weight > .001f ? .01f : 0);
	#endif

	#define VISUALIZE_RECORD_POINTS 0
	#if VISUALIZE_RECORD_POINTS && FINAL_INTERPOLATION_PASS
		// Only the pixels right next to the record get any weight
		OutColor.rgb = 1;
		OutColor.a = (DistanceError < .05f) * .1f;
	#endif
}
|
|
|
|
|
|
#ifndef NUM_INTERPOLATE_LEVELS
#define NUM_INTERPOLATE_LEVELS 1
#endif

/** Output of InterpolateCacheCS: xyz = sum of weighted bent normals, w = sum of weights. */
RWTexture2D<float4> RWIrradianceCacheSplat;

/** Per-level draw parameters, element [1] is read as the number of records in the level. */
Buffer<uint> LevelScatterDrawParameters[NUM_INTERPOLATE_LEVELS];
/** Per-level record position (xyz) and radius (w). */
Buffer<float4> LevelIrradianceCachePositionRadius[NUM_INTERPOLATE_LEVELS];
/** Per-level record occluder radius, used as an upper bound on the record radius. */
Buffer<float> LevelIrradianceCacheOccluderRadius[NUM_INTERPOLATE_LEVELS];
/** Per-level record normal, decoded with DecodeNormalForAO. */
Buffer<float4> LevelIrradianceCacheNormal[NUM_INTERPOLATE_LEVELS];
/** Per-level record bent normal, decoded with DecodeNormalForAO. */
Buffer<float4> LevelIrradianceCacheBentNormal[NUM_INTERPOLATE_LEVELS];

/** Capacity of the per-tile record list, intersecting records beyond this are dropped. */
#define MAX_RECORDS_PER_TILE 512

/** Bit patterns of the tile's min / max scene depth, reduced with integer atomics. */
groupshared uint IntegerTileMinZ;
groupshared uint IntegerTileMaxZ;
/** Second depth range, only reduced when USE_DUAL_DEPTH_RANGES is enabled. */
groupshared uint IntegerTileMinZ2;
groupshared uint IntegerTileMaxZ2;
/** Number of records of the current level that intersect the tile, this can exceed MAX_RECORDS_PER_TILE. */
groupshared uint TileNumSamples;
/** Indices of the records of the current level that intersect the tile. */
groupshared uint TileSampleIndices[MAX_RECORDS_PER_TILE];

/**
 * Tiled gather of surface cache records, one thread group per screen tile and one thread per pixel.
 * For every level, the group first culls all of the level's records against the tile bounds into a groupshared list,
 * then every thread accumulates the weighted bent normals of the listed records for its pixel.
 *
 * @param GroupId - xy is the tile position, used to build the tile bounds
 * @param DispatchThreadId - xy is the pixel being shaded and written
 * @param GroupThreadId - xy is flattened into the thread's index within the group
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void InterpolateCacheCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZEX + GroupThreadId.x;
	uint2 PixelIndex = DispatchThreadId.xy;

	// NOTE(review): the other passes in this file add View.ViewRectMin.xy when computing the screen UV, this one doesn't - confirm that is intended
	float2 ScreenUV = (PixelIndex * CurrentLevelDownsampleFactor + float2(.5f, .5f)) * View.ViewSizeAndSceneTexelSize.zw;
	float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
	float SceneDepth = CalcSceneDepth(ScreenUV);

	float4 HomogeneousWorldPosition = mul(float4(ScreenPosition * SceneDepth, SceneDepth, 1), View.ScreenToWorld);
	float3 OpaqueWorldPosition = HomogeneousWorldPosition.xyz / HomogeneousWorldPosition.w;
	float2 BaseLevelScreenUV = (PixelIndex * DownsampleFactorToBaseLevel + float2(.5f, .5f)) * BaseLevelTexelSize;
	float3 WorldNormal = DecodeNormalForAO(Texture2DSampleLevel(DistanceFieldNormalTexture, DistanceFieldNormalSampler, BaseLevelScreenUV, 0).xyz);

	// Initialize per-tile variables
	if (ThreadIndex == 0)
	{
		IntegerTileMinZ = 0x7F7FFFFF;
		IntegerTileMaxZ = 0;
		IntegerTileMinZ2 = 0x7F7FFFFF;
		IntegerTileMaxZ2 = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	// Reduce the depth bounds of the tile, the depths are compared through their bit patterns
	InterlockedMin(IntegerTileMinZ, asuint(SceneDepth));
	InterlockedMax(IntegerTileMaxZ, asuint(SceneDepth));

	GroupMemoryBarrierWithGroupSync();

	float MinTileZ = asfloat(IntegerTileMinZ);
	float MaxTileZ = asfloat(IntegerTileMaxZ);

#define USE_TILE_FRUSTUM 1

#define USE_DUAL_DEPTH_RANGES (1 && !USE_TILE_FRUSTUM)
#if USE_DUAL_DEPTH_RANGES
	float HalfZ = .5f * (MinTileZ + MaxTileZ);

	// Compute a second min and max Z, clipped by HalfZ, so that we get two depth bounds per tile
	// This results in more conservative tile depth bounds and fewer intersections
	if (SceneDepth >= HalfZ)
	{
		InterlockedMin(IntegerTileMinZ2, asuint(SceneDepth));
	}

	if (SceneDepth <= HalfZ)
	{
		InterlockedMax(IntegerTileMaxZ2, asuint(SceneDepth));
	}

	GroupMemoryBarrierWithGroupSync();

	float MinTileZ2 = asfloat(IntegerTileMinZ2);
	float MaxTileZ2 = asfloat(IntegerTileMaxZ2);

#endif

#if USE_TILE_FRUSTUM

	// Setup tile frustum planes
	//@todo - use view size not buffer size
	float2 TileScale = AOBufferSize * rcp(2 * float2(THREADGROUP_SIZEX, THREADGROUP_SIZEY));
	float2 TileBias = TileScale - GroupId.xy;

	float4 C1 = float4(View.ViewToClip._11 * TileScale.x, 0.0f, View.ViewToClip._31 * TileScale.x + TileBias.x, 0.0f);
	float4 C2 = float4(0.0f, -View.ViewToClip._22 * TileScale.y, View.ViewToClip._32 * TileScale.y + TileBias.y, 0.0f);
	float4 C4 = float4(0.0f, 0.0f, 1.0f, 0.0f);

	// Planes 0-3 bound the tile in screen x and y, planes 4-5 bound it in depth
	float4 frustumPlanes[6];
	frustumPlanes[0] = C4 - C1;
	frustumPlanes[1] = C4 + C1;
	frustumPlanes[2] = C4 - C2;
	frustumPlanes[3] = C4 + C2;
	frustumPlanes[4] = float4(0.0f, 0.0f, 1.0f, -MinTileZ);
	frustumPlanes[5] = float4(0.0f, 0.0f, -1.0f, MaxTileZ);

	// Normalize tile frustum planes
	UNROLL
	for (uint NormalizePlaneIndex = 0; NormalizePlaneIndex < 4; ++NormalizePlaneIndex)
	{
		frustumPlanes[NormalizePlaneIndex] *= rcp(length(frustumPlanes[NormalizePlaneIndex].xyz));
	}

#else

	float3 ViewTileMin;
	float3 ViewTileMax;

	float3 ViewTileMin2;
	float3 ViewTileMax2;

	float2 TanViewFOV = float2(1 / View.ViewToClip[0][0], 1 / View.ViewToClip[1][1]);
	float2 TileSize = TanViewFOV * 2 / NumGroups;
	float2 UnitPlaneTileMin = (GroupId.xy * TileSize - TanViewFOV) * float2(1, -1);
	float2 UnitPlaneTileMax = ((GroupId.xy + 1) * TileSize - TanViewFOV) * float2(1, -1);

	float ExpandRadius = 0;

	#if USE_DUAL_DEPTH_RANGES
		ViewTileMin.xy = min(MinTileZ * UnitPlaneTileMin, MaxTileZ2 * UnitPlaneTileMin) - ExpandRadius;
		ViewTileMax.xy = max(MinTileZ * UnitPlaneTileMax, MaxTileZ2 * UnitPlaneTileMax) + ExpandRadius;
		ViewTileMin.z = MinTileZ - ExpandRadius;
		ViewTileMax.z = MaxTileZ2 + ExpandRadius;
		ViewTileMin2.xy = min(MinTileZ2 * UnitPlaneTileMin, MaxTileZ * UnitPlaneTileMin) - ExpandRadius;
		ViewTileMax2.xy = max(MinTileZ2 * UnitPlaneTileMax, MaxTileZ * UnitPlaneTileMax) + ExpandRadius;
		ViewTileMin2.z = MinTileZ2 - ExpandRadius;
		ViewTileMax2.z = MaxTileZ + ExpandRadius;
	#else
		ViewTileMin.xy = min(MinTileZ * UnitPlaneTileMin, MaxTileZ * UnitPlaneTileMin) - ExpandRadius;
		ViewTileMax.xy = max(MinTileZ * UnitPlaneTileMax, MaxTileZ * UnitPlaneTileMax) + ExpandRadius;
		ViewTileMin.z = MinTileZ - ExpandRadius;
		ViewTileMax.z = MaxTileZ + ExpandRadius;
	#endif

#endif

	float4 LocalAccumulation = 0;

	UNROLL
	for (uint LevelIndex = 0; LevelIndex < NUM_INTERPOLATE_LEVELS; LevelIndex++)
	{
		// Make sure every thread is done with the previous level's list before it gets reset
		GroupMemoryBarrierWithGroupSync();

		if (ThreadIndex == 0)
		{
			TileNumSamples = 0;
		}

		GroupMemoryBarrierWithGroupSync();

		uint NumRecords = LevelScatterDrawParameters[LevelIndex][1];

		// Cull the level's records against the tile, the records are distributed over the threads of the group
		LOOP
		for (uint SampleIndex = ThreadIndex; SampleIndex < NumRecords; SampleIndex += THREADGROUP_TOTALSIZE)
		{
			float4 PositionAndRadius = LevelIrradianceCachePositionRadius[LevelIndex].Load(SampleIndex);

			float OccluderRadius = LevelIrradianceCacheOccluderRadius[LevelIndex].Load(SampleIndex);
			PositionAndRadius.w = min(PositionAndRadius.w, OccluderRadius);

			float ViewToCenterLength = length(PositionAndRadius.xyz - View.ViewOrigin.xyz);
			PositionAndRadius.w = max(PositionAndRadius.w, ViewToCenterLength * .01f) * InterpolationRadiusScale;

			float3 ViewSpaceSamplePosition = mul(float4(PositionAndRadius.xyz + View.PreViewTranslation.xyz, 1), View.TranslatedWorldToView).xyz;

#if USE_TILE_FRUSTUM
			// Cull the record against the tile's frustum planes
			// Note: this has some false positives, a record that is intersecting three different axis frustum planes yet not intersecting the volume of the tile will be treated as intersecting
			bool bInTile = true;

			// Test against the screen x and y oriented planes first
			UNROLL
			for (uint SidePlaneIndex = 0; SidePlaneIndex < 4; ++SidePlaneIndex)
			{
				float PlaneDistance = dot(frustumPlanes[SidePlaneIndex], float4(ViewSpaceSamplePosition, 1.0f));
				bInTile = bInTile && (PlaneDistance >= -PositionAndRadius.w);
			}

			BRANCH
			if (bInTile)
			{
				bool bInDepthRange = true;

				// Test against the depth range
				UNROLL
				for (uint DepthPlaneIndex = 4; DepthPlaneIndex < 6; ++DepthPlaneIndex)
				{
					float PlaneDistance = dot(frustumPlanes[DepthPlaneIndex], float4(ViewSpaceSamplePosition, 1.0f));
					bInDepthRange = bInDepthRange && (PlaneDistance >= -PositionAndRadius.w);
				}

				// Add this record to the list of indices if it intersects
				BRANCH
				if (bInDepthRange)
				{
					uint ListIndex;
					InterlockedAdd(TileNumSamples, 1U, ListIndex);

					// The counter is incremented even when the list is full
					if (ListIndex < MAX_RECORDS_PER_TILE)
					{
						TileSampleIndices[ListIndex] = SampleIndex;
					}
				}
			}
#else
			//@todo - why needed?
			PositionAndRadius.w *= 2;

			float BoxDistance = ComputeDistanceFromBoxToPoint(ViewTileMin, ViewTileMax, ViewSpaceSamplePosition);

			#if USE_DUAL_DEPTH_RANGES
				float BoxDistance2 = ComputeDistanceFromBoxToPoint(ViewTileMin2, ViewTileMax2, ViewSpaceSamplePosition);
			#endif

			BRANCH
			if (BoxDistance < PositionAndRadius.w
				#if USE_DUAL_DEPTH_RANGES
					|| BoxDistance2 < PositionAndRadius.w
				#endif
				)
			{
				uint ListIndex;
				InterlockedAdd(TileNumSamples, 1U, ListIndex);

				// The counter is incremented even when the list is full
				if (ListIndex < MAX_RECORDS_PER_TILE)
				{
					TileSampleIndices[ListIndex] = SampleIndex;
				}
			}
#endif
		}

		GroupMemoryBarrierWithGroupSync();

		// TileNumSamples counts every intersecting record, including the ones that didn't fit in the list,
		// so it has to be clamped to the list capacity before being used as a bound for reading TileSampleIndices
		uint TotalIntersectingSamples = TileNumSamples;
		uint LocalNumSamples = min(TotalIntersectingSamples, (uint)MAX_RECORDS_PER_TILE);

#define VISUALIZE_OVERDRAW 0
#if VISUALIZE_OVERDRAW
		LocalAccumulation += TotalIntersectingSamples * .005f;
#else

		// Accumulate the records of the tile's list for this thread's pixel
		LOOP
		for (uint ListIndex = 0; ListIndex < LocalNumSamples; ListIndex++)
		{
			uint SampleIndex = TileSampleIndices[ListIndex];
			float4 PositionAndRadius = LevelIrradianceCachePositionRadius[LevelIndex].Load(SampleIndex);
			float OccluderRadius = LevelIrradianceCacheOccluderRadius[LevelIndex].Load(SampleIndex);

			PositionAndRadius.w = min(PositionAndRadius.w, OccluderRadius);

			float ViewToCenterLength = length(PositionAndRadius.xyz - View.ViewOrigin.xyz);
			PositionAndRadius.w = max(PositionAndRadius.w, ViewToCenterLength * .01f) * InterpolationRadiusScale;

			float Distance = length(OpaqueWorldPosition - PositionAndRadius.xyz);
			float DistanceError = saturate(Distance / PositionAndRadius.w);

			BRANCH
			if (DistanceError < 1)
			{
				float3 RecordNormal = DecodeNormalForAO(LevelIrradianceCacheNormal[LevelIndex].Load(SampleIndex).xyz);
				float3 RecordBentNormal = DecodeNormalForAO(LevelIrradianceCacheBentNormal[LevelIndex].Load(SampleIndex).xyz);

				float NormalError = InterpolationAngleNormalization * sqrt(saturate(1 - dot(WorldNormal, RecordNormal)));

				// Don't use a lighting record if it's in front of the query point.
				// Query points behind the lighting record may have nearby occluders that the lighting record does not see.
				float3 RecordToVertexVector = OpaqueWorldPosition - (PositionAndRadius.xyz - 1 * RecordNormal.xyz);
				float DistanceToVertex = length(RecordToVertexVector);
				float PlaneDistance = dot(RecordNormal, RecordToVertexVector) / DistanceToVertex;

				// Setup an error metric that goes from 0 if the points are coplanar, to 1 if the point being shaded is at the angle corresponding to MinCosPointBehindPlane behind the plane
				float PointBehindPlaneError = min(max(PlaneDistance * InvMinCosPointBehindPlane, 0.0f), DistanceToVertex / 3.0f);

				float PrecisionScale = .1f;
				float Weight = saturate(PrecisionScale * (1 - max(DistanceError, max(NormalError, PointBehindPlaneError))));
				//float Weight = saturate(PrecisionScale * (1 - DistanceError));

				LocalAccumulation.rgb += RecordBentNormal * Weight;
				LocalAccumulation.a += Weight;
			}
		}
#endif
	}

	RWIrradianceCacheSplat[PixelIndex.xy] = LocalAccumulation;
}
|
|
|
|
/** Accumulated splat results read by AOCombinePS2: xyz = weighted bent normal sum, w = weight sum. */
Texture2D BentNormalAOTexture3;
SamplerState BentNormalAOSampler3;

/**
 * Normalizes the splatted surface cache values, packs depth in alpha.
 * The alpha is negated for texels that received no weight.
 *
 * @param UVAndScreenPos - xy is the UV used to sample the normal and accumulation textures
 * @param OutColor - rgb = bent normal, a = scene depth with the sign marking validity
 */
void AOCombinePS2(
	in float4 UVAndScreenPos : TEXCOORD0,
	out float4 OutColor : SV_Target0)
{
	const float2 SampleUV = UVAndScreenPos.xy;
	const float4 NormalAndSignedDepth = Texture2DSampleLevel(DistanceFieldNormalTexture, DistanceFieldNormalSampler, SampleUV, 0);
	const float SceneDepth = abs(NormalAndSignedDepth.w);

#define VISUALIZE_ACCUMULATED_WEIGHTS 0
#if VISUALIZE_ACCUMULATED_WEIGHTS

	// Show the accumulated weight as a grey value
	float3 BentNormalAO = Texture2DSampleLevel(BentNormalAOTexture3, BentNormalAOSampler3, SampleUV, 0).aaa;
	OutColor = float4(BentNormalAO, SceneDepth);

#else

	float4 Accumulated = Texture2DSampleLevel(BentNormalAOTexture3, BentNormalAOSampler3, SampleUV, 0);

	// A negative stored depth marks a pixel with no distance field representation
	if (NormalAndSignedDepth.w < 0)
	{
		// Add a low weight completely unshadowed contribution for pixels with no distance field representation
		const float UnshadowedWeight = .001f;
		Accumulated += float4(NormalAndSignedDepth.xyz * UnshadowedWeight, UnshadowedWeight);
	}

	const bool bHasWeight = Accumulated.w > 0;

	if (bHasWeight)
	{
		OutColor.rgb = Accumulated.xyz / Accumulated.w;
		OutColor.a = SceneDepth;
	}
	else
	{
		OutColor.rgb = float3(0, 0, .1f);
		// Sign bit stores whether texel is valid
		OutColor.a = -SceneDepth;
	}

	// Fade to unoccluded in the distance
	const float OccludedLength = length(OutColor.rgb);
	const float DistanceFade = saturate(.001f * (.95f * AOMaxViewDistance - SceneDepth));
	OutColor.rgb = normalize(OutColor.rgb) * lerp(1, OccludedLength, DistanceFade);

	// Clamp NaNs before they get to the filter
	OutColor.rgb = clamp(OutColor.rgb, -1, 1);

	FLATTEN
	if (SceneDepth > AOMaxViewDistance)
	{
		// Mark as valid for the gap filling pass, so we don't get overwritten
		OutColor.a = abs(OutColor.a);
	}

#endif
}
|
|
|
|
// UV step multiplied by the integer tap offset in FillGapsPS (presumably the size of one BentNormalAOTexture texel - confirm with the code that sets it)
float2 BentNormalAOTexelSize;
|
|
// Bent normal (xyz) with depth in w; read by FillGapsPS, UpdateHistoryPS and AOUpsamplePS
Texture2D BentNormalAOTexture;
|
|
SamplerState BentNormalAOSampler;
|
|
|
|
// FillGapsPS iterates tap offsets from -HALF_FILL_KERNEL_SIZE to +HALF_FILL_KERNEL_SIZE on each axis
#define HALF_FILL_KERNEL_SIZE 2
|
|
|
|
/**
 * Replaces each texel's bent normal with a weighted average of the valid texels in the surrounding
 * (2 * HALF_FILL_KERNEL_SIZE + 1)^2 neighborhood. The center texel's alpha (depth and valid flag) is passed through unchanged.
 */
void FillGapsPS(
	in float4 UVAndScreenPos : TEXCOORD0,
	out float4 OutColor : SV_Target0)
{
	float4 CenterTexel = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, UVAndScreenPos.xy, 0);
	float CenterDepth = abs(CenterTexel.w);

	float3 WeightedSum = 0;
	float TotalWeight = 0;

	for (float Row = -HALF_FILL_KERNEL_SIZE; Row <= HALF_FILL_KERNEL_SIZE; Row++)
	{
		for (float Column = -HALF_FILL_KERNEL_SIZE; Column <= HALF_FILL_KERNEL_SIZE; Column++)
		{
			float2 TapOffset = float2(Column, Row);
			float4 TapValue = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, UVAndScreenPos.xy + BentNormalAOTexelSize * TapOffset, 0);

			// Only texels with a positive alpha contribute
			float TapIsValid = TapValue.w > 0;

			// Halves for every 100 units of depth difference from the center texel
			float DepthFalloff = 1 / exp2(abs(CenterDepth - abs(TapValue.w)) * .01f);

			// Per-axis falloff; taking the max means taps on the center row or column keep full spatial weight
			float2 AxisFalloff = 1 / exp2(abs(TapOffset * 10.0f / HALF_FILL_KERNEL_SIZE));
			float SpatialFalloff = max(AxisFalloff.x, AxisFalloff.y);

			float TapWeight = TapIsValid * SpatialFalloff * DepthFalloff;

			WeightedSum += TapValue.rgb * TapWeight;
			TotalWeight += TapWeight;
		}
	}

	// The max() avoids a divide by zero when no tap was valid
	OutColor = float4(WeightedSum / max(TotalWeight, .00001f), CenterTexel.w);
}
|
|
|
|
// Coefficients UpdateHistoryPS combines with screen position and depth to compute a per-pixel motion vector
float4 CameraMotion[5];
|
|
// Sampled by UpdateHistoryPS at the reprojected UV (presumably the previous frame's output - confirm with the caller)
Texture2D HistoryTexture;
|
|
SamplerState HistorySampler;
|
|
// Lerp factor from the new value toward the history value in UpdateHistoryPS; replaced by 0 when the history is rejected
float HistoryWeight;
|
|
|
|
/**
 * Reprojects the occlusion history and blends it with the new value.
 * The reprojection only accounts for camera motion, so it is only correct for pixels belonging to static objects.
 */
void UpdateHistoryPS(
	in float4 UVAndScreenPos : TEXCOORD0,
	out float4 OutColor : SV_Target0)
{
	// x, y in [0, 1] (from the [-1, 1] screen position, y flipped) and the sampled scene depth texture value in z
	float3 xyd;
	xyd.xy = UVAndScreenPos.zw * float2(0.5, -0.5) + 0.5;
	xyd.z = Texture2DSampleLevel(SceneDepthTexture, SceneDepthTextureSampler, UVAndScreenPos.xy, 0).r;

	// Evaluate the camera motion vector from the CameraMotion coefficients
	float scaleM = 1.0 / (dot(xyd, CameraMotion[0].xyz) + CameraMotion[0].w);
	float2 mv;
	mv.x = ((xyd.x * ((CameraMotion[1].x * xyd.y) + (CameraMotion[1].y * xyd.z) + CameraMotion[1].z))
		+ (CameraMotion[1].w * xyd.y)
		+ (CameraMotion[2].x * xyd.x * xyd.x)
		+ (CameraMotion[2].y * xyd.z)
		+ CameraMotion[2].z) * scaleM;
	mv.y = ((xyd.y * ((CameraMotion[3].x * xyd.x) + (CameraMotion[3].y * xyd.z) + CameraMotion[3].z))
		+ (CameraMotion[3].w * xyd.x)
		+ (CameraMotion[4].x * xyd.y * xyd.y)
		+ (CameraMotion[4].y * xyd.z)
		+ CameraMotion[4].z) * scaleM;

	// Where this pixel was, as a buffer UV and as a [-1, 1] screen position
	float2 OldUV = UVAndScreenPos.xy + mv / float2(0.5, -0.5) * View.ScreenPositionScaleBias.xy;
	float2 OldScreenPosition = (OldUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;

	// Distance field AO was computed at 0,0 regardless of viewrect min, so remove the view rect offset from both lookups
	float2 ViewRectMinUV = View.ViewRectMin.xy * View.ViewSizeAndSceneTexelSize.zw;
	float4 NewValue = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, UVAndScreenPos.xy - ViewRectMinUV, 0);
	float4 HistoryValue = Texture2DSampleLevel(HistoryTexture, HistorySampler, OldUV.xy - ViewRectMinUV, 0);

	//@todo - this doesn't take camera movement into account
	float DepthDeltaFraction = abs(abs(NewValue.a) - abs(HistoryValue.a)) / abs(NewValue.a);

	// History is discarded when the reprojected position left the screen, ...
	bool bReprojectedOffscreen = any(OldScreenPosition > 1) || any(OldScreenPosition < -1);
	// ... when the depths differ by more than 3%, ...
	bool bDepthMismatch = DepthDeltaFraction > .03f;
	// ... or when the history pixel had no distance field representation (negative alpha)
	bool bHistoryInvalid = HistoryValue.a < 0;

	float EffectiveHistoryWeight = (bReprojectedOffscreen || bDepthMismatch || bHistoryInvalid) ? 0 : HistoryWeight;

	float3 Blended = lerp(NewValue.rgb, HistoryValue.rgb, EffectiveHistoryWeight);

	// Replace NaN channels with 0 in the written result
	OutColor.rgb = isnan(Blended) ? 0 : Blended;
	OutColor.a = NewValue.a;
}
|
|
|
|
/**
 * Upsamples the AO results to full resolution.
 * With BILATERAL_UPSAMPLE the four nearest low resolution texels are blended with bilinear weights
 * scaled by how closely each texel's stored depth matches the full resolution scene depth.
 */
void AOUpsamplePS(in float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	// Distance field AO was computed at 0,0 regardless of viewrect min
	float2 DistanceFieldUVs = UVAndScreenPos.xy - View.ViewRectMin.xy * View.ViewSizeAndSceneTexelSize.zw;

#define BILATERAL_UPSAMPLE 1
#if BILATERAL_UPSAMPLE
	float2 LowResBufferSize = floor(View.RenderTargetSize / DOWNSAMPLE_FACTOR);
	float2 LowResTexelSize = 1.0f / LowResBufferSize;

	// Center of the upper left texel of the 2x2 footprint, and the fractional position inside that footprint
	float2 FootprintUV = floor(DistanceFieldUVs * LowResBufferSize - .5f) / LowResBufferSize + .5f * LowResTexelSize;
	float2 Fraction = (DistanceFieldUVs - FootprintUV) * LowResBufferSize;
	float2 InvFraction = 1 - Fraction;

	float4 Tap00 = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, FootprintUV, 0);
	float4 Tap10 = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, FootprintUV + float2(LowResTexelSize.x, 0), 0);
	float4 Tap01 = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, FootprintUV + float2(0, LowResTexelSize.y), 0);
	float4 Tap11 = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, FootprintUV + LowResTexelSize, 0);

	// Standard bilinear weights, in 00, 10, 01, 11 order
	float4 BilinearCornerWeights = float4(
		InvFraction.y * InvFraction.x,
		InvFraction.y * Fraction.x,
		Fraction.y * InvFraction.x,
		Fraction.y * Fraction.x);

	// Keeps the depth weight finite when a corner depth matches exactly
	float Epsilon = .0001f;

	// Alpha stores depth with a flag in the sign bit, so only the magnitude is compared
	float4 TapDepths = abs(float4(Tap00.w, Tap10.w, Tap01.w, Tap11.w));
	float FullResDepth = CalcSceneDepth(UVAndScreenPos.xy);
	float4 DepthWeights = 1.0f / (abs(TapDepths - FullResDepth.xxxx) + Epsilon);
	float4 FinalWeights = BilinearCornerWeights * DepthWeights;

	float3 WeightedSum = FinalWeights.x * Tap00.xyz;
	WeightedSum += FinalWeights.y * Tap10.xyz;
	WeightedSum += FinalWeights.z * Tap01.xyz;
	WeightedSum += FinalWeights.w * Tap11.xyz;

	float3 BentNormal = WeightedSum / dot(FinalWeights, 1);

#else
	float3 BentNormal = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, DistanceFieldUVs, 0).xyz;
#endif

#if OUTPUT_BENT_NORMAL
	OutColor = float4(BentNormal, 1);
#else
	// Output only the bent normal length, replicated to rgb
	OutColor = float4(length(BentNormal).xxx, 1);
#endif
}
|
|
|
|
// Written by VisualizeMeshDistanceFieldCS at DispatchThreadId.xy (rgb = visualization value, a = 0)
RWTexture2D<float4> RWVisualizeMeshDistanceFields;
|
|
|
|
/**
 * Returns true if the ray starting at RayOrigin and heading along UnitRayDirection comes within SphereRadius of SphereCenter.
 * Points behind the origin are not considered, so a sphere behind the ray only passes if it contains the origin.
 */
bool RayHitSphere( float3 RayOrigin, float3 UnitRayDirection, float3 SphereCenter, float SphereRadius )
{
	// Distance along the ray of the point nearest to the sphere center, clamped to the ray start
	float NearestRayTime = max( 0, dot( SphereCenter - RayOrigin, UnitRayDirection ) );
	float3 CenterToNearestPoint = RayOrigin + NearestRayTime * UnitRayDirection - SphereCenter;
	float DistanceSquared = dot( CenterToNearestPoint, CenterToNearestPoint );

	// Compare squared distances to avoid a sqrt
	return DistanceSquared <= Square( SphereRadius );
}
|
|
|
|
// Number of entries in the per-threadgroup IntersectingObjectIndices list
#define MAX_INTERSECTING_OBJECTS 256
|
|
// Object indices gathered by VisualizeMeshDistanceFieldCS and consumed by RayTraceThroughDistanceFields
groupshared uint IntersectingObjectIndices[MAX_INTERSECTING_OBJECTS];
|
|
|
|
// Counter for IntersectingObjectIndices, incremented with InterlockedAdd by the threads of the group
groupshared uint NumIntersectingObjects;
|
|
|
|
/**
 * Sphere traces a world space ray segment through the distance field of every object in the group's intersecting object list.
 *
 * @param WorldRayStart		Start of the ray segment in world space.
 * @param WorldRayEnd		End of the ray segment in world space.
 * @param MaxRayTime		Value MinRayTime is initialized to, returned unchanged when nothing was hit.
 * @param MinRayTime		Out: smallest hit time found over all objects, as volume space ray time multiplied by UVScaleAndVolumeScale.w.
 * @param TotalStepsTaken	Out: number of trace steps taken, summed over all objects whose box the ray entered.
 */
void RayTraceThroughDistanceFields(float3 WorldRayStart, float3 WorldRayEnd, float MaxRayTime, out float MinRayTime, out float TotalStepsTaken)
{
	MinRayTime = MaxRayTime;
	TotalStepsTaken = 0;

	// The counter is incremented atomically per intersecting object and can end up larger than the list,
	// so never read more entries than the list can hold
	const uint NumObjectsToTrace = min(NumIntersectingObjects, (uint)MAX_INTERSECTING_OBJECTS);

	const uint MaxSteps = 256;
	// Lower bound on the step so the trace keeps advancing where the distance field is ~0
	const float MinStepSize = 1.0f / (4 * MaxSteps);

	LOOP
	for (uint ListObjectIndex = 0; ListObjectIndex < NumObjectsToTrace; ListObjectIndex++)
	{
		uint ObjectIndex = IntersectingObjectIndices[ListObjectIndex];

		float3 LocalPositionExtent = LoadObjectLocalPositionExtent(ObjectIndex);
		float4x4 WorldToVolume = LoadObjectWorldToVolume(ObjectIndex);
		float4 UVScaleAndVolumeScale = LoadObjectUVScale(ObjectIndex);
		float3 UVAdd = LoadObjectUVAdd(ObjectIndex);

		// Move the ray into the object's volume space
		float3 VolumeRayStart = mul(float4(WorldRayStart, 1), WorldToVolume).xyz;
		float3 VolumeRayEnd = mul(float4(WorldRayEnd, 1), WorldToVolume).xyz;
		float3 VolumeRayDirection = VolumeRayEnd - VolumeRayStart;
		float VolumeRayLength = length(VolumeRayDirection);
		VolumeRayDirection /= VolumeRayLength;

		// Intersection times are fractions of the segment, x = entry and y = exit
		float2 IntersectionTimes = LineBoxIntersect(VolumeRayStart, VolumeRayEnd, -LocalPositionExtent, LocalPositionExtent);

		if (IntersectionTimes.x < IntersectionTimes.y && IntersectionTimes.x < 1)
		{
			// Start marching where the ray enters the object's box
			float SampleRayTime = IntersectionTimes.x * VolumeRayLength;

			float MinDistance = 1000000;
			float3 IntersectionPosition = float3(0, 0, 0);

			uint StepIndex = 0;

			LOOP
			for (; StepIndex < MaxSteps; StepIndex++)
			{
				float3 SampleVolumePosition = VolumeRayStart + VolumeRayDirection * SampleRayTime;
				float3 ClampedSamplePosition = clamp(SampleVolumePosition, -LocalPositionExtent, LocalPositionExtent);
				float3 VolumeUV = DistanceFieldVolumePositionToUV(ClampedSamplePosition, UVScaleAndVolumeScale.xyz, UVAdd);
				float DistanceField = Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, VolumeUV, 0).x;
				MinDistance = min(MinDistance, DistanceField);
				IntersectionPosition = SampleVolumePosition;

				// Advance by the distance to the nearest surface, but always by at least MinStepSize
				float StepDistance = max(DistanceField, MinStepSize);
				SampleRayTime += StepDistance;

				// Terminate the trace if we reached a negative area or went past the end of the ray
				if (DistanceField < 0
					|| SampleRayTime > IntersectionTimes.y * VolumeRayLength)
				{
					break;
				}
			}

			//Result = max(Result, StepIndex / (float)MaxSteps);

			// Running out of steps is treated as a hit, the same as reaching a negative distance
			if (MinDistance * UVScaleAndVolumeScale.w < 0 || StepIndex == MaxSteps)
			{
				MinRayTime = min(MinRayTime, SampleRayTime * UVScaleAndVolumeScale.w);
			}

			TotalStepsTaken += StepIndex;
		}
	}
}
|
|
|
|
/**
 * Visualizes mesh distance fields: each thread traces a view ray through the distance fields of the objects
 * gathered by its thread group and writes a brightness based on the number of trace steps taken.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void VisualizeMeshDistanceFieldCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZEX + GroupThreadId.x;

	// Reconstruct the world position of the opaque surface under this thread's pixel
	float2 ScreenUV = float2((DispatchThreadId.xy * DOWNSAMPLE_FACTOR + View.ViewRectMin.xy + .5f) * View.ViewSizeAndSceneTexelSize.zw);
	float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;

	float SceneDepth = CalcSceneDepth(ScreenUV);
	float4 HomogeneousWorldPosition = mul(float4(ScreenPosition * SceneDepth, SceneDepth, 1), View.ScreenToWorld);
	float3 OpaqueWorldPosition = HomogeneousWorldPosition.xyz / HomogeneousWorldPosition.w;

	// The ray goes from the view origin through the pixel for a fixed distance; scene depth only supplies the direction
	float TraceDistance = 10000;
	float3 WorldRayStart = View.ViewOrigin.xyz;
	float3 WorldRayEnd = WorldRayStart + normalize(OpaqueWorldPosition - View.ViewOrigin.xyz) * TraceDistance;
	float3 WorldRayDirection = WorldRayEnd - WorldRayStart;
	float3 UnitWorldRayDirection = normalize(WorldRayDirection);

	if (ThreadIndex == 0)
	{
		NumIntersectingObjects = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	// Each thread tests a strided subset of the objects and appends the ones its ray hits to the group's shared list
	LOOP
	for (uint ObjectIndex = ThreadIndex; ObjectIndex < NumObjects; ObjectIndex += THREADGROUP_TOTALSIZE)
	{
		float4 SphereCenterAndRadius = LoadObjectPositionAndRadius(ObjectIndex);

		//@todo - make independent of current pixel
		BRANCH
		if (RayHitSphere(WorldRayStart, UnitWorldRayDirection, SphereCenterAndRadius.xyz, SphereCenterAndRadius.w))
		{
			uint ListIndex;
			InterlockedAdd(NumIntersectingObjects, 1U, ListIndex);

			// The list has a fixed capacity, drop objects that don't fit instead of writing out of bounds
			if (ListIndex < MAX_INTERSECTING_OBJECTS)
			{
				IntersectingObjectIndices[ListIndex] = ObjectIndex;
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	// The counter keeps incrementing for dropped objects, clamp it to the number of entries actually stored
	if (ThreadIndex == 0)
	{
		NumIntersectingObjects = min(NumIntersectingObjects, (uint)MAX_INTERSECTING_OBJECTS);
	}

	GroupMemoryBarrierWithGroupSync();

	float MinRayTime;
	float TotalStepsTaken;

	// Trace once to find the distance to first intersection
	RayTraceThroughDistanceFields(WorldRayStart, WorldRayEnd, TraceDistance, MinRayTime, TotalStepsTaken);

	float TempMinRayTime;
	// Recompute the ray end point
	WorldRayEnd = WorldRayStart + UnitWorldRayDirection * MinRayTime;
	// Trace a second time to only accumulate steps taken before the first intersection, improves visualization
	RayTraceThroughDistanceFields(WorldRayStart, WorldRayEnd, MinRayTime, TempMinRayTime, TotalStepsTaken);

	// Brightness is proportional to the step count, saturating at 200 steps
	float3 Result = saturate(TotalStepsTaken / 200.0f);

	if (MinRayTime < TraceDistance)
	{
		// Brighten pixels whose ray hit something
		Result += .1f;
	}

	RWVisualizeMeshDistanceFields[DispatchThreadId.xy] = float4(Result, 0);
}
|
|
|
|
// Visualization texture sampled by VisualizeDistanceFieldUpsamplePS
Texture2D VisualizeDistanceFieldTexture;
|
|
// Sampler used with VisualizeDistanceFieldTexture
SamplerState VisualizeDistanceFieldSampler;
|
|
|
|
/** Copies the distance field visualization texture to the output with alpha set to 1. */
void VisualizeDistanceFieldUpsamplePS(in float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	// The visualization was computed at 0,0 regardless of viewrect min, so remove the view rect offset before sampling
	float2 ViewRectMinUV = View.ViewRectMin.xy * View.ViewSizeAndSceneTexelSize.zw;
	float2 DistanceFieldUVs = UVAndScreenPos.xy - ViewRectMinUV;

	float4 Visualization = Texture2DSampleLevel(VisualizeDistanceFieldTexture, VisualizeDistanceFieldSampler, DistanceFieldUVs, 0);

	OutColor = float4(Visualization.xyz, 1);
}