You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
* DF lighting GBuffer dependencies packed into 2 textures (saves .1ms) * Reverted CULL_OBJECTS_TO_RECORD as performance savings are inconsistent * Added low quality gap fill pass that just does a bilateral filter (saves .4ms) * Enabled interpolation depth testing (saves .2ms) * Disabled the temporal stability filter (saves .2ms) * Overall empty scene cost on PS4 3.77ms -> 2.83ms [CL 2501856 by Daniel Wright in Main branch]
603 lines
22 KiB
Plaintext
603 lines
22 KiB
Plaintext
// Copyright 1998-2015 Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
DistanceFieldSurfaceCacheLighting.usf
|
|
=============================================================================*/
|
|
|
|
#include "Common.usf"
|
|
#include "DeferredShadingCommon.usf"
|
|
#include "DistanceFieldLightingShared.usf"
|
|
#include "DistanceFieldAOShared.usf"
|
|
|
|
/** Values passed from ObjectCullVS to ObjectCullPS. Both members are nointerpolation, so they are not interpolated across the primitive. */
struct FObjectCullVertexOutput
{
	// Result of LoadObjectPositionAndRadius for the object: xyz is used as the sphere center, w as its radius
	nointerpolation float4 PositionAndRadius : TEXCOORD0;
	// Index of the object being culled, taken from SV_InstanceID in ObjectCullVS
	nointerpolation uint ObjectIndex : TEXCOORD1;
};
|
|
|
|
// Multiplier applied in ObjectCullVS to (object radius + AOMaxDistance) before the input vertex is scaled by it
float ConservativeRadiusScale;
|
|
|
|
/**
 * Used when culling objects into screenspace tile lists.
 * Scales the input vertex by the object's expanded bounding radius, places it at the object's center,
 * and forwards the unexpanded sphere plus the object index to ObjectCullPS.
 */
void ObjectCullVS(
	float4 InPosition : ATTRIBUTE0,
	uint ObjectIndex : SV_InstanceID,
	out FObjectCullVertexOutput Output,
	out float4 OutPosition : SV_POSITION
	)
{
	//@todo - implement ConservativelyBoundSphere
	const float4 BoundingSphere = LoadObjectPositionAndRadius(ObjectIndex);

	// The pixel shader gets the original sphere, not the expanded one
	Output.ObjectIndex = ObjectIndex;
	Output.PositionAndRadius = BoundingSphere;

	//@todo - expand to handle conservative rasterization
	const float ExpandedRadius = ConservativeRadiusScale * (AOMaxDistance + BoundingSphere.w);
	const float3 ExpandedWorldPosition = BoundingSphere.xyz + InPosition.xyz * ExpandedRadius;

	OutPosition = mul(float4(ExpandedWorldPosition, 1), View.WorldToClip);
}
|
|
|
|
// Per-tile counters; ObjectCullPS atomically increments the entry at TileIndex * 4 + 1 + culled list index
RWBuffer<uint> RWTileHeadDataUnpacked;
// Receives the object indices that ObjectCullPS appends for each tile and culled list
RWBuffer<uint> RWTileArrayData;

// Per-tile cone, read in ObjectCullPS as xyz = axis, w = cosine of the cone angle
Buffer<float4> TileConeAxisAndCos;
// Per-tile depth ranges, passed through to SphereIntersectConeWithDepthRanges
Buffer<float4> TileConeDepthRanges;

// Tile grid dimensions: x is the row pitch used to flatten a tile coordinate, x * y the total tile count
float2 NumGroups;
|
|
|
|
/**
 * Intersects a single object with the tile and adds to the intersection list if needed.
 * The object is recorded in the lowest-indexed culled list whose expanded sphere touches the tile's cone.
 */
void ObjectCullPS(
	FObjectCullVertexOutput Input,
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : SV_Target0)
{
	// The useful output of this shader goes through the UAVs, the color target just gets zero
	OutColor = 0;

	// The integer pixel coordinate is used directly as the tile coordinate
	uint2 TileCoord = (uint2)SVPos.xy;
	uint TileIndex = TileCoord.y * NumGroups.x + TileCoord.x;

	float4 TileAxisAndCos = TileConeAxisAndCos.Load(TileIndex);
	float4 TileDepthRanges = TileConeDepthRanges.Load(TileIndex);

	// The sphere is transformed to view space below, so the cone vertex is the view space origin
	float3 ConeVertex = 0;
	float3 ConeAxis = TileAxisAndCos.xyz;
	float ConeCos = TileAxisAndCos.w;
	float ConeSin = sqrt(1 - ConeCos * ConeCos);

	// A value of 1 is conservative, but has a huge impact on performance
	float RadiusScale = .5f;

	float4 ObjectSphere = Input.PositionAndRadius;
	float3 ViewSpaceCenter = mul(float4(ObjectSphere.xyz + View.PreViewTranslation.xyz, 1), View.TranslatedWorldToView).xyz;

	// Lists are visited from last to first, so the value left behind is the smallest intersecting index
	int FirstIntersectingList = -1;

	UNROLL
	for (int ListIndex = NUM_CULLED_OBJECT_LISTS - 1; ListIndex >= 0; ListIndex--)
	{
		uint PhaseStart;
		uint PhaseEnd;
		GetPhaseParameters(ListIndex, PhaseStart, PhaseEnd);

		// Grow the object's sphere by the sample radius derived from this list's last step
		float ListMaxSampleRadius = GetStepOffset(PhaseEnd) * 2 * RadiusScale;
		float4 ExpandedSphere = float4(ViewSpaceCenter, ObjectSphere.w + ListMaxSampleRadius);

		BRANCH
		if (SphereIntersectConeWithDepthRanges(ExpandedSphere, ConeVertex, ConeAxis, ConeCos, ConeSin, TileDepthRanges))
		{
			FirstIntersectingList = ListIndex;
		}
	}

	if (FirstIntersectingList >= 0)
	{
		// Reserve a slot; the counter keeps incrementing even once the tile is full
		uint SlotIndex;
		InterlockedAdd(RWTileHeadDataUnpacked[TileIndex * 4 + 1 + (uint)FirstIntersectingList], 1U, SlotIndex);

		if (SlotIndex < MAX_OBJECTS_PER_TILE)
		{
			// Note: indexing so that threads are writing to RWTileArrayData coherently, has a huge impact on speed, even though the array data for one record is no longer dense
			uint TileCount = (uint)(NumGroups.x * NumGroups.y + .5f);
			uint DataIndex = (SlotIndex * TileCount + TileIndex) * NUM_CULLED_OBJECT_LISTS + FirstIntersectingList;

			RWTileArrayData[DataIndex] = Input.ObjectIndex;
		}
	}
}
|
|
|
|
/**
 * Pixel shader that reads scene depth and the GBuffer at the full resolution texel matching this
 * downsampled pixel and outputs the result of EncodeDownsampledGBuffer.
 */
void ComputeDistanceFieldNormalPS(
	in float4 UVAndScreenPos : TEXCOORD0,
	in float4 SVPos : SV_POSITION,
	out float4 OutColor : SV_Target0)
{
	// Sample from the center of the top left full resolution texel
	float2 FullResTexelCenter = floor(SVPos.xy) * DOWNSAMPLE_FACTOR + View.ViewRectMin.xy + .5f;
	float2 ScreenUV = float2(FullResTexelCenter * View.ViewSizeAndSceneTexelSize.zw);

	float SceneDepth = CalcSceneDepth(ScreenUV);
	FGBufferData GBufferData = GetGBufferData(ScreenUV);

	OutColor = EncodeDownsampledGBuffer(GBufferData, SceneDepth);
}
|
|
|
|
// Written by ComputeDistanceFieldNormalCS: one EncodeDownsampledGBuffer result per dispatch thread, at DispatchThreadId.xy
RWTexture2D<float4> RWDistanceFieldNormal;
|
|
|
|
/**
 * Compute shader counterpart of ComputeDistanceFieldNormalPS: each thread encodes the downsampled GBuffer
 * for one texel and stores it in RWDistanceFieldNormal.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void ComputeDistanceFieldNormalCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	// One thread per downsampled texel; read the full resolution source at the matching texel center
	uint2 OutputCoord = DispatchThreadId.xy;
	float2 ScreenUV = float2((OutputCoord * DOWNSAMPLE_FACTOR + View.ViewRectMin.xy + .5f) * View.ViewSizeAndSceneTexelSize.zw);

	float SceneDepth = CalcSceneDepth(ScreenUV);
	FGBufferData GBufferData = GetGBufferData(ScreenUV);

	RWDistanceFieldNormal[OutputCoord] = EncodeDownsampledGBuffer(GBufferData, SceneDepth);
}
|
|
|
|
/** Outputs the downsampled depth at the given UV, converted to device Z, through SV_DEPTH. The color output is always zero. */
void WriteDownsampledDepthPS(
	in float4 UVAndScreenPos : TEXCOORD0,
	out float4 OutColor : SV_Target0,
	out float OutDepth : SV_DEPTH)
{
	OutColor = 0;

	// GetDownsampledDepth's result is run through ConvertToDeviceZ before being written out
	float DownsampledDepth = GetDownsampledDepth(UVAndScreenPos.xy);
	OutDepth = ConvertToDeviceZ(DownsampledDepth);
}
|
|
|
|
// For some reason gives inaccurate results at lower resolutions
|
|
// Follows FINAL_INTERPOLATION_PASS: when non-zero, the splat shaders reconstruct world position from an interpolated screen vector
#define USE_SCREENVECTOR_WORLD_POSITION FINAL_INTERPOLATION_PASS
|
|
|
|
/** Values passed from IrradianceCacheSplatVS to IrradianceCacheSplatPS. Everything except ScreenVector is nointerpolation. */
struct FIrradianceCacheSplatVertexOutput
{
	// xyz = record world position, w = record radius after the vertex shader's clamping and scaling
	nointerpolation float4 PositionRadius : TEXCOORD0;
	// xyz = record normal, w = fade factor used to scale the interpolation weight
	nointerpolation float4 NormalAndFade : TEXCOORD1;
#if FINAL_INTERPOLATION_PASS
	// Bent normal stored for the record
	nointerpolation float3 BentNormal : TEXCOORD2;
	#if SUPPORT_IRRADIANCE
		// Record irradiance, already multiplied by the vertex shader's distance fade
		nointerpolation float3 Irradiance : TEXCOORD3;
	#endif
#endif
#if USE_SCREENVECTOR_WORLD_POSITION
	// Interpolated vector; the pixel shader computes ViewOrigin + ScreenVector * SceneDepth from it
	float3 ScreenVector : TEXCOORD5;
#endif
};
|
|
|
|
// Radius scale for records in IrradianceCacheSplatVS; lerped toward 1 as the record radius approaches 80
float InterpolationRadiusScale;
// Added to the splat's clip space xy (multiplied by w) to shift it to the pixel center where shading is done
float2 NormalizedOffsetToPixelCenter;
// Extra splat radius per unit of view distance (times 4), only applied when !FINAL_INTERPOLATION_PASS
float HackExpand;

// +1 = behind volume (safe from near plane clipping without depth testing),
// -1 = in front of volume (required for conservative results with depth testing)
//@todo - using -1 currently causes artifacts with architectural scenes
float InterpolationBoundingDirection;
|
|
|
|
/**
 * Expands a screen-facing polygon to cover a surface cache record for splatting.
 * JobIndex selects the record. InUV selects the corner along View.ViewRight / View.ViewUp. InPosition is not read.
 */
void IrradianceCacheSplatVS(
	float2 InPosition : ATTRIBUTE0,
	float2 InUV : ATTRIBUTE1,
	uint JobIndex : SV_InstanceID,
	out FIrradianceCacheSplatVertexOutput Output,
	out float4 OutPosition : SV_POSITION
	)
{
	// Fetch the per-record data up front
	float4 PackedPositionRadius = IrradianceCachePositionRadius[JobIndex];
	float4 NormalAndPackedFade = IrradianceCacheNormal[JobIndex];
	float OccluderRadius = IrradianceCacheOccluderRadius[JobIndex];

	float3 RecordPosition = PackedPositionRadius.xyz;
	// Only the magnitude of the stored radius is used, and never more than the occluder radius
	float ClampedRadius = min(abs(PackedPositionRadius.w), OccluderRadius);

	float3 ViewToCenter = RecordPosition - View.ViewOrigin.xyz;
	float ViewToCenterLength = length(ViewToCenter);
	float3 ViewToCenterDirection = ViewToCenter / ViewToCenterLength;

	// Only allow full interpolation expanding for small samples, it won't be noticeable for large samples but adds a lot of splatting cost
	float RadiusScaleLerp = max(saturate(ClampedRadius / 80), .2f);
	float EffectiveRadiusScale = lerp(InterpolationRadiusScale, 1, RadiusScaleLerp);
	// The radius is floored at 1% of the view distance before the scale is applied
	float RecordRadius = max(ClampedRadius, ViewToCenterLength * .01f) * EffectiveRadiusScale;

	// Distance from the camera position that ViewToCenterDirection will intersect the near plane
	float NearPlaneDistance = View.NearPlane / dot(View.ViewForward, ViewToCenterDirection);

	// Don't move closer than the near plane to avoid getting clipped
	// Clamping at the vertex level only works because it's a screen-aligned triangle
	// Small bias to push just off the near plane
	float UnclampedOffset = InterpolationBoundingDirection * RecordRadius;
	bool bWouldCrossNearPlane = ViewToCenterLength + UnclampedOffset < NearPlaneDistance + .001f;
	float OffsetFromCenter = bWouldCrossNearPlane ? (-ViewToCenterLength + NearPlaneDistance + .001f) : UnclampedOffset;

	// Construct a virtual sample position that won't be near plane clipped and will have a positive z after projection
	float3 VirtualPosition = RecordPosition + OffsetFromCenter * ViewToCenterDirection;

	// Clipping the edge of the circle a bit can save perf, but introduces artifacts in the interpolation
	float RadiusSlack = 1.0f;
	// Compute new radius since we moved the center away from the camera, approximate as similar triangles
	// This is really incorrect because it's not taking into account perspective correction on a sphere - the widest part in screenspace is not at the world space center
	float VirtualRadius = RadiusSlack * RecordRadius * (ViewToCenterLength + OffsetFromCenter) / ViewToCenterLength;

#if !FINAL_INTERPOLATION_PASS
	// Combat the mysterious bug where samples from last frame do not cover this frame's pixel even with no camera movement, seems to be a precision issue
	VirtualRadius += HackExpand * ViewToCenterLength * 4;
#endif

	// Push this corner out from the virtual center along the camera's right / up axes
	float3 CornerOffset = (View.ViewRight * InUV.x + View.ViewUp * InUV.y) * VirtualRadius;
	float3 CornerPosition = VirtualPosition + CornerOffset;

	OutPosition = mul(float4(CornerPosition.xyz, 1), View.WorldToClip);

#if USE_SCREENVECTOR_WORLD_POSITION
	// Computed before the pixel center offset below is applied
	Output.ScreenVector = mul(float4(OutPosition.xy / OutPosition.w, 1, 0), View.ScreenToWorld).xyz;
#endif

	// Move the vertex over to compensate for the difference between the low res pixel center and the high res pixel center where shading is done
	OutPosition.xy += NormalizedOffsetToPixelCenter * OutPosition.w;

	Output.PositionRadius = float4(RecordPosition, RecordRadius);

#if FADE_RECORDS_OVER_TIME
	float Fade = 1 - (abs(NormalAndPackedFade.w) - 1);
	/*
	FLATTEN
	if (PackedPositionRadius.w < 0)
	{
		// Fading out
		Fade = 1 - Fade;
	}
	else
	{
		Fade = max(Fade, .001f);
	}
	*/
#else
	float Fade = 1;
#endif

	Output.NormalAndFade = float4(NormalAndPackedFade.xyz, Fade);

#if FINAL_INTERPOLATION_PASS
	Output.BentNormal = IrradianceCacheBentNormal[JobIndex].xyz;
	#if SUPPORT_IRRADIANCE
		// Irradiance ramps to zero as the view distance approaches 95% of AOMaxViewDistance
		float ViewDistance = length(View.ViewOrigin.xyz - RecordPosition);
		float FadeAlpha = saturate(.001f * (.95f * AOMaxViewDistance - ViewDistance));
		Output.Irradiance = IrradianceCacheIrradiance[JobIndex].xyz * FadeAlpha;
	#endif
#endif
}
|
|
|
|
// Scales sqrt(saturate(1 - dot(pixel normal, record normal))) to produce the normal error in IrradianceCacheSplatPS
float InterpolationAngleNormalization;
// Scales the normalized plane distance to produce the point-behind-plane error in IrradianceCacheSplatPS
float InvMinCosPointBehindPlane;
|
|
|
|
/**
 * Computes surface cache weighting between the current pixel and the record being splatted.
 * The weight goes to OutBentNormal.a; in the final interpolation pass the weighted bent normal (and irradiance) are output too.
 */
void IrradianceCacheSplatPS(
	FIrradianceCacheSplatVertexOutput Input,
	in float4 SVPos : SV_POSITION
	,out float4 OutBentNormal : SV_Target0
#if FINAL_INTERPOLATION_PASS && SUPPORT_IRRADIANCE
	,out float4 OutIrradiance : SV_Target1
#endif
	)
{
	// Pixels that fail the tests below contribute nothing
	OutBentNormal = 0;
#if FINAL_INTERPOLATION_PASS && SUPPORT_IRRADIANCE
	OutIrradiance = 0;
#endif

	float2 PixelCoord = floor(SVPos.xy);
	float2 BaseLevelScreenUV = float2((PixelCoord * DownsampleFactorToBaseLevel + .5f) * BaseLevelTexelSize);

	float3 WorldNormal;
	float SceneDepth;
	bool bHasDistanceFieldRepresentation;
	bool bHasHeightfieldRepresentation;
	GetDownsampledGBuffer(BaseLevelScreenUV, WorldNormal, SceneDepth, bHasDistanceFieldRepresentation, bHasHeightfieldRepresentation);

#if USE_SCREENVECTOR_WORLD_POSITION

	// World position straight from the interpolated screen vector and the downsampled depth
	float3 OpaqueWorldPosition = View.ViewOrigin.xyz + Input.ScreenVector * SceneDepth;

#else

	// Re-read depth at the current level's texel and unproject it through ScreenToWorld
	float2 ScreenUV = float2((PixelCoord * CurrentLevelDownsampleFactor + View.ViewRectMin.xy + float2(.5f, .5f)) * View.ViewSizeAndSceneTexelSize.zw);
	SceneDepth = CalcSceneDepth(ScreenUV);
	float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
	float4 HomogeneousWorldPosition = mul(float4(ScreenPosition.xy * SceneDepth, SceneDepth, 1), View.ScreenToWorld);
	float3 OpaqueWorldPosition = HomogeneousWorldPosition.xyz / HomogeneousWorldPosition.w;

#endif

	float3 RecordPosition = Input.PositionRadius.xyz;
	float RecordRadius = Input.PositionRadius.w;

	// 0 at the record's center, 1 at (or beyond) its radius
	float Distance = length(OpaqueWorldPosition - RecordPosition);
	float DistanceError = saturate(Distance / RecordRadius);
	float Weight = 0;

	BRANCH
	if (DistanceError < 1 && SceneDepth < AOMaxViewDistance)
	{
		float3 RecordNormal = Input.NormalAndFade.xyz;
		float RecordFade = Input.NormalAndFade.w;

		float NormalError = InterpolationAngleNormalization * sqrt(saturate(1 - dot(WorldNormal, RecordNormal)));

		// Don't use a lighting record if it's in front of the query point.
		// Query points behind the lighting record may have nearby occluders that the lighting record does not see.
		// Offset the comparison point along the negative normal to prevent self-occlusion
		float3 OffsetRecordPosition = RecordPosition - 1 * RecordNormal.xyz;
		float3 RecordToVertexVector = OpaqueWorldPosition - OffsetRecordPosition;
		float DistanceToVertex = length(RecordToVertexVector);
		float PlaneDistance = dot(RecordNormal.xyz, RecordToVertexVector) / DistanceToVertex;

		// Setup an error metric that goes from 0 if the points are coplanar, to 1 if the point being shaded is at the angle corresponding to MinCosPointBehindPlane behind the plane
		float PointBehindPlaneError = min(max(PlaneDistance * InvMinCosPointBehindPlane, 0.0f), DistanceToVertex / 3.0f);

		// The largest of the three errors decides the weight
		float WorstError = max(DistanceError, max(NormalError, PointBehindPlaneError));
		float PrecisionScale = .1f;
		Weight = saturate(PrecisionScale * RecordFade * (1 - WorstError));
		//Weight = saturate(PrecisionScale * Input.NormalAndFade.w * (1 - DistanceError));
		//Weight = Input.NormalAndFade.w;

		// Not consumed anywhere in this shader
		float VisualizePlacement = Distance < .1f * RecordRadius;

#if FINAL_INTERPOLATION_PASS
		// Pixels without a distance field representation interpolate AO instead of a bent normal
		float3 InterpolatedValue = bHasDistanceFieldRepresentation ? Input.BentNormal : length(Input.BentNormal).xxx;
		OutBentNormal.rgb = InterpolatedValue * Weight;
	#if SUPPORT_IRRADIANCE
		OutIrradiance.rgb = Input.Irradiance * Weight;
	#endif
#endif
		OutBentNormal.a = Weight;
	}

	// Debug view: replaces the output with a small constant alpha for every covered pixel
#define VISUALIZE_SPLAT_OVERDRAW 0
#if VISUALIZE_SPLAT_OVERDRAW && FINAL_INTERPOLATION_PASS
	OutBentNormal.rgb = 0;
	//OutBentNormal.a = Input.Position.w <= SceneDepth ? .01f : 0.0f;
	OutBentNormal.a = .005f;// + (Weight > .001f ? .01f : 0);
#endif

	// Debug view: only pixels very close to a record's center get a non-zero alpha
#define VISUALIZE_RECORD_POINTS 0
#if VISUALIZE_RECORD_POINTS && FINAL_INTERPOLATION_PASS
	OutBentNormal.rgb = 1;
	OutBentNormal.a = (DistanceError < .05f) * .1f;
#endif
}
|
|
|
|
// Not referenced by any shader in this part of the file
Buffer<float4> DebugBuffer;

// 1 / ((1 - FadeDistanceFraction) * AOMaxViewDistance)
float DistanceFadeScale;
// Multiplier applied in AOCombinePS to the irradiance of pixels that have neither a distance field nor a heightfield representation
float SelfOcclusionReplacement;
|
|
|
|
/**
 * Normalizes the splatted surface cache values, packs depth in alpha.
 * The sign of the packed depth marks whether the texel received any splat weight.
 */
void AOCombinePS(
	in float4 UVAndScreenPos : TEXCOORD0
	,out float4 OutBentNormal : SV_Target0
#if SUPPORT_IRRADIANCE
	,out float4 OutIrradiance : SV_Target1
#endif
	)
{
#if SUPPORT_IRRADIANCE
	OutIrradiance = 0;
#endif

	float2 BufferUV = UVAndScreenPos.xy;

	float3 WorldNormal;
	float SceneDepth;
	bool bHasDistanceFieldRepresentation;
	bool bHasHeightfieldRepresentation;
	GetDownsampledGBuffer(BufferUV, WorldNormal, SceneDepth, bHasDistanceFieldRepresentation, bHasHeightfieldRepresentation);

#define VISUALIZE_ACCUMULATED_WEIGHTS 0
#if VISUALIZE_ACCUMULATED_WEIGHTS

	// Debug view: show the accumulated weight as a grey value
	float3 BentNormalAO = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, BufferUV, 0).aaa;
	OutBentNormal = float4(BentNormalAO, SceneDepth);

#else

	// xyz = weighted sum of splatted values, w = sum of weights
	float4 Accumulated = Texture2DSampleLevel(BentNormalAOTexture, BentNormalAOSampler, BufferUV, 0);

	if (Accumulated.w > 0)
	{
		float3 NormalizedValue = Accumulated.xyz / Accumulated.w;

		// Pixels without a distance field representation interpolate AO instead of a bent normal
		// Construct a bent normal from the interpolated AO
		OutBentNormal.rgb = bHasDistanceFieldRepresentation ? NormalizedValue : NormalizedValue.r * WorldNormal;
		OutBentNormal.a = SceneDepth;

	#if SUPPORT_IRRADIANCE
		float3 AccumulatedIrradiance = Texture2DSampleLevel(IrradianceTexture, IrradianceSampler, BufferUV, 0).xyz;
		OutIrradiance.rgb = AccumulatedIrradiance / Accumulated.w;

		if (!(bHasDistanceFieldRepresentation || bHasHeightfieldRepresentation))
		{
			OutIrradiance.rgb *= SelfOcclusionReplacement;
		}
	#endif
	}
	else
	{
		// Nothing was splatted here: output a placeholder value
		// Sign bit stores whether texel is valid
		OutBentNormal = float4(0, 0, .1f, -SceneDepth);
	}

	// Fade to unoccluded in the distance
	float BentNormalLength = length(OutBentNormal.rgb);
	float FadeAlpha = saturate((AOMaxViewDistance - SceneDepth) * DistanceFadeScale);
	float3 BentNormalDirection = OutBentNormal.rgb / max(BentNormalLength, .0001f);
	float3 FadedBentNormal = BentNormalDirection * lerp(1, BentNormalLength, FadeAlpha);

	// Clamp Nan's before they get to the filter
	OutBentNormal.rgb = clamp(FadedBentNormal, -1, 1);

	FLATTEN
	if (SceneDepth > AOMaxViewDistance)
	{
		// Mark as valid for the gap filling pass, so we don't get overwritten
		OutBentNormal.a = abs(OutBentNormal.a);
	}

#endif
}
|
|
|
|
// Output of VisualizeMeshDistanceFieldCS, one value per dispatch thread written at DispatchThreadId.xy
RWTexture2D<float4> RWVisualizeMeshDistanceFields;
|
|
|
|
/**
 * Returns true when the ray starting at RayOrigin along UnitRayDirection passes within SphereRadius of SphereCenter.
 * The closest point is restricted to the forward half of the ray, so a sphere behind the origin only hits if the origin itself is inside it.
 */
bool RayHitSphere( float3 RayOrigin, float3 UnitRayDirection, float3 SphereCenter, float SphereRadius )
{
	// Distance along the ray to the point nearest the center, never negative
	float ClosestRayTime = max( 0, dot( SphereCenter - RayOrigin, UnitRayDirection ) );
	float3 OriginToClosest = ClosestRayTime * UnitRayDirection;
	float3 CenterToClosest = RayOrigin + OriginToClosest - SphereCenter;

	// Compare squared lengths to avoid a sqrt
	float DistanceSquared = dot( CenterToClosest, CenterToClosest );
	return DistanceSquared <= Square( SphereRadius );
}
|
|
|
|
// Capacity of the per-group intersection list below
#define MAX_INTERSECTING_OBJECTS 512
// Object indices collected by VisualizeMeshDistanceFieldCS, consumed by RayTraceThroughTileCulledDistanceFields
groupshared uint IntersectingObjectIndices[MAX_INTERSECTING_OBJECTS];

// Number of intersections found; can exceed MAX_INTERSECTING_OBJECTS since it is incremented even when the list is full
groupshared uint NumIntersectingObjects;
|
|
|
|
/**
 * Sphere traces a world space ray through every object in the group's shared intersection list.
 *
 * @param WorldRayStart		World space start of the ray
 * @param WorldRayEnd		World space end of the ray
 * @param MaxRayTime		Value returned in MinRayTime when nothing is hit
 * @param MinRayTime		Smallest hit time found over all objects (volume space time multiplied by the object's volume scale), or MaxRayTime
 * @param TotalStepsTaken	Sum over all traced objects of the number of steps taken; reset to 0 on entry
 *
 * Reads NumIntersectingObjects / IntersectingObjectIndices, so the caller must have finished filling them.
 */
void RayTraceThroughTileCulledDistanceFields(float3 WorldRayStart, float3 WorldRayEnd, float MaxRayTime, out float MinRayTime, out float TotalStepsTaken)
{
	MinRayTime = MaxRayTime;
	TotalStepsTaken = 0;

	// Loop-invariant values, computed once for all objects and steps
	const uint MaxSteps = 256;
	const float MinStepSize = 1.0f / (4 * MaxSteps);
	// The counter can run past the list capacity, only the stored entries are valid
	const uint NumObjectsToTrace = min(NumIntersectingObjects, MAX_INTERSECTING_OBJECTS);

	LOOP
	for (uint ListObjectIndex = 0; ListObjectIndex < NumObjectsToTrace; ListObjectIndex++)
	{
		uint ObjectIndex = IntersectingObjectIndices[ListObjectIndex];

		float3 LocalPositionExtent = LoadObjectLocalPositionExtent(ObjectIndex);
		float4x4 WorldToVolume = LoadObjectWorldToVolume(ObjectIndex);
		float4 UVScaleAndVolumeScale = LoadObjectUVScale(ObjectIndex);
		float3 UVAdd = LoadObjectUVAdd(ObjectIndex);

		// Bring the ray into the object's volume space
		float3 VolumeRayStart = mul(float4(WorldRayStart, 1), WorldToVolume).xyz;
		float3 VolumeRayEnd = mul(float4(WorldRayEnd, 1), WorldToVolume).xyz;
		float3 VolumeRayDirection = VolumeRayEnd - VolumeRayStart;
		float VolumeRayLength = length(VolumeRayDirection);
		VolumeRayDirection /= VolumeRayLength;

		// Entry (x) and exit (y) times against the object's local bounds, as fractions of the ray
		float2 IntersectionTimes = LineBoxIntersect(VolumeRayStart, VolumeRayEnd, -LocalPositionExtent, LocalPositionExtent);

		if (IntersectionTimes.x < IntersectionTimes.y && IntersectionTimes.x < 1)
		{
			// Start marching at the entry point and stop once past the exit point
			float SampleRayTime = IntersectionTimes.x * VolumeRayLength;
			const float ExitRayTime = IntersectionTimes.y * VolumeRayLength;

			float MinDistance = 1000000;
			uint StepIndex = 0;

			LOOP
			for (; StepIndex < MaxSteps; StepIndex++)
			{
				float3 SampleVolumePosition = VolumeRayStart + VolumeRayDirection * SampleRayTime;
				float3 ClampedSamplePosition = clamp(SampleVolumePosition, -LocalPositionExtent, LocalPositionExtent);
				float3 VolumeUV = DistanceFieldVolumePositionToUV(ClampedSamplePosition, UVScaleAndVolumeScale.xyz, UVAdd);
				float DistanceField = Texture3DSampleLevel(DistanceFieldTexture, DistanceFieldSampler, VolumeUV, 0).x;
				MinDistance = min(MinDistance, DistanceField);

				// Always advance by at least MinStepSize so the march cannot stall
				float StepDistance = max(DistanceField, MinStepSize);
				SampleRayTime += StepDistance;

				// Terminate the trace if we reached a negative area or went past the end of the ray
				if (DistanceField < 0
					|| SampleRayTime > ExitRayTime)
				{
					break;
				}
			}

			//Result = max(Result, StepIndex / (float)MaxSteps);

			// Running out of steps is treated the same as reaching a negative distance
			if (MinDistance * UVScaleAndVolumeScale.w < 0 || StepIndex == MaxSteps)
			{
				MinRayTime = min(MinRayTime, SampleRayTime * UVScaleAndVolumeScale.w);
			}

			TotalStepsTaken += StepIndex;
		}
	}
}
|
|
|
|
/**
 * Debug visualization: traces a view ray per thread through the mesh distance fields and writes a value that
 * grows with the number of steps taken, plus a constant when something was hit.
 * The threads of a group first build a shared list of objects whose bounding spheres are hit by their rays.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void VisualizeMeshDistanceFieldCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZEX + GroupThreadId.x;

	// Reconstruct the world position of the opaque surface under this thread's texel
	float2 ScreenUV = float2((DispatchThreadId.xy * DOWNSAMPLE_FACTOR + View.ViewRectMin.xy + .5f) * View.ViewSizeAndSceneTexelSize.zw);
	float2 ScreenPosition = (ScreenUV.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;

	float SceneDepth = CalcSceneDepth(ScreenUV);
	float4 HomogeneousWorldPosition = mul(float4(ScreenPosition * SceneDepth, SceneDepth, 1), View.ScreenToWorld);
	float3 OpaqueWorldPosition = HomogeneousWorldPosition.xyz / HomogeneousWorldPosition.w;

	// The ray runs from the camera through that position, out to a fixed distance
	float TraceDistance = 20000;
	float3 WorldRayStart = View.ViewOrigin.xyz;
	float3 WorldRayEnd = WorldRayStart + normalize(OpaqueWorldPosition - View.ViewOrigin.xyz) * TraceDistance;
	float3 UnitWorldRayDirection = normalize(WorldRayEnd - WorldRayStart);

	// A single thread resets the shared counter before anyone appends to the list
	if (ThreadIndex == 0)
	{
		NumIntersectingObjects = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	uint NumCulledObjects = GetCulledNumObjects();

	// The object range is strided across the group's threads, each testing against its own ray
	LOOP
	for (uint CulledObjectIndex = ThreadIndex; CulledObjectIndex < NumCulledObjects; CulledObjectIndex += THREADGROUP_TOTALSIZE)
	{
		float4 ObjectSphere = LoadObjectPositionAndRadius(CulledObjectIndex);

		//@todo - make independent of current pixel
		BRANCH
		if (RayHitSphere(WorldRayStart, UnitWorldRayDirection, ObjectSphere.xyz, ObjectSphere.w))
		{
			uint ListIndex;
			InterlockedAdd(NumIntersectingObjects, 1U, ListIndex);

			// Entries past the list capacity are dropped
			if (ListIndex < MAX_INTERSECTING_OBJECTS)
			{
				IntersectingObjectIndices[ListIndex] = CulledObjectIndex;
			}
		}
	}

	// The list must be complete before any thread starts tracing through it
	GroupMemoryBarrierWithGroupSync();

	float MinRayTime;
	float TotalStepsTaken;

	// Trace once to find the distance to first intersection
	RayTraceThroughTileCulledDistanceFields(WorldRayStart, WorldRayEnd, TraceDistance, MinRayTime, TotalStepsTaken);

	// Recompute the ray end point
	float3 ShortenedRayEnd = WorldRayStart + UnitWorldRayDirection * MinRayTime;

	// Trace a second time to only accumulate steps taken before the first intersection, improves visualization
	float UnusedMinRayTime;
	RayTraceThroughTileCulledDistanceFields(WorldRayStart, ShortenedRayEnd, MinRayTime, UnusedMinRayTime, TotalStepsTaken);

	// Brightness grows with the step count; anything that was hit gets a constant boost
	float3 Result = saturate(TotalStepsTaken / 200.0f);
	Result += (MinRayTime < TraceDistance) ? .1f : 0.0f;

	RWVisualizeMeshDistanceFields[DispatchThreadId.xy] = float4(Result, 0);
}
|
|
|
|
// Texture and sampler read by VisualizeDistanceFieldUpsamplePS
Texture2D VisualizeDistanceFieldTexture;
SamplerState VisualizeDistanceFieldSampler;
|
|
|
|
/** Samples the distance field visualization texture at the view-rect-relative UV and outputs its rgb with an alpha of 1. */
void VisualizeDistanceFieldUpsamplePS(in float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	// Distance field AO was computed at 0,0 regardless of viewrect min
	float2 ViewRectMinUV = View.ViewRectMin.xy * View.ViewSizeAndSceneTexelSize.zw;
	float2 DistanceFieldUVs = UVAndScreenPos.xy - ViewRectMinUV;

	float4 Sampled = Texture2DSampleLevel(VisualizeDistanceFieldTexture, VisualizeDistanceFieldSampler, DistanceFieldUVs, 0);

	OutColor = float4(Sampled.xyz, 1);
}