Files
UnrealEngineUWP/Engine/Shaders/ReflectionEnvironmentComputeShaders.usf
Martin Mittring b58ae4285d * added new shading model Subsurface_Profile, can specify SubsurfaceProfile asset on the material, create it in content browser
no backscattering yet, might replace Preintegrated and Subsurface shading models, can be optimized, postprocess pass only runs if an object on the screen is using it, uses SeparableSSS by Jorge Jimenez and Diego Gutierrez

[CL 2236313 by Martin Mittring in Main branch]
2014-07-29 17:33:28 -04:00

756 lines
25 KiB
Plaintext

// Copyright 1998-2014 Epic Games, Inc. All Rights Reserved.
/*=============================================================================
ReflectionEnvironmentComputeShaders - functionality to apply local cubemaps.
=============================================================================*/
#include "Common.usf"
#include "DeferredShadingCommon.usf"
#include "BRDF.usf"
#include "ReflectionEnvironmentShared.usf"
#if TILED_DEFERRED_CULL_SHADER
/** Cube map array of reflection captures. */
TextureCubeArray ReflectionEnvironmentColorTexture;
/** Sampler used when reading ReflectionEnvironmentColorTexture. */
SamplerState ReflectionEnvironmentColorSampler;
/** Number of threads in one thread group. */
#define THREADGROUP_TOTALSIZE (THREADGROUP_SIZEX * THREADGROUP_SIZEY)
// Workaround performance issue with shared memory bank collisions in GLSL
// NOTE(review): both branches currently select 0, so the shared memory atomics path guarded by ATOMIC_REDUCTION is compiled out for every profile - confirm this is intended.
#if GL4_PROFILE
#define ATOMIC_REDUCTION 0
#else
#define ATOMIC_REDUCTION 0
#endif
/** When set to 1 the pass outputs 0.1 * the result of CountOverlap per pixel instead of reflections. */
#define VISUALIZE_OVERLAP 0
/** Upper bound on the capture indices visited by the culling loop (which is additionally clamped to MAX_CAPTURES). */
uint NumCaptures;
/** View rect min in xy, max in zw. */
uint4 ViewDimensions;
/** Min and Max depth for this tile. Stored as float bit patterns (asuint / asfloat). */
groupshared uint IntegerTileMinZ;
groupshared uint IntegerTileMaxZ;
/** Inner Min and Max depth for this tile. Only written when ATOMIC_REDUCTION is enabled. */
groupshared uint IntegerTileMinZ2;
groupshared uint IntegerTileMaxZ2;
/** Number of reflection captures affecting this tile, after culling. */
groupshared uint TileNumReflectionCaptures;
/** Indices into the capture data buffer of captures that affect this tile, computed by culling. */
groupshared uint TileReflectionCaptureIndices[MAX_CAPTURES];
/** Capture indices after sorting. */
groupshared uint SortedTileReflectionCaptureIndices[MAX_CAPTURES];
#if !ATOMIC_REDUCTION
/** Per-thread scene depth, reused as scratch space for the min / max reduction in ComputeTileMinMax. */
groupshared float TileZ[THREADGROUP_TOTALSIZE];
#endif
/**
 * Computes the scene depth bounds of the current tile across all threads of the group.
 * Contains group-wide barriers, so every thread of the group must call it.
 *
 * @param ThreadIndex	Flattened index of this thread inside the group.
 * @param SceneDepth	Scene depth of the pixel owned by this thread.
 * @param MinTileZ		Receives the smallest depth in the tile.
 * @param MaxTileZ		Receives the largest depth in the tile.
 * @param MinTileZ2		With ATOMIC_REDUCTION: smallest depth that is >= the midpoint of [MinTileZ, MaxTileZ]. Otherwise: the midpoint itself.
 * @param MaxTileZ2		With ATOMIC_REDUCTION: largest depth that is <= the midpoint of [MinTileZ, MaxTileZ]. Otherwise: the midpoint itself.
 */
void ComputeTileMinMax(uint ThreadIndex, float SceneDepth, out float MinTileZ, out float MaxTileZ, out float MinTileZ2, out float MaxTileZ2)
{
#if ATOMIC_REDUCTION
	// Initialize per-tile variables
	if (ThreadIndex == 0)
	{
		IntegerTileMinZ = 0x7F7FFFFF;
		IntegerTileMaxZ = 0;
		IntegerTileMinZ2 = 0x7F7FFFFF;
		IntegerTileMaxZ2 = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	// Use shared memory atomics to build the depth bounds for this tile
	// Each thread is assigned to a pixel at this point
	// The float depth is compared through its bit pattern - presumably depths are non-negative so that integer ordering matches float ordering; confirm
	InterlockedMin(IntegerTileMinZ, asuint(SceneDepth));
	InterlockedMax(IntegerTileMaxZ, asuint(SceneDepth));

	GroupMemoryBarrierWithGroupSync();

	MinTileZ = asfloat(IntegerTileMinZ);
	MaxTileZ = asfloat(IntegerTileMaxZ);

	float HalfZ = .5f * (MinTileZ + MaxTileZ);

	// Compute a second min and max Z, clipped by HalfZ, so that we get two depth bounds per tile
	// This results in more conservative tile depth bounds and fewer intersections
	if (SceneDepth >= HalfZ)
	{
		InterlockedMin(IntegerTileMinZ2, asuint(SceneDepth));
	}

	if (SceneDepth <= HalfZ)
	{
		InterlockedMax(IntegerTileMaxZ2, asuint(SceneDepth));
	}

	GroupMemoryBarrierWithGroupSync();

	MinTileZ2 = asfloat(IntegerTileMinZ2);
	MaxTileZ2 = asfloat(IntegerTileMaxZ2);
#else
	// Three stage reduction through the TileZ scratch array.
	// The hard coded indices below touch TileZ[0..103], so this path assumes THREADGROUP_TOTALSIZE is at least 104.
	TileZ[ThreadIndex] = SceneDepth;

	GroupMemoryBarrierWithGroupSync();

	// Stage 1: 32 threads each fold every 32nd depth, leaving partial mins in TileZ[0..31] and partial maxes in TileZ[32..63].
	// Thread T only reads and writes indices congruent to T modulo 32, so no barrier is needed inside this stage.
	if (ThreadIndex < 32)
	{
		float Min = SceneDepth;
		float Max = SceneDepth;
		for ( int i = ThreadIndex+32; i< THREADGROUP_TOTALSIZE; i+=32)
		{
			Min = min( Min, TileZ[i]);
			Max = max( Max, TileZ[i]);
		}
		TileZ[ThreadIndex] = Min;
		TileZ[ThreadIndex + 32] = Max;
	}

	GroupMemoryBarrierWithGroupSync();

	// Stage 2: 8 threads fold the 32 partial results down to 8 mins in TileZ[64..71] and 8 maxes in TileZ[96..103]
	if (ThreadIndex < 8)
	{
		float Min = TileZ[ThreadIndex];
		float Max = TileZ[ThreadIndex + 32];

		Min = min( Min, TileZ[ThreadIndex + 8]);
		Max = max( Max, TileZ[ThreadIndex + 40]);

		Min = min( Min, TileZ[ThreadIndex + 16]);
		Max = max( Max, TileZ[ThreadIndex + 48]);

		Min = min( Min, TileZ[ThreadIndex + 24]);
		Max = max( Max, TileZ[ThreadIndex + 56]);

		TileZ[ThreadIndex + 64] = Min;
		TileZ[ThreadIndex + 96] = Max;
	}

	GroupMemoryBarrierWithGroupSync();

	// Stage 3: a single thread folds the remaining 8 + 8 values and publishes the result to the whole group
	if (ThreadIndex == 0)
	{
		float Min = TileZ[64];
		float Max = TileZ[96];

		for ( int i = 1; i< 8; i++)
		{
			Min = min( Min, TileZ[i+64]);
			Max = max( Max, TileZ[i+96]);
		}

		IntegerTileMinZ = asuint(Min);
		IntegerTileMaxZ = asuint(Max);
	}

	GroupMemoryBarrierWithGroupSync();

	MinTileZ = asfloat(IntegerTileMinZ);
	MaxTileZ = asfloat(IntegerTileMaxZ);

	// This path does not compute a second depth range, both inner bounds are set to the midpoint
	float HalfZ = .5f * (MinTileZ + MaxTileZ);
	MinTileZ2 = HalfZ;
	MaxTileZ2 = HalfZ;
#endif
}
// Culls the reflection captures in the scene against the current tile.
// Outputs are stored in shared memory: TileNumReflectionCaptures receives the number of surviving captures,
// TileReflectionCaptureIndices their capture indices in arbitrary order and, when SORT_CAPTURES is enabled,
// SortedTileReflectionCaptureIndices the same indices in ascending order.
// Contains group-wide barriers, so every thread of the group must call it.
void DoTileCulling(uint3 GroupId, uint ThreadIndex, float MinTileZ, float MaxTileZ, float MinTileZ2, float MaxTileZ2)
{
	// Setup the tile's side planes from the view to clip transform, scaled and biased to this tile
	float2 ScaleToTile = float2(ViewDimensions.zw - ViewDimensions.xy) * rcp(2 * float2(THREADGROUP_SIZEX, THREADGROUP_SIZEY));
	float2 BiasToTile = ScaleToTile - GroupId.xy;

	float4 ColumnX = float4(View.ViewToClip._11 * ScaleToTile.x, 0.0f, View.ViewToClip._31 * ScaleToTile.x + BiasToTile.x, 0.0f);
	float4 ColumnY = float4(0.0f, -View.ViewToClip._22 * ScaleToTile.y, View.ViewToClip._32 * ScaleToTile.y + BiasToTile.y, 0.0f);
	float4 ColumnW = float4(0.0f, 0.0f, 1.0f, 0.0f);

	// TODO transform to world space
#if ATOMIC_REDUCTION
	// Planes 0-3 bound the tile on screen, 4-5 bound the near depth range, 6-7 bound the far depth range
	float4 TilePlanes[8];
	TilePlanes[0] = ColumnW - ColumnX;
	TilePlanes[1] = ColumnW + ColumnX;
	TilePlanes[2] = ColumnW - ColumnY;
	TilePlanes[3] = ColumnW + ColumnY;
	TilePlanes[4] = float4(0.0f, 0.0f, 1.0f, -MinTileZ);
	TilePlanes[5] = float4(0.0f, 0.0f, -1.0f, MaxTileZ2);
	TilePlanes[6] = float4(0.0f, 0.0f, 1.0f, -MinTileZ2);
	TilePlanes[7] = float4(0.0f, 0.0f, -1.0f, MaxTileZ);
#else
	// Planes 0-3 bound the tile on screen, 4-5 bound the single depth range
	float4 TilePlanes[6];
	TilePlanes[0] = ColumnW - ColumnX;
	TilePlanes[1] = ColumnW + ColumnX;
	TilePlanes[2] = ColumnW - ColumnY;
	TilePlanes[3] = ColumnW + ColumnY;
	TilePlanes[4] = float4(0.0f, 0.0f, 1.0f, -MinTileZ);
	TilePlanes[5] = float4(0.0f, 0.0f, -1.0f, MaxTileZ);
#endif

	// Only the side planes need normalizing, the depth planes are written with unit length normals above
	UNROLL
	for (uint NormalizeIndex = 0; NormalizeIndex < 4; ++NormalizeIndex)
	{
		TilePlanes[NormalizeIndex] *= rcp(length(TilePlanes[NormalizeIndex].xyz));
	}

	// One thread resets the shared counter before any thread appends to the list
	if (ThreadIndex == 0)
	{
		TileNumReflectionCaptures = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	// Build the per-tile list of affecting captures through bounds culling
	// From here on each thread works on captures (strided by the group size) instead of on its pixel
	LOOP
	for (uint CandidateIndex = ThreadIndex; CandidateIndex < NumCaptures && CandidateIndex < MAX_CAPTURES; CandidateIndex += THREADGROUP_TOTALSIZE)
	{
		float4 SphereBounds = ReflectionCapture.PositionAndRadius[CandidateIndex];
		float3 ViewSpaceCenter = mul(float4(SphereBounds.xyz + View.PreViewTranslation.xyz, 1), View.TranslatedWorldToView).xyz;
		float4 HomogeneousCenter = float4(ViewSpaceCenter, 1.0f);
		float NegativeRadius = -SphereBounds.w;

		// Sphere against plane tests
		// Note: this has some false positives, a sphere that is intersecting three different axis frustum planes
		// yet not intersecting the volume of the tile will be treated as intersecting
		bool bTouchesTile = true;

		// Screen x and y oriented planes first
		UNROLL
		for (uint SidePlane = 0; SidePlane < 4; ++SidePlane)
		{
			float SignedDistance = dot(TilePlanes[SidePlane], HomogeneousCenter);
			bTouchesTile = bTouchesTile && (SignedDistance >= NegativeRadius);
		}

		BRANCH
		if (bTouchesTile)
		{
#if ATOMIC_REDUCTION
			// Near depth range
			bool bTouchesNearRange = true;

			UNROLL
			for (uint NearPlane = 4; NearPlane < 6; ++NearPlane)
			{
				float SignedDistance = dot(TilePlanes[NearPlane], HomogeneousCenter);
				bTouchesNearRange = bTouchesNearRange && (SignedDistance >= NegativeRadius);
			}

			// Far depth range
			bool bTouchesFarRange = true;

			UNROLL
			for (uint FarPlane = 6; FarPlane < 8; ++FarPlane)
			{
				float SignedDistance = dot(TilePlanes[FarPlane], HomogeneousCenter);
				bTouchesFarRange = bTouchesFarRange && (SignedDistance >= NegativeRadius);
			}

			bool bTouchesDepthRange = bTouchesNearRange || bTouchesFarRange;
#else
			// Single depth range
			bool bTouchesDepthRange = true;

			UNROLL
			for (uint DepthPlane = 4; DepthPlane < 6; ++DepthPlane)
			{
				float SignedDistance = dot(TilePlanes[DepthPlane], HomogeneousCenter);
				bTouchesDepthRange = bTouchesDepthRange && (SignedDistance >= NegativeRadius);
			}
#endif

			// Survivors claim a slot in the shared list through an atomic counter
			BRANCH
			if (bTouchesDepthRange)
			{
				uint ClaimedSlot;
				InterlockedAdd(TileNumReflectionCaptures, 1U, ClaimedSlot);
				TileReflectionCaptureIndices[ClaimedSlot] = CandidateIndex;
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	uint NumCapturesAffectingTile = TileNumReflectionCaptures;

	// Restore the original capture order.
	// The atomic slot allocation above scrambles the order in which captures land in the list,
	// whereas the captures were sorted smallest to largest on the CPU.
	//@todo - parallel stream compaction could be faster than this
#define SORT_CAPTURES 1
#if SORT_CAPTURES

	// O(N^2) rank sort: every entry finds its final position on its own, so threads never have to interact
	LOOP
	for (uint UnsortedSlot = ThreadIndex; UnsortedSlot < NumCapturesAffectingTile; UnsortedSlot += THREADGROUP_TOTALSIZE)
	{
		// The key is the original capture index
		int OwnKey = TileReflectionCaptureIndices[UnsortedSlot];
		uint Rank = 0;

		// The number of entries with a smaller key is this entry's sorted position
		for (uint OtherSlot = 0; OtherSlot < NumCapturesAffectingTile; OtherSlot++)
		{
			int OtherKey = TileReflectionCaptureIndices[OtherSlot];

			if (OtherKey < OwnKey)
			{
				Rank++;
			}
		}

		SortedTileReflectionCaptureIndices[Rank] = TileReflectionCaptureIndices[UnsortedSlot];
	}

#endif

	// Make the finished lists visible to every thread before the caller starts shading
	GroupMemoryBarrierWithGroupSync();
}
/** One reflection lobe that GatherRadiance accumulates capture and sky light radiance into. */
struct FLobe
{
// rgb: radiance accumulated so far. a: weight applied to the next sample, reduced by each composited sample's alpha.
float4 Color;
// Direction used to sample the capture when RayInfluance is 0, and used for the sky light lookup.
float3 Direction;
// Roughness used to select the capture mip and passed to the sky light lookup.
float Roughness;
// Lerp weight between Direction (0) and the vector reprojected onto the capture shape (1) when sampling a capture.
float RayInfluance;
};
/**
 * Composites the reflection captures in this tile's list, followed by the sky light, into the given lobes.
 * Reads the tile capture list from group shared memory, so DoTileCulling must have completed first.
 *
 * @param Lobes			Lobes to accumulate into. Color.rgb is added to, Color.a is reduced by each composited sample.
 * @param NumLobes		Number of entries of Lobes to process (1 or 2).
 * @param WorldPosition	World space position of the pixel being shaded.
 * @param RayDirection	Direction that is intersected with each capture's projection shape - presumably normalized, the sphere intersection below uses a quadratic coefficient of 1; confirm.
 */
void GatherRadiance( inout FLobe Lobes[2], const uint NumLobes, float3 WorldPosition, float3 RayDirection )
{
// The mip only depends on the lobe roughness, so it is computed once outside of the capture loop
float LobeMip[2];
UNROLL
for( uint Lobe = 0; Lobe < NumLobes; Lobe++ )
{
LobeMip[ Lobe ] = ComputeReflectionCaptureMipFromRoughness( Lobes[ Lobe ].Roughness );
}
uint NumCapturesAffectingTile = TileNumReflectionCaptures;
// Accumulate reflections from captures affecting this tile, in list order.
// Each sample is weighted by the lobe's remaining Color.a, so captures earlier in the list take precedence over later ones.
LOOP
for (uint TileCaptureIndex = 0; TileCaptureIndex < NumCapturesAffectingTile; TileCaptureIndex++)
{
#if SORT_CAPTURES
uint CaptureIndex = SortedTileReflectionCaptureIndices[TileCaptureIndex];
#else
uint CaptureIndex = TileReflectionCaptureIndices[TileCaptureIndex];
#endif
float4 CapturePositionAndRadius = ReflectionCapture.PositionAndRadius[CaptureIndex];
// r: scale applied to the sampled color, g: slice of the cube map array, b: selects the projection shape below
float4 CaptureProperties = ReflectionCapture.CaptureProperties[CaptureIndex];
float3 CaptureVector = WorldPosition - CapturePositionAndRadius.xyz;
float CaptureVectorLength = sqrt(dot(CaptureVector, CaptureVector));
// Only pixels inside the capture's influence radius are affected
BRANCH
if (CaptureVectorLength < CapturePositionAndRadius.w)
{
float NormalizedDistanceToCapture = saturate(CaptureVectorLength / CapturePositionAndRadius.w);
float3 ProjectedCaptureVector = RayDirection;
// Fade out based on distance to capture
float DistanceAlpha = 0;
#define PROJECT_ONTO_SHAPE 1
#if PROJECT_ONTO_SHAPE
// NOTE(review): the SUPPORT_PLANE path is compiled out. It references ReflectionVector and CompositedLighting,
// neither of which is declared in this function, so it presumably needs updating before it can be enabled.
#define SUPPORT_PLANE 0
#if SUPPORT_PLANE
BRANCH
// Plane
if (CaptureProperties.b > 1)
{
float4 ImagePlane = float4(ReflectionCapture.BoxTransform[CaptureIndex][0][0], ReflectionCapture.BoxTransform[CaptureIndex][1][0], ReflectionCapture.BoxTransform[CaptureIndex][2][0], ReflectionCapture.BoxTransform[CaptureIndex][3][0]);
float VectorDotPlaneNormal = dot(ImagePlane.xyz, RayDirection);
// VectorDotPlaneNormal < 0 means the ray hit the front face
BRANCH
if (VectorDotPlaneNormal < 0)
{
float PlaneDistance = dot(ImagePlane.xyz, WorldPosition) - ImagePlane.w;
// Time along the ray defined by WorldPosition + IntersectionTime * RayDirection that the intersection took place
float IntersectionTime = -PlaneDistance / VectorDotPlaneNormal;
BRANCH
// Skip intersections behind the pixel being shaded
if (IntersectionTime > 0)
{
// Calculate the world space intersection position
float3 IntersectPosition = WorldPosition + IntersectionTime * ReflectionVector;
float2 ReflectionUVs;
float4 CurrentReflectionXAxis = float4(ReflectionCapture.BoxTransform[CaptureIndex][0][1], ReflectionCapture.BoxTransform[CaptureIndex][1][1], ReflectionCapture.BoxTransform[CaptureIndex][2][1], ReflectionCapture.BoxTransform[CaptureIndex][3][1]);
float3 CurrentImageReflectionOrigin = CapturePositionAndRadius.xyz;
float XLength = length(CurrentReflectionXAxis.xyz);
float3 NormalizedXAxis = CurrentReflectionXAxis.xyz / XLength;
// Calculate the quad UVs by projecting the vector from the intersection to the quad origin onto each quad axis
ReflectionUVs.x = dot(NormalizedXAxis, IntersectPosition - CurrentImageReflectionOrigin.xyz);
float3 ReflectionYAxis = cross(ImagePlane.xyz, NormalizedXAxis) * CurrentReflectionXAxis.w;
ReflectionUVs.y = dot(ReflectionYAxis, IntersectPosition - CurrentImageReflectionOrigin.xyz);
ReflectionUVs = .5f * ReflectionUVs / float2(XLength, XLength * CurrentReflectionXAxis.w) + .5f;
ProjectedCaptureVector = IntersectPosition - CapturePositionAndRadius.xyz;
if (ReflectionUVs.x > 0 && ReflectionUVs.x < 1 && ReflectionUVs.y > 0 && ReflectionUVs.y < 1)
{
CompositedLighting.rgb += 1;
}
}
}
}
else
#endif
// Box
BRANCH if (CaptureProperties.b > 0)
{
// Transform the ray into the local space of the box, where it is an AABB with mins at -1 and maxs at 1
float3 LocalRayStart = mul(float4(WorldPosition, 1), ReflectionCapture.BoxTransform[CaptureIndex]).xyz;
float3 LocalRayDirection = mul(float4(RayDirection, 0), ReflectionCapture.BoxTransform[CaptureIndex]).xyz;
float3 InvRayDir = rcp(LocalRayDirection);
//find the ray intersection with each of the 3 planes defined by the minimum extrema.
float3 FirstPlaneIntersections = -InvRayDir - LocalRayStart * InvRayDir;
//find the ray intersection with each of the 3 planes defined by the maximum extrema.
float3 SecondPlaneIntersections = InvRayDir - LocalRayStart * InvRayDir;
//get the furthest of these intersections along the ray
float3 FurthestPlaneIntersections = max(FirstPlaneIntersections, SecondPlaneIntersections);
//the nearest of the three per-axis furthest intersections is where the ray leaves the box
float Intersection = min(FurthestPlaneIntersections.x, min(FurthestPlaneIntersections.y, FurthestPlaneIntersections.z));
// Compute the reprojected vector
float3 IntersectPosition = WorldPosition + Intersection * RayDirection;
ProjectedCaptureVector = IntersectPosition - CapturePositionAndRadius.xyz;
// Compute the distance from the receiving pixel to the box for masking
// Apply local to world scale to take scale into account without transforming back to world space
// Shrink the box by the transition distance (BoxScales.w) so that the fade happens inside the box influence area
float4 BoxScales = ReflectionCapture.BoxScales[CaptureIndex];
float BoxDistance = ComputeDistanceFromBoxToPoint(-(BoxScales.xyz - .5f * BoxScales.w), BoxScales.xyz - .5f * BoxScales.w, LocalRayStart * BoxScales.xyz);
// Setup a fade based on receiver distance to the box, hides the box influence shape
DistanceAlpha = 1.0 - smoothstep(0, .7f * BoxScales.w, BoxDistance);
}
// Sphere
else
{
// The sphere that the ray is projected onto is 20% larger than the influence radius
float ProjectionSphereRadius = CapturePositionAndRadius.w * 1.2f;
float SphereRadiusSquared = ProjectionSphereRadius * ProjectionSphereRadius;
float3 ReceiverToSphereCenter = WorldPosition - CapturePositionAndRadius.xyz;
float ReceiverToSphereCenterSq = dot(ReceiverToSphereCenter, ReceiverToSphereCenter);
// Find the intersection between the ray along the reflection vector and the capture's sphere
float3 QuadraticCoef;
QuadraticCoef.x = 1;
QuadraticCoef.y = 2 * dot(RayDirection, ReceiverToSphereCenter);
QuadraticCoef.z = ReceiverToSphereCenterSq - SphereRadiusSquared;
float Determinant = QuadraticCoef.y * QuadraticCoef.y - 4 * QuadraticCoef.z;
// Only continue if the ray intersects the sphere
// Otherwise DistanceAlpha stays 0 and this capture contributes nothing
if (Determinant >= 0)
{
// Larger root of the quadratic, i.e. where the ray exits the sphere
float FarIntersection = (sqrt(Determinant) - QuadraticCoef.y) * 0.5;
float3 IntersectPosition = WorldPosition + FarIntersection * RayDirection;
ProjectedCaptureVector = IntersectPosition - CapturePositionAndRadius.xyz;
// Note: some compilers don't handle smoothstep min > max (this was 1, .6)
DistanceAlpha = 1.0 - smoothstep(.6, 1, NormalizedDistanceToCapture);
}
}
#else
DistanceAlpha = 1.0;
#endif
float CaptureArrayIndex = CaptureProperties.g;
// Sum of the remaining weights of all lobes, used for the early out below
float Opacity = 0;
UNROLL
for( uint Lobe = 0; Lobe < NumLobes; Lobe++ )
{
float3 SampleDir = lerp( Lobes[ Lobe ].Direction, ProjectedCaptureVector, Lobes[ Lobe ].RayInfluance );
float4 Sample = ReflectionEnvironmentColorTexture.SampleLevel( ReflectionEnvironmentColorSampler, float4( SampleDir, CaptureArrayIndex ), LobeMip[ Lobe ] );
Sample.rgb *= CaptureProperties.r;
Sample *= DistanceAlpha;
// Under operator: the new sample is composited underneath what has been accumulated so far
Lobes[ Lobe ].Color.rgb += Sample.rgb * Lobes[ Lobe ].Color.a;
Lobes[ Lobe ].Color.a *= 1 - Sample.a;
Opacity += Lobes[ Lobe ].Color.a;
}
// Stop once further captures could no longer contribute visibly to any lobe
BRANCH
if( Opacity < 0.001 )
{
break;
}
}
}
#define APPLY_SKY_LIGHT 1
#if APPLY_SKY_LIGHT
// The sky light is composited last, weighted by whatever the captures left over
float Opacity = 0;
UNROLL
for( uint Lobe = 0; Lobe < NumLobes; Lobe++ )
{
Opacity += Lobes[ Lobe ].Color.a;
}
BRANCH
if( SkyLightParameters.y > 0 && Opacity >= 0.001 )
{
UNROLL
for( uint Lobe = 0; Lobe < NumLobes; Lobe++ )
{
// TODO use LobeMip
Lobes[ Lobe ].Color.rgb += Lobes[ Lobe ].Color.a * GetSkyLightReflection( Lobes[ Lobe ].Direction, Lobes[ Lobe ].Roughness, USE_LIGHTMAPS );
}
}
#endif
}
/**
 * Debug helper used when VISUALIZE_OVERLAP is enabled.
 * Walks this tile's capture list and counts the captures whose influence radius contains WorldPosition.
 * Counting stops early once the product of (1 - distance fade) over the counted captures drops below 0.001.
 *
 * @param WorldPosition	World space position of the pixel being inspected.
 * @return Number of captures counted, as a float.
 */
float CountOverlap( float3 WorldPosition )
{
	float NumContaining = 0;
	float RemainingVisibility = 1;

	uint TileListLength = TileNumReflectionCaptures;

	// Visit the tile's captures in list order
	LOOP
	for (uint ListSlot = 0; ListSlot < TileListLength; ListSlot++)
	{
#if SORT_CAPTURES
		uint CaptureIndex = SortedTileReflectionCaptureIndices[ListSlot];
#else
		uint CaptureIndex = TileReflectionCaptureIndices[ListSlot];
#endif

		float4 SphereBounds = ReflectionCapture.PositionAndRadius[CaptureIndex];
		float3 CenterToPixel = WorldPosition - SphereBounds.xyz;
		float DistanceToCenter = sqrt(dot(CenterToPixel, CenterToPixel));

		BRANCH
		if (DistanceToCenter < SphereBounds.w)
		{
			NumContaining += 1;

			// Fade is 1 up to 60% of the radius and falls to 0 at the edge of the influence sphere
			float NormalizedDistance = saturate(DistanceToCenter / SphereBounds.w);
			float Fade = 1.0 - smoothstep(.6, 1, NormalizedDistance);
			RemainingVisibility *= 1 - Fade;

			BRANCH
			if( RemainingVisibility < 0.001 )
			{
				break;
			}
		}
	}

	return NumContaining;
}
/** Screen space reflection result. rgb seeds the first lobe's color and (1 - a) seeds its remaining weight. */
Texture2D ScreenSpaceReflections;
/** Scene color that the computed reflections are added onto. */
Texture2D InSceneColor;
/** Output HDR target. */
RWTexture2D<float4> RWOutSceneColor;
/**
 * Entry point of the tiled deferred reflection environment pass.
 * Each thread group covers one screen tile and each thread one pixel. The group first computes the tile's depth bounds
 * and culls the reflection captures against the tile, then every thread shades its own pixel from the resulting list
 * and adds the result onto the scene color.
 *
 * @param GroupId			Index of the tile, used to build the tile frustum.
 * @param DispatchThreadId	Pixel coordinate relative to the view rect min.
 * @param GroupThreadId		Coordinate of the thread inside its tile.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void ReflectionEnvironmentTiledDeferredMain(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZEX + GroupThreadId.x;

	// PixelPos is in render target space, DispatchThreadId is relative to the view rect
	uint2 PixelPos = DispatchThreadId.xy + ViewDimensions.xy;
	float2 ScreenUV = (float2(DispatchThreadId.xy) + .5f) / (ViewDimensions.zw - ViewDimensions.xy);
	float2 ScreenPosition = float2(2.0f, -2.0f) * ScreenUV + float2(-1.0f, 1.0f);
	float SceneDepth = CalcSceneDepth(PixelPos);

	float MinTileZ;
	float MaxTileZ;
	float MinTileZ2;
	float MaxTileZ2;

	// Both calls contain group-wide barriers, so they run for every thread, including threads outside of the view
	ComputeTileMinMax(ThreadIndex, SceneDepth, MinTileZ, MaxTileZ, MinTileZ2, MaxTileZ2);
	DoTileCulling(GroupId, ThreadIndex, MinTileZ, MaxTileZ, MinTileZ2, MaxTileZ2);

	// Lookup GBuffer properties once per pixel
	FScreenSpaceData ScreenSpaceData = GetScreenSpaceDataUint(PixelPos);
	FGBufferData GBuffer = ScreenSpaceData.GBuffer;

	float4 HomogeneousWorldPosition = mul(float4(ScreenPosition * SceneDepth, SceneDepth, 1), View.ScreenToWorld);
	float3 WorldPosition = HomogeneousWorldPosition.xyz / HomogeneousWorldPosition.w;
	float3 CameraToPixel = normalize(WorldPosition - View.ViewOrigin.xyz);
	float3 ReflectionVector = reflect(CameraToPixel, GBuffer.WorldNormal);

	// Save GPRs by using R instead of V
	float NoV = saturate( dot( GBuffer.WorldNormal, ReflectionVector ) );

	// Both lobes start out as a mirror reflection with nothing accumulated and full remaining weight
	FLobe Lobes[2];
	UNROLL
	for( uint Lobe = 0; Lobe < 2; Lobe++ )
	{
		Lobes[ Lobe ].Color = float4(0, 0, 0, 1);
		Lobes[ Lobe ].Direction = ReflectionVector;
		Lobes[ Lobe ].RayInfluance = 1;
		Lobes[ Lobe ].Roughness = GBuffer.Roughness;
	}

#if VISUALIZE_OVERLAP
	float Overlap = CountOverlap( WorldPosition );
#else

#if 1//APPLY_SSR
	// Screen space reflections are composited first, captures and sky light only fill in what they leave over
	float4 SSR = ScreenSpaceReflections.Load( int3(PixelPos, 0) );
	Lobes[0].Color.rgb = SSR.rgb;
	Lobes[0].Color.a = 1 - SSR.a;
#endif

#if USE_LIGHTMAPS
	// We have high frequency directional data but low frequency spatial data in the envmap.
	// We have high frequency spatial data but low frequency directional data in the lightmap.
	// So, we combine the two for the best of both. This is done by removing the low spatial frequencies from the envmap and replacing them with the lightmap data.
	// This is only done with luma so as to not get odd color shifting.
	// Note: make sure this matches the lightmap mixing done for translucency (BasePassPixelShader.usf)
	Lobes[0].Color.a *= GBuffer.IndirectIrradiance;
	Lobes[1].Color.a *= GBuffer.IndirectIrradiance;
#else
	// Diffuse lobe
	Lobes[1].Direction = GBuffer.WorldNormal;
	Lobes[1].RayInfluance = 0;
	Lobes[1].Roughness = 1;
#endif

	float AO = ScreenSpaceData.AmbientOcclusion;
	float SpecularOcclusion = saturate( Square( NoV + AO ) - 1 + AO );
	Lobes[0].Color.a *= SpecularOcclusion;
	Lobes[1].Color.a *= AO;

	BRANCH //[forcecase]
	switch( GBuffer.ShadingModelID )
	{
		case SHADINGMODELID_UNLIT:
			break;
		case SHADINGMODELID_DEFAULT_LIT:
		case SHADINGMODELID_SUBSURFACE:
		case SHADINGMODELID_PREINTEGRATED_SKIN:
		case SHADINGMODELID_SUBSURFACE_PROFILE:
		{
			BRANCH
			if( GBuffer.ShadingModelID == SHADINGMODELID_SUBSURFACE )
			{
				GBuffer.DiffuseColor += ExtractSubsurfaceColor(GBuffer);
			}

#if USE_LIGHTMAPS
			// Only the specular lobe is gathered in this configuration (see the lobe count passed below)
			GBuffer.DiffuseColor = 0;
#endif
			GBuffer.SpecularColor = EnvBRDF( GBuffer.SpecularColor, GBuffer.Roughness, NoV );

			GatherRadiance( Lobes, USE_LIGHTMAPS ? 1 : 2, WorldPosition, ReflectionVector );

			Lobes[0].Color.rgb *= GBuffer.SpecularColor;
			Lobes[1].Color.rgb *= GBuffer.DiffuseColor;
			break;
		}
		case SHADINGMODELID_CLEAR_COAT:
#if 1
		{
			const float ClearCoat = GBuffer.CustomData.x;
			const float ClearCoatRoughness = GBuffer.CustomData.y;

#if USE_LIGHTMAPS
			Lobes[0].Roughness = ClearCoatRoughness;
#else
			Lobes[0].Roughness = lerp( GBuffer.Roughness, ClearCoatRoughness, ClearCoat );
			Lobes[1].Roughness = lerp( 1, GBuffer.Roughness, GBuffer.Metallic * ClearCoat );
			Lobes[1].Direction = lerp( GBuffer.WorldNormal, ReflectionVector, ClearCoat );
			Lobes[1].RayInfluance = ClearCoat;
#endif

			// TODO EnvBRDF should have a mask param
			float2 AB = PreIntegratedGF.SampleLevel( PreIntegratedGFSampler, float2( NoV, GBuffer.Roughness ), 0 ).rg;
			GBuffer.SpecularColor = GBuffer.SpecularColor * AB.x + AB.y * saturate( 50 * GBuffer.SpecularColor.g ) * (1 - ClearCoat);

			// F_Schlick
			float F0 = 0.04;
			float Fc = pow( 1 - NoV, 5 );
			float F = Fc + (1 - Fc) * F0;
			F *= ClearCoat;

			float LayerAttenuation = (1 - F);

#if USE_LIGHTMAPS
			Lobes[0].Color.a *= F;
			Lobes[1].Color.a *= LayerAttenuation;
			float3 Lobe0Reflectance = 1;
			float3 Lobe1Reflectance = GBuffer.SpecularColor;
#else
			float3 Lobe0Reflectance = lerp( GBuffer.SpecularColor, F, ClearCoat );
			float3 Lobe1Reflectance = ( GBuffer.DiffuseColor + GBuffer.SpecularColor * ClearCoat ) * LayerAttenuation;
#endif

			GatherRadiance( Lobes, 2, WorldPosition, ReflectionVector );

			Lobes[0].Color.rgb *= Lobe0Reflectance;
			Lobes[1].Color.rgb *= Lobe1Reflectance;
			break;
		}
#endif
		default:
			break;
	}
#endif

	// Only write to the buffer for threads inside the view.
	// PixelPos already includes the view rect min, so it is compared against the view rect max directly.
	// Comparing the un-offset DispatchThreadId instead lets threads of a view with a non-zero min write past the view's right / bottom edge.
	BRANCH
	if (all(PixelPos < ViewDimensions.zw))
	{
		float4 OutColor = 0;

#if VISUALIZE_OVERLAP
		//OutColor.rgb = 0.1 * TileNumReflectionCaptures;
		OutColor.rgb = 0.1 * Overlap;
#else
		OutColor.rgb += Lobes[0].Color.rgb;
		OutColor.rgb += Lobes[1].Color.rgb;
#endif

		// Transform NaNs to black, transform negative colors to black.
		OutColor.rgb = -min(-OutColor.rgb, 0.0);

		// alpha channel is also added to keep the alpha channel for screen space subsurface scattering
		OutColor += InSceneColor.Load( int3(PixelPos, 0) );

		RWOutSceneColor[PixelPos.xy] = OutColor;
	}
}
#endif