UnrealEngineUWP/Engine/Shaders/Private/VirtualShadowMaps/ProjectionSpot.ush

// Copyright Epic Games, Inc. All Rights Reserved.

/*=============================================================================
ProjectionSpot.ush:
=============================================================================*/
#pragma once

#include "../DeferredShadingCommon.ush"
#include "../SceneTexturesCommon.ush"
#include "../LightShaderParameters.ush"
#include "../PathTracing/Utilities/PathTracingRandomSequence.ush"
#include "PageAccessCommon.ush"
#include "ProjectionCommon.ush"
#include "SMRTCommon.ush"

// Compile-time switch for the wave-vote early-out in the TraceLocalLight ray loop
// (it additionally requires COMPILER_SUPPORTS_WAVE_VOTE). Off unless defined before this file is included.
#ifndef SMRT_ADAPTIVE_RAY_COUNT
#define SMRT_ADAPTIVE_RAY_COUNT 0
#endif


// State of one SMRT ray that is marched through a single virtual shadow map.
// Created by SMRTSingleRayInitialize; SMRTFindSample reads the ray and writes the debug fields.
struct FSMRTSingleRayState
{
	// Virtual shadow map that the ray samples
	int VirtualShadowMapId;
	// Ray start in shadow map UV + depth space (after the perspective divide)
	float3 RayStartUVz;
	// Ray end minus ray start in the same space; samples are taken at RayStartUVz + RayStepUVz * SampleTime
	float3 RayStepUVz;
	// Debug output
	// Written from the most recent valid shadow map sample; initialized to 0 / 0xFFFFFFFF when no sample has been found
	int MipLevel;
	uint2 VirtualTexelAddress;
	uint2 PhysicalTexelAddress;
};

// Sets up the ray state for the segment RayStart -> RayEnd against one virtual shadow map.
// Both endpoints get the shadow map's ShadowPreViewTranslation added, are transformed by
// TranslatedWorldToShadowUVMatrix and perspective-divided, so the ray is stored as a start point
// plus a step vector in shadow UV + depth space.
FSMRTSingleRayState SMRTSingleRayInitialize(
	int VirtualShadowMapId,
	float3 RayStart,
	float3 RayEnd)
{
	FVirtualShadowMapProjectionShaderData ShadowData = GetVirtualShadowMapProjectionData(VirtualShadowMapId);

	// Homogeneous shadow UVz of both ray endpoints
	float4 StartProjected = mul(float4(RayStart + ShadowData.ShadowPreViewTranslation, 1), ShadowData.TranslatedWorldToShadowUVMatrix);
	float4 EndProjected   = mul(float4(RayEnd   + ShadowData.ShadowPreViewTranslation, 1), ShadowData.TranslatedWorldToShadowUVMatrix);

	// NOTE: We assume by construction that ray ends are not behind the light near plane as the warping
	// due to SMRT ray tests gets severe long before the rays would otherwise clip at that plane.
	// If clipping is ever necessary, it must be done to the ray endpoints in world space so that lights that
	// need to walk multiple faces/clipmap levels in lock step can do it consistently.

	// Perspective divide
	float3 StartUVz = StartProjected.xyz / StartProjected.w;
	float3 EndUVz = EndProjected.xyz / EndProjected.w;

	FSMRTSingleRayState RayState;
	RayState.VirtualShadowMapId = VirtualShadowMapId;
	RayState.RayStartUVz = StartUVz;
	RayState.RayStepUVz = EndUVz - StartUVz;
	// Debug fields start out as "nothing sampled yet"
	RayState.MipLevel = 0;
	RayState.VirtualTexelAddress = uint2(0xFFFFFFFF, 0xFFFFFFFF);
	RayState.PhysicalTexelAddress = uint2(0xFFFFFFFF, 0xFFFFFFFF);
	return RayState;
}

// Samples the ray's virtual shadow map at parametric position SampleTime along the ray.
// The returned sample is only valid when the position lies inside the [0,1] UV range of the map
// and the shadow map lookup itself is valid. On a valid sample the debug fields of RayState are updated.
FSMRTSample SMRTFindSample(inout FSMRTSingleRayState RayState, float SampleTime)
{
	FSMRTSample Result = (FSMRTSample)0;
	Result.bValid = false;

	const float3 SampleUVz = RayState.RayStartUVz.xyz + RayState.RayStepUVz.xyz * SampleTime;

	// saturate() leaves a coordinate unchanged only when it already is in [0,1]
	const bool bInsideMap = all(SampleUVz.xy == saturate(SampleUVz.xy));
	if (bInsideMap)
	{
		FVirtualShadowMapSample MapSample = SampleVirtualShadowMap(RayState.VirtualShadowMapId, SampleUVz.xy);
		if (MapSample.bValid)
		{
			Result.bValid = true;
			Result.SampleDepth = MapSample.Depth;
			Result.ReferenceDepth = SampleUVz.z;
			Result.CompareToleranceScale = RayState.RayStepUVz.z;

			// Debug output
			RayState.MipLevel = MapSample.MipLevel;
			RayState.VirtualTexelAddress = MapSample.VirtualTexelAddress;
			RayState.PhysicalTexelAddress = MapSample.PhysicalTexelAddress;
		}
	}

	return Result;
}

// Estimates the distance between the receiver (ray start) and an occluder found at shadow depth SampleDepth.
// Both depths are converted to shadow view space Z as _43 / (Depth - _33) of the shadow view-to-clip matrix,
// and the difference is clamped from below by VIRTUAL_SHADOW_MAP_MIN_OCCLUDER_DISTANCE.
float SMRTComputeOccluderDistance(FSMRTSingleRayState RayState, float SampleDepth)
{
	// Look the matrix up again here instead of carrying it in the ray state, to avoid keeping the registers around
	float4x4 ShadowViewToClip = GetVirtualShadowMapProjectionData(RayState.VirtualShadowMapId).ShadowViewToClipMatrix;

	// View space Z delta is a good enough stand-in for euclidean distance for now given our bent ray tests and so on
	const float ReceiverZ = ShadowViewToClip._43 / (RayState.RayStartUVz.z - ShadowViewToClip._33);
	const float OccluderZ = ShadowViewToClip._43 / (SampleDepth - ShadowViewToClip._33);

	return max(VIRTUAL_SHADOW_MAP_MIN_OCCLUDER_DISTANCE, ReceiverZ - OccluderZ);
}

// Instantiate SMRTRayCast for FSMRTSingleRayState
// SMRTTemplate.ush is included with SMRT_TEMPLATE_RAY_STRUCT naming the ray state type to instantiate for;
// the macro is undefined again afterwards so the template can be included for another type further down.
#define SMRT_TEMPLATE_RAY_STRUCT FSMRTSingleRayState
#include "SMRTTemplate.ush"
#undef SMRT_TEMPLATE_RAY_STRUCT


// Ray state for a ray that is tested against two virtual shadow maps (two cube map faces of a point light).
// The same ray is set up against both maps; each sample is taken from Face0 when it is valid there,
// otherwise from Face1.
struct FSMRTTwoCubeFaceRayState
{
	// The ray projected into the first shadow map
	FSMRTSingleRayState Face0;
	// The same ray projected into the second shadow map
	FSMRTSingleRayState Face1;
	// True when the most recent SMRTFindSample call got a valid sample from Face0
	bool bSampleInFace0;
};

// Sets up the same ray segment against two virtual shadow maps at once.
// VirtualShadowMapId0 becomes Face0 (tried first when sampling), VirtualShadowMapId1 becomes Face1.
FSMRTTwoCubeFaceRayState SMRTTwoCubeFaceRayInitialize(
	int VirtualShadowMapId0,
	int VirtualShadowMapId1,
	float3 RayStart,
	float3 RayEnd)
{
	FSMRTTwoCubeFaceRayState TwoFaceState;
	// No sample has been taken yet
	TwoFaceState.bSampleInFace0 = false;
	TwoFaceState.Face0 = SMRTSingleRayInitialize(VirtualShadowMapId0, RayStart, RayEnd);
	TwoFaceState.Face1 = SMRTSingleRayInitialize(VirtualShadowMapId1, RayStart, RayEnd);
	return TwoFaceState;
}

// Samples the ray at SampleTime, preferring Face0 and falling back to Face1 when Face0 has no valid sample.
// RayState.bSampleInFace0 records which face was used so that the occluder distance can be computed
// against the matching shadow map.
FSMRTSample SMRTFindSample(inout FSMRTTwoCubeFaceRayState RayState, float SampleTime)
{
	FSMRTSample Result = SMRTFindSample(RayState.Face0, SampleTime);

	const bool bFoundInFace0 = Result.bValid;
	RayState.bSampleInFace0 = bFoundInFace0;

	if (!bFoundInFace0)
	{
		// Face0 had nothing valid at this position, so the second face decides the result
		Result = SMRTFindSample(RayState.Face1, SampleTime);
	}

	return Result;
}

// Computes the occluder distance using whichever face the last sample was taken from
// (Face0 when bSampleInFace0 is set, Face1 otherwise).
float SMRTComputeOccluderDistance(FSMRTTwoCubeFaceRayState RayState, float SampleDepth)
{
	FSMRTSingleRayState SampledFace = RayState.Face1;
	if (RayState.bSampleInFace0)
	{
		SampledFace = RayState.Face0;
	}

	return SMRTComputeOccluderDistance(SampledFace, SampleDepth);
}

// Instantiate SMRTRayCast for FSMRTTwoCubeFaceRayState
// Second inclusion of SMRTTemplate.ush, this time with SMRT_TEMPLATE_RAY_STRUCT naming the two-face ray state.
#define SMRT_TEMPLATE_RAY_STRUCT FSMRTTwoCubeFaceRayState
#include "SMRTTemplate.ush"
#undef SMRT_TEMPLATE_RAY_STRUCT


// Casts one SMRT ray from RayStart to RayEnd against the single virtual shadow map of a spot light.
// NumSteps and StepOffset are forwarded to SMRTRayCast. OutRayState receives the ray state after the
// cast so that callers can read the debug fields.
FSMRTResult ShadowRayCastSpotLight(
	int VirtualShadowMapId,
	float3 RayStart,
	float3 RayEnd,
	int NumSteps,
	half StepOffset,
	// To get debug data out
	inout FSMRTSingleRayState OutRayState)
{
	FSMRTSingleRayState RayState = SMRTSingleRayInitialize(VirtualShadowMapId, RayStart, RayEnd);
	FSMRTResult CastResult = SMRTRayCast(RayState, NumSteps, StepOffset);

	OutRayState = RayState;
	return CastResult;
}

// Casts one SMRT ray from RayStart to RayEnd against the cube map shadow of a point light.
// The cube face of each ray endpoint is looked up and the face index is added to VirtualShadowMapId
// to address that face's shadow map. When both endpoints fall on the same face the ray is cast like a
// spot light ray; otherwise it is cast against the start and end faces together.
// OutRayState receives the state of the face that was sampled last, for debug output.
FSMRTResult ShadowRayCastPointLight(
	int VirtualShadowMapId,
	float3 RayStart,
	float3 RayEnd,
	int NumSteps,
	half StepOffset,
	// To get debug data out
	inout FSMRTSingleRayState OutRayState)
{
	FVirtualShadowMapProjectionShaderData LightProjectionData = GetVirtualShadowMapProjectionData(VirtualShadowMapId);

	const uint StartFace = VirtualShadowMapGetCubeFace( RayStart + LightProjectionData.ShadowPreViewTranslation );
	const uint EndFace   = VirtualShadowMapGetCubeFace( RayEnd   + LightProjectionData.ShadowPreViewTranslation );

	FSMRTResult CastResult;
	if (StartFace == EndFace)
	{
		// Fast path: ray stays on a single cube map face (effectively a spot light)
		OutRayState = SMRTSingleRayInitialize(VirtualShadowMapId + EndFace, RayStart, RayEnd);
		CastResult = SMRTRayCast(OutRayState, NumSteps, StepOffset);
	}
	else
	{
		// The ray crosses from one face to another: test both faces in lock step
		FSMRTTwoCubeFaceRayState TwoFaceState = SMRTTwoCubeFaceRayInitialize(
			VirtualShadowMapId + StartFace,
			VirtualShadowMapId + EndFace,
			RayStart, RayEnd);
		CastResult = SMRTRayCast(TwoFaceState, NumSteps, StepOffset);

		// Hand back whichever face provided the last sample
		if (TwoFaceState.bSampleInFace0)
		{
			OutRayState = TwoFaceState.Face0;
		}
		else
		{
			OutRayState = TwoFaceState.Face1;
		}
	}

	return CastResult;
}

// Generates a scrambled 4D Sobol' sample in [0,1) for SampleIndex.
// Only the low MaxBits bits of the scrambled index are used, which reduces the number of unrolled
// loop iterations. Seed is passed through EvolveSobolSeed for the index scramble and for each of the
// four output dimensions.
float4 SobolSampler(uint SampleIndex, inout uint Seed, uint MaxBits)
{
	// first scramble the index to decorrelate from other 4-tuples, keeping only MaxBits bits of it
	const uint IndexMask = (1U << MaxBits) - 1U;
	const uint ScrambledIndex = FastOwenScrambling(SampleIndex, EvolveSobolSeed(Seed)) & IndexMask;

	// now get Sobol' point from this index, one index bit per iteration
	uint4 Point = uint4(ScrambledIndex, 0, 0, 0);
	uint DirectionY = 1;
	UNROLL for (uint Bit = 0; Bit < MaxBits; Bit++)
	{
		const uint BitValue = (ScrambledIndex >> Bit) & 1;
		Point.y ^= BitValue * DirectionY;
		Point.zw ^= BitValue * SobolMatrices[Bit];
		DirectionY ^= DirectionY << 1;
	}

	// finally scramble the points to avoid structured artifacts
	// (the seed is evolved between dimensions, so the order x, y, z, w matters)
	Point.x = FastOwenScrambling(Point.x, EvolveSobolSeed(Seed));
	Point.y = FastOwenScrambling(Point.y, EvolveSobolSeed(Seed));
	Point.z = FastOwenScrambling(Point.z, EvolveSobolSeed(Seed));
	Point.w = FastOwenScrambling(Point.w, EvolveSobolSeed(Seed));

	// output as float in [0,1) taking care not to skew the distribution
	// due to the non-uniform spacing of floats in this range:
	// keep the top 24 bits and scale by 2^-24
	const uint4 Top24Bits = Point >> 8;
	return Top24Bits * 5.96046447754e-08;
}


// Traces up to MaxRayCount SMRT shadow rays from a pixel toward a spot or point light and returns the
// fraction of rays that did not hit an occluder in Result.ShadowFactor (Result.RayCount holds the number
// of rays actually traced).
//
//   VirtualShadowMapId       Shadow map of the light (for point lights the cube face index is added to it)
//   Light                    Light parameters; Position and SourceRadius are read here
//   LightType                LIGHT_TYPE_SPOT takes the single shadow map path, anything else the cube map path
//   PixelPos                 Pixel coordinate; used to fetch the GBuffer normal and to vary the random sequence
//   SceneDepth               Only read by the adaptive ray count heuristic
//   TranslatedWorldPosition  Receiver position; View.PreViewTranslation is subtracted before use
//   RayStartOffset           Distance along the ray direction at which each ray starts
//   MaxRayCount              Upper bound on the number of rays
//   SamplesPerRay            Number of steps handed to the ray cast
//   CotMaxRayAngleFromLight  Shortens rays that leave the light direction, to limit penumbra sizes
//   Noise                    Step offset used for the first ray; later rays derive theirs from the previous one
//   bCullBackface            When true, pixels facing away from the whole light source trace no rays and
//                            return ShadowFactor 0
FVirtualShadowMapSampleResult TraceLocalLight(
	int VirtualShadowMapId,
	FLightShaderParameters Light,
	uint LightType,
	uint2 PixelPos,
	const float SceneDepth,
	float3 TranslatedWorldPosition,
	float RayStartOffset,
	uint MaxRayCount,
	uint SamplesPerRay,
	float CotMaxRayAngleFromLight,
	const half Noise,
	const bool bCullBackface)
{
	const half3 N = GetGBufferDataUint( PixelPos, true ).WorldNormal;
	const float3 WorldPosition = TranslatedWorldPosition - View.PreViewTranslation;

	float3 ToLight = Light.Position - WorldPosition;
	float DistSqr = dot(ToLight, ToLight);
	float Falloff = rcp(DistSqr + 1);

	float InvDist = rsqrt( DistSqr );
	float DistToLight = DistSqr * InvDist;

	half3 ConeAxis = ToLight * InvDist;

	// The pixel is back facing when its normal points away from the light direction by more than SinAlpha
	half SinAlphaSqr = saturate(Pow2(Light.SourceRadius) * Falloff);
	half SinAlpha = sqrt(SinAlphaSqr);
	bool bBackFace = dot( N, ConeAxis ) < -SinAlpha;

	FVirtualShadowMapSampleResult Result = InitVirtualShadowMapSampleResult();
	Result.bValid = true;
	Result.ShadowFactor = 0.0f;

	half RayMissFactor = 0;
	if (!bBackFace || !bCullBackface)
	{
		float StepOffset = Noise;

		// Sine of the half angle of the cone of directions from the pixel to the light source disk
		half ConeSin = Light.SourceRadius * InvDist;

		uint TileID = MortonEncode( PixelPos );

		uint i = 0;
		for( ; i < MaxRayCount; i++ )
		{
			RandomSequence RandSequence;
#if 0
			RandomSequence_Initialize( RandSequence, PixelPos, i, View.StateFrameIndex, MaxRayCount );

			float4 RandSample = RandomSequence_GenerateSample4D( RandSequence );
#else
			// Combine pixel-level and sample-level bits into the sample index (visible structure will be hidden by owen scrambling of the index)
			RandSequence.SampleIndex = reversebits((65536 * View.StateFrameIndex + TileID) * MaxRayCount + i);
			RandSequence.SampleSeed = 0; // always use the same sequence

			float4 RandSample = SobolSampler( RandSequence.SampleIndex, RandSequence.SampleSeed, 8 );
#endif

			float2 E = RandSample.xy;

			// Pick a direction inside the cone toward the light source
			half2 LightUV = UniformSampleDiskConcentricApprox( E ).xy;
			LightUV *= ConeSin;

			half SinTheta2 = dot( LightUV, LightUV );
			half SinTheta = sqrt( SinTheta2 );
			half CosTheta = sqrt( 1 - SinTheta2 );

			half3 Dir = TangentToWorld( half3( LightUV, CosTheta ), ConeAxis );

			// The further afield our rays go the poorer our approximation is as the "bend" due to our testing
			// against a shadow map instead of along the ray increases. Thus we avoid going all the way to the light
			// where the projection becomes extreme.

			// Clip ray to max angle to limit penumbra sizes
			float ClampedLength = DistToLight * 0.75 * saturate( 1.5 / ( CosTheta + CotMaxRayAngleFromLight * SinTheta ) );

			float3 RayStart = WorldPosition + Dir * RayStartOffset;
			float3 RayEnd = WorldPosition + Dir * ClampedLength;

			FSMRTResult SMRTResult;
			FSMRTSingleRayState RayState;	// For debug output
			if ( LightType == LIGHT_TYPE_SPOT )
			{
				SMRTResult = ShadowRayCastSpotLight(
					VirtualShadowMapId,
					RayStart,
					RayEnd,
					SamplesPerRay,
					StepOffset,
					RayState);
			}
			else
			{
				SMRTResult = ShadowRayCastPointLight(
					VirtualShadowMapId,
					RayStart,
					RayEnd,
					SamplesPerRay,
					StepOffset,
					RayState);
			}

			RayMissFactor += SMRTResult.bValidHit ? 0 : 1;

			// Debug output (DCE'd if not used)
			Result.ClipmapIndexOrMipLevel = RayState.MipLevel;
			Result.VirtualTexelAddress = RayState.VirtualTexelAddress;
			Result.PhysicalTexelAddress = RayState.PhysicalTexelAddress;
			Result.OccluderDistance = SMRTResult.OccluderDistance;

#if SMRT_ADAPTIVE_RAY_COUNT && COMPILER_SUPPORTS_WAVE_VOTE
			if( i == 0 )
			{
				bool bHit = SMRTResult.bValidHit;
				if( bHit )
				{
				#if 0
					//float WorldBluringRadius = ComputeLightSampleWorldBluringRadius( TranslatedWorldPosition, LIGHT_TYPE_SPOT, Light, SMRTResult.OccluderDistance );
					//float PixelBluringRadius = ProjectWorldDistanceToPixelDistance( SceneDepth, WorldBluringRadius );
					float WorldBluringRadius = Light.SourceRadius * SMRTResult.OccluderDistance / max( DistToLight - SMRTResult.OccluderDistance, 0.001 );
					float PixelBluringRadius = WorldBluringRadius * View.ViewSizeAndInvSize.x * 0.5 / ( SceneDepth * View.ClipToView[0][0] );
					bHit = PixelBluringRadius > 2;
				#else
					// Only count the hit when the occluder distance is large enough relative to the remaining
					// distance to the light; both sides read the occluder distance of this ray
					bHit = SMRTResult.OccluderDistance * Light.SourceRadius * ( 0.25 * View.ViewSizeAndInvSize.y / ( SceneDepth * View.ClipToView[1][1] ) ) > max( DistToLight - SMRTResult.OccluderDistance, 0.001 );
				#endif
				}

				// All lanes miss or hit something close enough to not be noisy.
				bool bAllLanesMiss = WaveActiveAllTrue( !bHit );
				if( bAllLanesMiss )
				{
					break;
				}
			}
			else if( i == 1 )
			{
				// After 2 iterations and all have hit, must be in umbra.
				bool bAllLanesHit = WaveActiveAllTrue( RayMissFactor == 0 );
				if( bAllLanesHit )
				{
					break;
				}
			}
#endif
			// Derive the next ray's step offset in [0,1) from the bits of the current one
			uint Seed = asuint( StepOffset );
			StepOffset = ( EvolveSobolSeed( Seed ) >> 8 ) * 5.96046447754e-08; // * 2^-24;
		}
		uint RayCount = min(i + 1, MaxRayCount);		// break vs regular for loop exit

		// With MaxRayCount == 0 no ray was traced and RayCount is 0: keep the factor at 0 rather than
		// producing a NaN from 0 / 0
		if (RayCount > 0)
		{
			RayMissFactor /= float(RayCount);
		}

		Result.ShadowFactor = RayMissFactor;
		Result.RayCount = RayCount;
	}

	return Result;
}

// Generate ray based on light source geometry
//
// Picks ray number RayIndex out of RayCount for the pixel: a direction inside the cone from the pixel
// toward the light source disk, with the ray length clipped the same way as in TraceLocalLight.
// The ray starts at the receiver position itself.
//
//   Light                    Light parameters; Position and SourceRadius are read here
//   PixelPos                 Pixel coordinate; used to fetch the GBuffer normal and to vary the random sequence
//   TranslatedWorldPosition  Receiver position; View.PreViewTranslation is subtracted before use
//   CotMaxRayAngleFromLight  Shortens rays that leave the light direction, to limit penumbra sizes
//   RayIndex, RayCount       Index of this ray and the total number of rays for the pixel
//   OutRayStart, OutRayEnd   Written only when the function returns true; left untouched otherwise
//
// Returns false, without generating a ray, when the pixel faces away from the whole light source.
bool GenerateRayLocalLight(
	FLightShaderParameters Light,
	uint2 PixelPos,
	float3 TranslatedWorldPosition,
	float CotMaxRayAngleFromLight,
	uint RayIndex,
	uint RayCount,
	inout float3 OutRayStart,
	inout float3 OutRayEnd)
{
	const half3 N = GetGBufferDataUint(PixelPos, true).WorldNormal;
	const float3 WorldPosition = TranslatedWorldPosition - View.PreViewTranslation;

	float3 ToLight = Light.Position - WorldPosition;
	float DistSqr = dot(ToLight, ToLight);
	float Falloff = rcp(DistSqr + 1);

	float InvDist = rsqrt(DistSqr);
	float DistToLight = DistSqr * InvDist;

	half3 ConeAxis = ToLight * InvDist;

	// The pixel is back facing when its normal points away from the light direction by more than SinAlpha
	half SinAlphaSqr = saturate(Pow2(Light.SourceRadius) * Falloff);
	half SinAlpha = sqrt(SinAlphaSqr);
	bool bBackFace = dot(N, ConeAxis) < -SinAlpha;

	if (!bBackFace)
	{
		// Sine of the half angle of the cone of directions from the pixel to the light source disk
		half ConeSin = Light.SourceRadius * InvDist;

		uint TileID = MortonEncode(PixelPos);
		RandomSequence RandSequence;
		// Combine pixel-level and sample-level bits into the sample index (visible structure will be hidden by owen scrambling of the index)
		RandSequence.SampleIndex = reversebits((65536 * View.StateFrameIndex + TileID) * RayCount + RayIndex);
		RandSequence.SampleSeed = 0; // always use the same sequence

		float2 E = SobolSampler(RandSequence.SampleIndex, RandSequence.SampleSeed, 8).xy;
		half2 LightUV = UniformSampleDiskConcentricApprox(E).xy;
		LightUV *= ConeSin;

		half SinTheta2 = dot(LightUV, LightUV);
		half SinTheta = sqrt(SinTheta2);
		half CosTheta = sqrt(1 - SinTheta2);

		half3 Dir = TangentToWorld(half3(LightUV, CosTheta), ConeAxis);

		// The further afield our rays go the poorer our approximation is as the "bend" due to our testing
		// against a shadow map instead of along the ray increases. Thus we avoid going all the way to the light
		// where the projection becomes extreme.

		// Clip ray to max angle to limit penumbra sizes
		float ClampedLength = DistToLight * 0.75 * saturate(1.5 / (CosTheta + CotMaxRayAngleFromLight * SinTheta));

		OutRayStart = WorldPosition;
		OutRayEnd = WorldPosition + Dir * ClampedLength;
	}
	return !bBackFace;
}