UnrealEngineUWP/Engine/Shaders/PostProcessAmbientOcclusion.usf

// Copyright 1998-2015 Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	PostprocessAmbientOcclusion.usf: To generate ambient occlusion as a postprocess
=============================================================================*/

#include "Common.usf"
#include "PostProcessCommon.usf"
#include "DeferredShadingCommon.usf"

// set by C++:
//
// 0:low / 1: medium / 2:high
// SAMPLESET_QUALITY
//
// 0:no / 1:yes
// USE_AO_SETUP_AS_INPUT
//
// 0:no / 1:yes
// USE_UPSAMPLE

// Compile-time switches that are fixed in this file (not set by C++).

// Depth source for the occlusion samples:
// 0:normal method using depth computed in setup passes (might need a clear outside the viewport, test with stereo/splitscreen) / 1:faster but slightly lower quality
#define USE_HZB	1

// 0: classic with weighted sample, 1: don't normalize and adjust the formula to be simpler and faster - can look better and is cheaper (Alchemy like?)
#define OPTIMIZATION_O1 1

// 1:lowest quality, 2:medium , 3:high, more doesn't give too much (maybe HZB mip computations should be adjusted)
// (SAMPLE_STEPS is derived from SAMPLESET_QUALITY further down, hence this stays commented out)
//#define SAMPLE_STEPS 3

// 0:does not use normals (slightly faster, lower quality, WIP = not finished), 1: uses normals (slower but higher quality)
#define SSAO_NORMAL_AFFECTED 1

// ambient occlusion
// AO_SAMPLE_QUALITY = 0 : no AO sampling, only upsampling
// AO_SAMPLE_QUALITY = 1 : no dither/per pixel randomization
// AO_SAMPLE_QUALITY = 2 : efficient high frequency 4x4 pattern without jitter for TemporalAA
// AO_SAMPLE_QUALITY = 3 : efficient high frequency 4x4 pattern with jitter for TemporalAA

// Map the C++ supplied SAMPLESET_QUALITY to the sample table (USE_SAMPLESET) and
// the number of steps taken along each sample direction (SAMPLE_STEPS).
// Any value other than 0 or 1 ends up in the "high" branch.
#if SAMPLESET_QUALITY == 0
	// low: 3 point table, 1 step
	#define USE_SAMPLESET 1
	#define SAMPLE_STEPS 1
#elif SAMPLESET_QUALITY == 1
	// medium: 3 point table, 3 steps
	#define USE_SAMPLESET 1
	#define SAMPLE_STEPS 3
#else // SAMPLESET_QUALITY == 2
	// high: 6 point table, 3 steps
	#define USE_SAMPLESET 3
	#define SAMPLE_STEPS 3
#endif

// 0:4 samples, 1:9 samples (only really noticeable with dither usage ??)
//#define AO_UPSAMPLE_QUALITY

// Pick sampling and upsampling quality depending on the input of the pass.
// Note: both branches currently select AO_SAMPLE_QUALITY 3; they only differ in
// the upsample quality and in whether the sample table is forced to set 3.
#if USE_AO_SETUP_AS_INPUT == 1
	// lower resolution: always use the 6 point table and the 9 sample upsample
	#define AO_SAMPLE_QUALITY 3
	#undef USE_SAMPLESET
	#define USE_SAMPLESET 3
	#define AO_UPSAMPLE_QUALITY 1
#else
	// full resolution is expensive: keep the table chosen by SAMPLESET_QUALITY and use the 4 sample upsample
	#define AO_SAMPLE_QUALITY 3
	#define AO_UPSAMPLE_QUALITY 0
#endif

// Sample offset tables, selected at compile time through USE_SAMPLESET.
// Each table defines SAMPLESET_ARRAY_SIZE and OcclusionSamplesOffsets[]; MainPS
// iterates over all entries and rotates/scales them by a per pixel base.
// 0: 1 point (for testing)
// 1: 3 points
// 2: more evenly spread (5 points - slightly faster, stronger effect, better with multiple levels?)
// 3: near the surface very large, softly fading out (6 points)
// Any other value falls through to set 3.
//#define USE_SAMPLESET 2
#if USE_SAMPLESET == 0
	#define SAMPLESET_ARRAY_SIZE 1
	static const float2 OcclusionSamplesOffsets[SAMPLESET_ARRAY_SIZE]=
	{
		// one sample, for testing
		float2(0.500, 0.500),
	};
#elif USE_SAMPLESET == 1
	#define SAMPLESET_ARRAY_SIZE 3
	static const float2 OcclusionSamplesOffsets[SAMPLESET_ARRAY_SIZE]=
	{
		// 3 points distributed on the unit disc, spiral order and distance
		// (the first two directions are scaled down, the last one is used as is)
		float2(0, -1.0f) * 0.43f,
		float2(0.58f, 0.814f) * 0.7f,
		float2(-0.58f, 0.814f)
	};
#elif USE_SAMPLESET == 2
	#define SAMPLESET_ARRAY_SIZE 5
	static const float2 OcclusionSamplesOffsets[SAMPLESET_ARRAY_SIZE]=
	{
		// 5 points distributed on a ring, each following point scaled down a bit more
		float2(0.156434, 0.987688),
		float2(0.987688, 0.156434)*0.9,
		float2(0.453990, -0.891007)*0.8,
		float2(-0.707107, -0.707107)*0.7,
		float2(-0.891006, 0.453991)*0.65,
	};
#else // USE_SAMPLESET == 3
	#define SAMPLESET_ARRAY_SIZE 6
	static const float2 OcclusionSamplesOffsets[SAMPLESET_ARRAY_SIZE]=
	{
		// 6 points distributed on the unit disc, spiral order and distance
		float2(0.000, 0.200),
		float2(0.325, 0.101),
		float2(0.272, -0.396),
		float2(-0.385, -0.488),
		float2(-0.711, 0.274),
		float2(0.060, 0.900)
	};
#endif // USE_SAMPLESET


// Shader constants shared by the ambient occlusion passes in this file:
// [0]: .x:AmbientOcclusionPower, .y:AmbientOcclusionBias/BiasDistance, .z:1/AmbientOcclusionDistance, .w:AmbientOcclusionIntensity
// [1]: .xy:ViewportUVToRandomUV, .z:AORadiusInShader, .w:Ratio
// [2]: .x:ScaleFactor(e.g. 4 if current RT is a quarter in size), .y:InvThreshold, .z:ScaleRadiusInWorldSpace(0:VS/1:WS), .w:MipBlend
// [3]: .x:SSR StepCount, .y:SSR StepSize, .z:StaticFraction, .w:InvTanHalfFov (MainPS reads it under that name)
// [4]: .x:Multiplier for FadeDistance/Radius, .y:Additive for FadeDistance/Radius, .zw: unused
float4 ScreenSpaceAOandSSRShaderParams[5];

// needed to prevent AO seam near 16 bit float maximum, this factor pushes the problem far out and it seems to not have a visual degradation nearby
// (depth is divided by this when written by MainSetupPS and multiplied by it again when read back)
const static float Constant_Float16F_Scale =  4096.0f * 32.0f;

// only for MainSetupPS()
// .x:ScaleFactor(e.g. 4 if current RT is a quarter in size), .y:InvThreshold, .zw: unused
float4 AmbientOcclusionSetupParams;

// NOTE(review): not referenced by any function visible in this file - confirm whether it is still needed
float4 NoiseScale;

/** RGBA8 linear texture containing random normals */
Texture2D RandomNormalTexture;
SamplerState RandomNormalTextureSampler;

// .xy: scale applied to the 0..1 UV before the HZB texture (PostprocessInput3) is sampled
// .z: factor that turns the step index into the HZB mip level, see MainPS: log2(1 + z * step)
float3 HzbUvAndStepMipLevelFactor;

// Builds a position from a depth and a screen position: xy is the screen
// position scaled by the depth, z is the depth itself.
// could be moved to a more central spot
// @param SceneDepth depth, ends up unchanged in .z
// @param ScreenPos -1 .. 1
// @return float3(ScreenPos * SceneDepth, SceneDepth)
float3 ReconstructCSPos(float SceneDepth, float2 ScreenPos)
{
	float3 Position;

	Position.xy = ScreenPos * SceneDepth;
	Position.z = SceneDepth;

	return Position;
}

// Inverse of the xy part of ReconstructCSPos(): divides xy by z.
// could be moved to a more central spot
// @param In position as produced by ReconstructCSPos(), .z is not checked against 0
// @return In.xy / In.z
float2 ReconstructSSPosFromCS(float3 In)
{
	float2 ScreenPos = In.xy;

	ScreenPos /= In.z;

	return ScreenPos;
}

// Converts a screen position to a texture coordinate of the given input.
// could be moved to a more central spot
// can be optimized
// @param ScreenPos position that ScreenPosToPixel maps to a pixel position
// @param InputSize e.g. PostprocessInput0Size, only .zw is used as multiplier
// @return (pixel position + 0.5) * InputSize.zw
float2 ScreenPosToUV(float2 ScreenPos, float4 InputSize)
{
	// scale and bias into pixel units
	float2 PixelPos = ScreenPos * ScreenPosToPixel.xy + ScreenPosToPixel.zw;

	// presumably the offset to the pixel center
	PixelPos += 0.5f;

	return PixelPos * InputSize.zw;
}

// Compares two depth values.
// @param TweakScale multiplier for the absolute depth difference, larger means stricter
// @return 0: not similar .. 1:very similar
float ComputeDepthSimilarity(float DepthA, float DepthB, float TweakScale)
{
	float DepthDistance = abs(DepthA - DepthB);

	return saturate(1 - DepthDistance * TweakScale);
}

// downsample the input of the ambient occlusion pass for better performance, can take input from setup or another downsample pass
// Output: .rgb = normal (stored as 0..1) averaged over the four source samples, weighted by how
// close each sample's depth is to the largest depth; .a = largest depth / Constant_Float16F_Scale
void MainSetupPS(in float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor0 : SV_Target0)
{
	float2 CenterUV = UVAndScreenPos.xy;

	// texel size of the input that gets downsampled
#if INITIAL_PASS == 1
	float2 TexelSize = PostprocessInput0Size.zw;
#else
	float2 TexelSize = PostprocessInput1Size.zw;
#endif

	// can be optimized
	// 2x2 taps, half a texel away from the center in each direction
	float2 TapUV[4];
	TapUV[0] = CenterUV + float2(-0.5f, -0.5f) * TexelSize;
	TapUV[1] = CenterUV + float2( 0.5f, -0.5f) * TexelSize;
	TapUV[2] = CenterUV + float2(-0.5f,  0.5f) * TexelSize;
	TapUV[3] = CenterUV + float2( 0.5f,  0.5f) * TexelSize;

	// .rgb: world normal stored as 0..1, .a: scene depth
	float4 Taps[4];

	UNROLL for(uint TapIndex = 0; TapIndex < 4; ++TapIndex)
	{
		Taps[TapIndex].rgb = GetGBufferData(TapUV[TapIndex], true).WorldNormal * 0.5f + 0.5f;
		Taps[TapIndex].a = CalcSceneDepth(TapUV[TapIndex]);
	}

	float MaxZ = max( max(Taps[0].a, Taps[1].a), max(Taps[2].a, Taps[3].a));

	float InvThreshold = AmbientOcclusionSetupParams.y;

	// todo: only needed if SSAO_NORMAL_AFFECTED == 1
	// starts slightly above 0 so the division below cannot divide by 0
	float4 WeightedNormal = 0.0001f;

	UNROLL for(uint BlendIndex = 0; BlendIndex < 4; ++BlendIndex)
	{
		float Similarity = ComputeDepthSimilarity(Taps[BlendIndex].a, MaxZ, InvThreshold);

		// .w accumulates the weights
		WeightedNormal += float4(Taps[BlendIndex].rgb, 1) * Similarity;
	}

	WeightedNormal.rgb /= WeightedNormal.w;

	OutColor0 = float4(WeightedNormal.rgb, MaxZ / Constant_Float16F_Scale);
}

// Depth of the pixel the AO is computed for.
// @param UV texture coordinate to look up
// @return depth, taken from the setup pass output or from the scene depth depending on USE_AO_SETUP_AS_INPUT
float GetDepthFromAOInput(float2 UV)
{
#if USE_AO_SETUP_AS_INPUT
	// low resolution: alpha stores depth / Constant_Float16F_Scale
	float4 SetupTexel = Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, UV);

	return SetupTexel.a * Constant_Float16F_Scale;
#else
	// full resolution
	float FullResDepth = CalcSceneDepth(UV);

	return FullResDepth;
#endif
}

// World space normal of the pixel the AO is computed for.
// @param UV texture coordinate to look up
// @return normal, taken from the setup pass output or from the GBuffer depending on USE_AO_SETUP_AS_INPUT
float3 GetWorldSpaceNormalFromAOInput(float2 UV)
{
#if USE_AO_SETUP_AS_INPUT
	// low resolution: rgb stores the normal as 0..1, expand to -1..1
	float4 SetupTexel = Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, UV);

	return SetupTexel.xyz * 2 - 1;
#else
	// full resolution
	float3 GBufferNormal = GetGBufferData(UV, false).WorldNormal;

	return GBufferNormal;
#endif
}

// Depth and normal aware upsample of the former (lower resolution) AO pass.
// PostprocessInput2 is the AO result to upsample, PostprocessInput1 holds the matching
// normal (rgb, stored as 0..1) and depth (a, divided by Constant_Float16F_Scale).
// @param SceneDepth depth of the pixel that receives the result
// @param InUV texture coordinate of that pixel
// @param CenterWorldNormal normal of that pixel
// @return weighted average of the samples, except .g which is the minimum .g of all samples
float4 ComputeUpsampleContribution(float SceneDepth, float2 InUV, float3 CenterWorldNormal)
{
	float2 TexelSize = PostprocessInput2Size.zw;

	// can be optimized
#if AO_UPSAMPLE_QUALITY == 0
	// 2x2 taps, half a texel away from the center
	const int SampleCount = 4;
	float2 TapUV[SampleCount];

	TapUV[0] = InUV + float2(-0.5f,  0.5f) *  TexelSize;
	TapUV[1] = InUV + float2( 0.5f,  0.5f) *  TexelSize;
	TapUV[2] = InUV + float2(-0.5f, -0.5f) *  TexelSize;
	TapUV[3] = InUV + float2( 0.5f, -0.5f) *  TexelSize;
#else // AO_UPSAMPLE_QUALITY != 0
	// 3x3 taps, a full texel apart
	const int SampleCount = 9;
	float2 TapUV[SampleCount];

	TapUV[0] = InUV + float2( -1, -1) *  TexelSize;
	TapUV[1] = InUV + float2(  0, -1) *  TexelSize;
	TapUV[2] = InUV + float2(  1, -1) *  TexelSize;
	TapUV[3] = InUV + float2( -1,  0) *  TexelSize;
	TapUV[4] = InUV + float2(  0,  0) *  TexelSize;
	TapUV[5] = InUV + float2(  1,  0) *  TexelSize;
	TapUV[6] = InUV + float2( -1,  1) *  TexelSize;
	TapUV[7] = InUV + float2(  0,  1) *  TexelSize;
	TapUV[8] = InUV + float2(  1,  1) *  TexelSize;
#endif // AO_UPSAMPLE_QUALITY

	// to avoid division by 0
	const float SmallValue = 0.0001f;

	// we could weight the samples better but tests didn't show much difference
	float TotalWeight = SmallValue;
	float4 Accumulated = float4(SmallValue,0,0,0);
	float MinIteration = 1.0f;

	UNROLL for(int TapIndex = 0; TapIndex < SampleCount; ++TapIndex)
	{
		float4 LowResValue = Texture2DSample(PostprocessInput2, PostprocessInput2Sampler, TapUV[TapIndex]);
		float4 NormalAndDepth = Texture2DSample(PostprocessInput1, PostprocessInput1Sampler, TapUV[TapIndex]);

		MinIteration = min(MinIteration, LowResValue.g);

		float TapDepth = NormalAndDepth.a * Constant_Float16F_Scale;
		float3 TapWorldNormal = NormalAndDepth.xyz*2-1;

		// when tweaking this constant look for crawling pattern at edges
		float Weight = ComputeDepthSimilarity(TapDepth, SceneDepth, 0.003f);

		// todo: only needed if SSAO_NORMAL_AFFECTED == 1
		// samples facing away from the center normal do not contribute
		Weight *= saturate(dot(TapWorldNormal, CenterWorldNormal));

		// todo: 1 can be put into the input to save an instruction
		Accumulated += float4(LowResValue.rgb, 1) * Weight;
		TotalWeight += Weight;
	}

	Accumulated /= TotalWeight;
	Accumulated.g = MinIteration;

	return Accumulated;
}

// to blend between upsampled and current pass data
// @return 0: only the current pass is used .. 1: only the upsampled data is used
float ComputeLerpFactor()
{
#if USE_UPSAMPLE == 0
	// if there is no former pass we cannot use the data (wins over every other setting)
	return 0.0f;
#elif AO_SAMPLE_QUALITY == 0
	// we have no AO, we only use the upsampled data
	return 1.0f;
#else
	// MipBlend, set up on C++ side
	return ScreenSpaceAOandSSRShaderParams[2].w;
#endif
}

// Cheap stand-in for acos(x) / PI.
// @param x cosine of the angle
// @return NormAngle means 0..1 is actually 0..PI
float acosApproxNormAngle(float x)
{
	// todo: expose
	// 1: is a good linear approximation, 0.9f seems to look good
	const float ContrastTweak = 0.9f;

	// correct would be: acos(x) / PI
	// the plain linear approximation is: saturate((1 - x) * 0.5f)
	float LinearNormAngle = (1 - x) * 0.5f;

	// pretty good approximation with contrast tweak
	return saturate(LinearNormAngle * ContrastTweak);
}

// Evaluates a single occlusion sample next to ViewSpacePosition (normal affected variant).
// The sample is offset in xy by LocalRandom * Scale, projected to the screen, the depth found
// there is turned back into a position and compared against the surface normal.
// @param Scale multiplier for LocalRandom (MainPS passes a negative value for the mirrored direction)
// @param LocalRandom xy offset of the sample
// @param FovFix multiplied onto the position before projecting to screen space
// @param InvFovFix multiplied onto the reconstructed sample position
// @param ViewSpacePosition position of the pixel the AO is computed for
// @param ScaledViewSpaceNormal normal of that pixel (pre scaled by the caller if OPTIMIZATION_O1 is used)
// @param InvHaloSize only used if OPTIMIZATION_O1 is 0, fades the weight with the sample distance
// @param MipLevel HZB mip to sample, only used if USE_HZB is 1
// @return float2(InvNormAngle, Weight)
float2 Wedge(float Scale, float2 LocalRandom, float3 FovFix, float3 InvFovFix, float3 ViewSpacePosition, float3 ScaledViewSpaceNormal, float InvHaloSize, float MipLevel)
{
	float3 SamplePosition2 = ViewSpacePosition + float3(LocalRandom, 0) * Scale;

	float2 ScreenSpacePos = ReconstructSSPosFromCS(SamplePosition2 * FovFix);

	// was "#if USE_estmap", a macro that is defined nowhere, which silently disabled the HZB path;
	// Wedge2() uses USE_HZB for the very same lookup
#if USE_HZB
	float2 UV = ScreenSpacePos * float2(0.5f, -0.5f) + 0.5f;

	float Abs = ConvertFromDeviceZ(Texture2DSampleLevel( PostprocessInput3, PostprocessInput3Sampler, HzbUvAndStepMipLevelFactor.xy * UV, MipLevel).r);
#else
	float2 UV = ScreenPosToUV(ScreenSpacePos, PostprocessInput0Size);

	float Abs = Texture2DSample(PostprocessInput1, PostprocessInput1Sampler, UV).a * Constant_Float16F_Scale;
#endif

	float3 SamplePosition = ReconstructCSPos(Abs, ScreenSpacePos) * InvFovFix;

	// from the shaded position to what is actually visible at the sample location
	float3 Delta = SamplePosition - ViewSpacePosition;

#if OPTIMIZATION_O1
	// not normalized: dividing by the squared length also fades out distant samples
	float InvNormAngle = saturate(dot(Delta, ScaledViewSpaceNormal) / dot(Delta, Delta));
#else
	float InvNormAngle = saturate(dot(Delta, ScaledViewSpaceNormal) / length(Delta));
#endif

	float Weight = 1;

#if !OPTIMIZATION_O1
	Weight = saturate(1.0f - length(Delta) * InvHaloSize);
#endif

	return float2(InvNormAngle, Weight);
}


// Evaluates a pair of occlusion samples on opposite sides of ViewSpacePosition (variant that
// does not use the normal). The angle between the two directions to the found surface
// points is used as occlusion measure.
// @param Scale multiplier for LocalRandom
// @param LocalRandom xy offset, added for the "left" and subtracted for the "right" sample
// @param FovFix multiplied onto the position before projecting to screen space
// @param InvFovFix multiplied onto the reconstructed sample position
// @param ViewSpacePosition position of the pixel the AO is computed for
// @param ScaledViewSpaceNormal not used by this variant
// @param InvHaloSize only used if OPTIMIZATION_O1 is 0
// @param MipLevel HZB mip to sample, only used if USE_HZB is 1
// @return float2(InvNormAngle, Weight)
float2 Wedge2(float Scale, float2 LocalRandom, float3 FovFix, float3 InvFovFix, float3 ViewSpacePosition, float3 ScaledViewSpaceNormal, float InvHaloSize, float MipLevel)
{
	float3 DeltaLeft;
	float AbsLeft;
	float WeightLeft;
	float3 SamplePositionLeft;
	{
		float3 SamplePosition2 = ViewSpacePosition + float3(LocalRandom, 0) * Scale;

		float2 ScreenSpacePos = ReconstructSSPosFromCS(SamplePosition2 * FovFix);

	#if USE_HZB
		float2 UV = ScreenSpacePos * float2(0.5f, -0.5f) + 0.5f;

		AbsLeft = ConvertFromDeviceZ(Texture2DSampleLevel( PostprocessInput3, PostprocessInput3Sampler, HzbUvAndStepMipLevelFactor.xy * UV, MipLevel).r);
	#else
		float2 UV = ScreenPosToUV(ScreenSpacePos, PostprocessInput0Size);

		AbsLeft = Texture2DSample(PostprocessInput1, PostprocessInput1Sampler, UV).a * Constant_Float16F_Scale;
	#endif

		SamplePositionLeft = ReconstructCSPos(AbsLeft, ScreenSpacePos) * InvFovFix;

		DeltaLeft = SamplePositionLeft - ViewSpacePosition;

		// only feeds the commented out return at the end of the function
		WeightLeft = 1;

#if !OPTIMIZATION_O1
		WeightLeft = saturate(1.0f - length(DeltaLeft) * InvHaloSize);
#endif
	}

	float3 DeltaRight;
	float AbsRight;
	float WeightRight;
	float3 SamplePositionRight;
	{
		// mirrored offset
		float3 SamplePosition2 = ViewSpacePosition - float3(LocalRandom, 0) * Scale;

		float2 ScreenSpacePos = ReconstructSSPosFromCS(SamplePosition2 * FovFix);

	#if USE_HZB
		float2 UV = ScreenSpacePos * float2(0.5f, -0.5f) + 0.5f;

		// this used to write AbsLeft, which clobbered the left depth and left AbsRight uninitialized
		AbsRight = ConvertFromDeviceZ(Texture2DSampleLevel( PostprocessInput3, PostprocessInput3Sampler, HzbUvAndStepMipLevelFactor.xy * UV, MipLevel).r);
	#else
		float2 UV = ScreenPosToUV(ScreenSpacePos, PostprocessInput0Size);

		AbsRight = Texture2DSample(PostprocessInput1, PostprocessInput1Sampler, UV).a * Constant_Float16F_Scale;
	#endif

		SamplePositionRight = ReconstructCSPos(AbsRight, ScreenSpacePos) * InvFovFix;

		DeltaRight = SamplePositionRight - ViewSpacePosition;

		// only feeds the commented out return at the end of the function
		WeightRight = 1;

#if !OPTIMIZATION_O1
		WeightRight = saturate(1.0f - length(DeltaRight) * InvHaloSize);
#endif
	}


	float FlatSurfaceBias = 5.0f;

	// how much closer to the viewer than the center each of the two samples is
	float left = ViewSpacePosition.z - AbsLeft;
	float right = ViewSpacePosition.z - AbsRight;

	// OptionA: accurate angle computation
	float NormAngle = acosApproxNormAngle( dot(DeltaLeft, DeltaRight) / sqrt(length2(DeltaLeft) * length2(DeltaRight)));
	// OptionB(fade out in near distance): float NormAngle = acosApproxNormAngle( (- left - right) * 20);
	// OptionC(look consistent but more noisy, should be much faster): float NormAngle = 0;


	// not 100% correct but simple
	// bias is needed to avoid flickering on almost perfectly flat surfaces
	//	    if((leftAbs  + rightAbs) * 0.5f > SceneDepth - 0.0001f)
	if(left + right < FlatSurfaceBias)
	{
		// fix concave case
		NormAngle = 1;
	}

	// to avoid halos around objects
	float Weight = 1;

	float InvAmbientOcclusionDistance = ScreenSpaceAOandSSRShaderParams[0].z;
	float ViewDepthAdd = 1.0f - ViewSpacePosition.z * InvAmbientOcclusionDistance;

	// equals saturate(1 - (ViewSpacePosition.z - SamplePosition.z) * InvAmbientOcclusionDistance)
	Weight *= saturate(SamplePositionLeft.z * InvAmbientOcclusionDistance + ViewDepthAdd);
	Weight *= saturate(SamplePositionRight.z * InvAmbientOcclusionDistance + ViewDepthAdd);

//	return float2(1 - NormAngle, (WeightLeft + WeightRight) * 0.5f);
	return float2((1-NormAngle) / (Weight + 0.001f), Weight);
}

// the main pixel shader that computes ambient occlusion
// Steps: read depth/normal of the pixel, build a view space position, accumulate occlusion over
// the OcclusionSamplesOffsets table (rotated/scaled per pixel), blend with the upsampled result
// of the former pass and - in the full resolution pass only - apply distance fade, power and intensity.
// @param UVAndScreenPos .xy: texture coordinate, .zw: screen position
// @param OutColor .r: AO, .gb: 0 (lower resolution pass) / AO in all channels (full resolution pass)
void MainPS(in float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	OutColor = 0;

	// the following constants as set up on C++ side
	// (InvAmbientOcclusionDistance is not used further in this function)
	float AmbientOcclusionPower = ScreenSpaceAOandSSRShaderParams[0].x;
	float Ratio = ScreenSpaceAOandSSRShaderParams[1].w;
	float AORadiusInShader = ScreenSpaceAOandSSRShaderParams[1].z;
	float InvAmbientOcclusionDistance = ScreenSpaceAOandSSRShaderParams[0].z;
	float AmbientOcclusionIntensity = ScreenSpaceAOandSSRShaderParams[0].w;
	float2 ViewportUVToRandomUV = ScreenSpaceAOandSSRShaderParams[1].xy;
	float AmbientOcclusionBias = ScreenSpaceAOandSSRShaderParams[0].y;
	float ScaleFactor = ScreenSpaceAOandSSRShaderParams[2].x;
	float ScaleRadiusInWorldSpace = ScreenSpaceAOandSSRShaderParams[2].z;

	float2 UV = UVAndScreenPos.xy;
	float2 ScreenPos = UVAndScreenPos.zw;

	// [3].w carries InvTanHalfFov
	float InvTanHalfFov = ScreenSpaceAOandSSRShaderParams[3].w;
	float3 FovFix = float3(InvTanHalfFov, Ratio * InvTanHalfFov, 1);
	float3 InvFovFix = 1.0f / FovFix;

	float SceneDepth = GetDepthFromAOInput(UV);
	float3 WorldNormal = GetWorldSpaceNormalFromAOInput(UV);

	float3 ViewSpaceNormal = normalize(mul(WorldNormal, (float3x3)View.TranslatedWorldToView));

	float3 ViewSpacePosition = ReconstructCSPos(SceneDepth, ScreenPos);

	//Invert affects of fov and aspect in projection matrix. Reasonably sure the maths works out on this. The results look right.
	ViewSpacePosition *= InvFovFix;
	// ScaleRadiusInWorldSpace 0: radius scales with the depth, 1: radius is independent of the depth
	float2 ActualAORadiusXY = AORadiusInShader * lerp(SceneDepth, 1, ScaleRadiusInWorldSpace) * InvFovFix.xy;

	//Add bias after fixup (causes minor banding - not needed with larger radius)
	ViewSpacePosition += ViewSpaceNormal * (AmbientOcclusionBias * SceneDepth) * ScaleFactor;

	// .x: accumulated AO, .y: accumulated weight; not 0 to avoid a division by 0 at the end
	float2 WeightAccumulator = 0.0001f;

#if AO_SAMPLE_QUALITY != 0
	// RandomBase: .xy and .zw are the two base vectors the sample offsets get transformed with.
	// Each branch below opens a scope that is closed once, after the sample loop.

#if AO_SAMPLE_QUALITY == 1
	// fixed base vectors, no per pixel randomization
	float4 RandomBase = float4(0,1,1,0) * float4(ActualAORadiusXY, ActualAORadiusXY);
	{
#elif AO_SAMPLE_QUALITY == 2
	// extract one of 16 base vectors (rotation and scale) from a texture that repeats 4x4
	float4 RandomBase = (Texture2DSample(RandomNormalTexture, RandomNormalTextureSampler, UV * ViewportUVToRandomUV) * 2 - 1 ) * float4(ActualAORadiusXY, ActualAORadiusXY);
	{
#else // AO_SAMPLE_QUALITY == 3
	// extract one of 16 base vectors (rotation and scale) from a texture that repeats 4x4, changing over time if TemporalAA is enabled
	// todo: can be moved into the vertex shader
	float2 TemporalOffset = (View.TemporalAAParams.r % 8) * float2(2.48f, 6.72f) / 64.0f;

	float4 RandomNorm = Texture2DSample(RandomNormalTexture, RandomNormalTextureSampler, TemporalOffset + UV * ViewportUVToRandomUV);
	float4 RandomBase = (RandomNorm * 2 - 1) * float4(ActualAORadiusXY, ActualAORadiusXY);

	{
#endif // AO_SAMPLE_QUALITY ==

		// .x means for very anisotropic viewports we scale by x
		float InvHaloSize = 1.0f / (ActualAORadiusXY.x * 2);

		float3 ScaledViewSpaceNormal = ViewSpaceNormal;

#if OPTIMIZATION_O1
		// Wedge() does not normalize in this mode, the normal is pre scaled here instead
		ScaledViewSpaceNormal *= 0.08f * lerp(SceneDepth, 1000, ScaleRadiusInWorldSpace);
#endif

		UNROLL for(int i = 0; i < SAMPLESET_ARRAY_SIZE; ++i)
		{
			// -1..1
			float2 UnrotatedRandom = OcclusionSamplesOffsets[i].xy;

			// -ActualAORadiusXY..ActualAORadiusXY
			float2 LocalRandom = UnrotatedRandom.x * RandomBase.xy + UnrotatedRandom.y * RandomBase.zw;

			// SSAO_NORMAL_AFFECTED is a compile time constant, only one of the two branches remains

			if(SSAO_NORMAL_AFFECTED == 0)
			{
				// not affected by the normal: Wedge2() looks at both directions at once
				float2 LocalAccumulator = float2(0, 0);

				UNROLL for(uint step = 0; step < SAMPLE_STEPS; ++step)
				{
					// constant at run time
					float Scale = (step + 1) / (float)SAMPLE_STEPS;
					// constant at run time (higher is better for texture cache / performance, lower is better quality
					float MipLevel = log2(1.0f + HzbUvAndStepMipLevelFactor.z * step);

					float2 StepSample = Wedge2(Scale, LocalRandom, FovFix, InvFovFix, ViewSpacePosition, ScaledViewSpaceNormal, InvHaloSize, MipLevel);

					// combine horizon samples
					LocalAccumulator = lerp(LocalAccumulator, float2(max(LocalAccumulator.x, StepSample.x), 1), StepSample.y);
				}

				// Square(): the area scales quadratic with the angle - it gets a bit darker
				WeightAccumulator += float2(Square(1 - LocalAccumulator.x) * LocalAccumulator.y, LocalAccumulator.y);
			}
			else // SSAO_NORMAL_AFFECTED == 1
			{
				// Ambient Occlusion affected by per pixel normal: each direction and its mirrored
				// counterpart (mirror = -1 and 1) are accumulated on their own
				UNROLL for(int mirror = -1; mirror <= 1; mirror +=2)
				{
					float2 LocalAccumulator = float2(0, 0);

					UNROLL for(uint step = 0; step < SAMPLE_STEPS; ++step)
					{
						// constant at run time
						float Scale = (step + 1) / (float)SAMPLE_STEPS * mirror;
						// constant at run time (higher is better for texture cache / performance, lower is better quality
						float MipLevel = log2(1.0f + HzbUvAndStepMipLevelFactor.z * step);

						float2 StepSample = Wedge(Scale, LocalRandom, FovFix, InvFovFix, ViewSpacePosition, ScaledViewSpaceNormal, InvHaloSize, MipLevel);

						// combine horizon samples
						LocalAccumulator = lerp(LocalAccumulator, float2(max(LocalAccumulator.x, StepSample.x), 1), StepSample.y);
					}

					// Square(): the area scales quadratic with the angle - it gets a bit darker
					WeightAccumulator += float2(Square(1 - LocalAccumulator.x) * LocalAccumulator.y, LocalAccumulator.y);
				}
			}
		}
	}

#endif // #if AO_SAMPLE_QUALITY != 0

	// normalize; without any sampling this is 0.0001 / 0.0001 = 1 (no occlusion)
	OutColor.r = WeightAccumulator.x / WeightAccumulator.y;
	OutColor.gb = float2(0, 0);

	{
		float4 Filtered = ComputeUpsampleContribution(SceneDepth, UV, WorldNormal);

		// recombined result from multiple resolutions
		OutColor.r = lerp(OutColor.r, Filtered.r, ComputeLerpFactor());
	}

#if !USE_AO_SETUP_AS_INPUT
	// full res

	// soft fade out AO in the distance
	{
		float Mul = ScreenSpaceAOandSSRShaderParams[4].x;
		float Add = ScreenSpaceAOandSSRShaderParams[4].y;
		OutColor.r = lerp(OutColor.r, 1, saturate(SceneDepth * Mul + Add));
	}

	// user adjust AO
	// abs() to prevent shader warning
	OutColor.r = 1 - (1 - pow(abs(OutColor.r), AmbientOcclusionPower)) * AmbientOcclusionIntensity;

	// we output in a single alpha channel
	// (the assignment replicates the value into all four channels)
	OutColor = OutColor.r;
#endif
}


// used if StaticFraction > small number
// Outputs the screen space AO multiplied with the GBuffer AO, blended towards 1 (no occlusion)
// by StaticFraction; unlit pixels always get 1.
void BasePassAOPS(in float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	FScreenSpaceData ScreenSpaceData = GetScreenSpaceData(UVAndScreenPos.xy);

	// can be optimized
	// 0 for the unlit shading model, 1 for everything else
	float AOMask = (ScreenSpaceData.GBuffer.ShadingModelID != SHADINGMODELID_UNLIT);

	float StaticFraction = ScreenSpaceAOandSSRShaderParams[3].z;
	float BlendAmount = AOMask * StaticFraction;

	OutColor = lerp(1.0f, ScreenSpaceData.AmbientOcclusion * ScreenSpaceData.GBuffer.GBufferAO, BlendAmount);
}