You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
Addressing a number of DXC shader validation errors and warnings
#rb David.Harvey #jira none #rnx #ushell-cherrypick of 12330720 by eric.mcdaniel #ROBOMERGE-SOURCE: CL 12726817 in //UE4/Release-4.25Plus/... via CL 12781740 #ROBOMERGE-BOT: RELEASE (Release-Engine-Staging -> Main) (v681-12776863) [CL 12782554 by eric mcdaniel in Main branch]
This commit is contained in:
@@ -15,7 +15,7 @@ void MainPS(
|
||||
float4 SvPosition : SV_POSITION,
|
||||
out float4 OutColor : SV_Target0)
|
||||
{
|
||||
float2 BufferUV = SvPosition * View.BufferSizeAndInvSize.zw;
|
||||
float2 BufferUV = SvPosition.xy * View.BufferSizeAndInvSize.zw;
|
||||
|
||||
float4 SceneColorSample = SceneColor.SampleLevel(SceneColorSampler, BufferUV, 0);
|
||||
float4 SeparateTranslucencySample = SeparateTranslucency.SampleLevel(SeparateTranslucencySampler, BufferUV, 0);
|
||||
|
||||
@@ -100,7 +100,7 @@ void Main(
|
||||
)
|
||||
{
|
||||
#if PLATFORM_SUPPORTS_RENDERTARGET_WRITE_MASK
|
||||
uint CompositeUIMask = DecodeRTWriteMask(Input.Position, UIWriteMaskTexture, 1);
|
||||
uint CompositeUIMask = DecodeRTWriteMask(Input.Position.xy, UIWriteMaskTexture, 1);
|
||||
BRANCH
|
||||
if (CompositeUIMask == 0)
|
||||
{
|
||||
|
||||
@@ -124,8 +124,8 @@ bool SphereIntersectConeWithDepthRanges(float4 SphereCenterAndRadius, float3 Con
|
||||
float ConeAxisDistance = dot(SphereCenterAndRadius.xyz - ConeVertex, ConeAxis);
|
||||
float2 ConeAxisDistanceMinMax = float2(ConeAxisDistance + SphereCenterAndRadius.w, ConeAxisDistance - SphereCenterAndRadius.w);
|
||||
|
||||
if (ConeAxisDistanceMinMax.x > ConeAxisDepthRanges.x && ConeAxisDistanceMinMax.y < ConeAxisDepthRanges.y
|
||||
|| ConeAxisDistanceMinMax.x > ConeAxisDepthRanges.z && ConeAxisDistanceMinMax.y < ConeAxisDepthRanges.w)
|
||||
if ((ConeAxisDistanceMinMax.x > ConeAxisDepthRanges.x && ConeAxisDistanceMinMax.y < ConeAxisDepthRanges.y)
|
||||
|| (ConeAxisDistanceMinMax.x > ConeAxisDepthRanges.z && ConeAxisDistanceMinMax.y < ConeAxisDepthRanges.w))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -41,16 +41,20 @@ void UploadObjectsToBufferCS(
|
||||
{
|
||||
uint DestIndex = UploadOperationIndices[UploadOperationIndex];
|
||||
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_BOUNDS_STRIDE; ++VectorIndex)
|
||||
{
|
||||
UploadBoundsFloat4(DestIndex * OBJECT_BOUNDS_STRIDE + VectorIndex, UploadOperationIndex * UPLOAD_DATA_STRIDE + VectorIndex);
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_BOUNDS_STRIDE; ++VectorIndex)
|
||||
{
|
||||
UploadBoundsFloat4(DestIndex * OBJECT_BOUNDS_STRIDE + VectorIndex, UploadOperationIndex * UPLOAD_DATA_STRIDE + VectorIndex);
|
||||
}
|
||||
}
|
||||
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_DATA_STRIDE; ++VectorIndex)
|
||||
{
|
||||
UploadDataFloat4(DestIndex * OBJECT_DATA_STRIDE + VectorIndex, UploadOperationIndex * UPLOAD_DATA_STRIDE + OBJECT_BOUNDS_STRIDE + VectorIndex);
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_DATA_STRIDE; ++VectorIndex)
|
||||
{
|
||||
UploadDataFloat4(DestIndex * OBJECT_DATA_STRIDE + VectorIndex, UploadOperationIndex * UPLOAD_DATA_STRIDE + OBJECT_BOUNDS_STRIDE + VectorIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -86,16 +90,20 @@ void CopyObjectBufferCS(
|
||||
uint SourceIndex = CopyOperationIndex;
|
||||
uint DestIndex = CopyOperationIndex;
|
||||
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_BOUNDS_STRIDE; ++VectorIndex)
|
||||
{
|
||||
CopyBoundsFloat4(DestIndex * OBJECT_BOUNDS_STRIDE + VectorIndex, SourceIndex * OBJECT_BOUNDS_STRIDE + VectorIndex);
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_BOUNDS_STRIDE; ++VectorIndex)
|
||||
{
|
||||
CopyBoundsFloat4(DestIndex * OBJECT_BOUNDS_STRIDE + VectorIndex, SourceIndex * OBJECT_BOUNDS_STRIDE + VectorIndex);
|
||||
}
|
||||
}
|
||||
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_DATA_STRIDE; VectorIndex++)
|
||||
{
|
||||
CopyDataFloat4(DestIndex * OBJECT_DATA_STRIDE + VectorIndex, SourceIndex * OBJECT_DATA_STRIDE + VectorIndex);
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_DATA_STRIDE; VectorIndex++)
|
||||
{
|
||||
CopyDataFloat4(DestIndex * OBJECT_DATA_STRIDE + VectorIndex, SourceIndex * OBJECT_DATA_STRIDE + VectorIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -144,16 +152,20 @@ void RemoveObjectsFromBufferCS(
|
||||
uint SourceIndex = RemoveOperationIndices[RemoveOperationIndex].y;
|
||||
uint DestIndex = RemoveOperationIndices[RemoveOperationIndex].x;
|
||||
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_BOUNDS_STRIDE; ++VectorIndex)
|
||||
{
|
||||
WriteBoundsFloat4(DestIndex * OBJECT_BOUNDS_STRIDE + VectorIndex, SourceIndex * OBJECT_BOUNDS_STRIDE + VectorIndex);
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_BOUNDS_STRIDE; ++VectorIndex)
|
||||
{
|
||||
WriteBoundsFloat4(DestIndex * OBJECT_BOUNDS_STRIDE + VectorIndex, SourceIndex * OBJECT_BOUNDS_STRIDE + VectorIndex);
|
||||
}
|
||||
}
|
||||
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_DATA_STRIDE; VectorIndex++)
|
||||
{
|
||||
WriteDataFloat4(DestIndex * OBJECT_DATA_STRIDE + VectorIndex, SourceIndex * OBJECT_DATA_STRIDE + VectorIndex);
|
||||
UNROLL
|
||||
for (uint VectorIndex = 0; VectorIndex < OBJECT_DATA_STRIDE; VectorIndex++)
|
||||
{
|
||||
WriteDataFloat4(DestIndex * OBJECT_DATA_STRIDE + VectorIndex, SourceIndex * OBJECT_DATA_STRIDE + VectorIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -146,8 +146,8 @@ void CullObjectsForShadowCS(
|
||||
#if (DISTANCEFIELD_PRIMITIVE_TYPE == DFPT_SignedDistanceField)
|
||||
float4 ObjectBoundingSphere = LoadFloat4(SceneObjectBounds, SourceIndex);
|
||||
|
||||
if (ShadowBoundingSphere.w == 0 && ShadowConvexHullIntersectSphere(ObjectBoundingSphere.xyz, ObjectBoundingSphere.w)
|
||||
|| ShadowBoundingSphere.w > 0 && dot(ShadowBoundingSphere.xyz - ObjectBoundingSphere.xyz, ShadowBoundingSphere.xyz - ObjectBoundingSphere.xyz) < Square(ShadowBoundingSphere.w + ObjectBoundingSphere.w))
|
||||
if ((ShadowBoundingSphere.w == 0 && ShadowConvexHullIntersectSphere(ObjectBoundingSphere.xyz, ObjectBoundingSphere.w))
|
||||
|| (ShadowBoundingSphere.w > 0 && dot(ShadowBoundingSphere.xyz - ObjectBoundingSphere.xyz, ShadowBoundingSphere.xyz - ObjectBoundingSphere.xyz) < Square(ShadowBoundingSphere.w + ObjectBoundingSphere.w)))
|
||||
{
|
||||
// Assume ObjectBoundingSphere is located at (0, 0, 0) in local space
|
||||
float ViewDist2 = length2(ObjectBoundingSphere.xyz - View.WorldViewOrigin);
|
||||
|
||||
@@ -66,7 +66,6 @@ void FxaaVS(
|
||||
#endif
|
||||
|
||||
#define DECLARE_PARAMETER(a, b, c)
|
||||
float4 fxaaConstDir = float4(1.0, -1.0, 0.25, -0.25);
|
||||
|
||||
// see Fxaa.. header file for more details
|
||||
float4 fxaaConsoleRcpFrameOpt;
|
||||
|
||||
@@ -489,9 +489,8 @@ void ComplexMultTexture(bool bIsHorizontal, bool bUseAlpha, bool bIsGAGroup, in
|
||||
//float4 FilterTint;
|
||||
void ApplyTint(in Complex Tint, inout Complex LocalBuffer[2][RADIX])
|
||||
{
|
||||
for (uint r = 0; r < RADIX; ++r) LocalBuffer[0][r] *= Tint.x;
|
||||
|
||||
for (uint r = 0; r < RADIX; ++r) LocalBuffer[1][r] *= Tint.y;
|
||||
{ for (uint r = 0; r < RADIX; ++r) LocalBuffer[0][r] *= Tint.x; }
|
||||
{ for (uint r = 0; r < RADIX; ++r) LocalBuffer[1][r] *= Tint.y; }
|
||||
}
|
||||
|
||||
|
||||
@@ -632,19 +631,23 @@ void GSConvolutionWithTextureCS(uint3 GroupID : SV_GroupID, uint3 GroupThreadID
|
||||
|
||||
|
||||
// Normalize R & G
|
||||
for (uint r = 0; r < RADIX; ++r)
|
||||
{
|
||||
// This is the R or G channel
|
||||
LocalBuffer[0][r] /= NormMax;
|
||||
for (uint r = 0; r < RADIX; ++r)
|
||||
{
|
||||
// This is the R or G channel
|
||||
LocalBuffer[0][r] /= NormMax;
|
||||
}
|
||||
}
|
||||
|
||||
float AorBNorm = (bIsGAGroup) ? AlphaSum : NormMax;
|
||||
|
||||
// Normalize B & A
|
||||
for (uint r = 0; r < RADIX; ++r)
|
||||
{
|
||||
// This is the B or A channel
|
||||
LocalBuffer[1][r] /= AorBNorm;
|
||||
for (uint r = 0; r < RADIX; ++r)
|
||||
{
|
||||
// This is the B or A channel
|
||||
LocalBuffer[1][r] /= AorBNorm;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -114,8 +114,8 @@ void CopyDataLocalToDst(in Complex LocalBuffer[2][RADIX], bool bIsHorizontal, ui
|
||||
|
||||
void CopyDataSrcWindowToLocal(inout Complex LocalBuffer[2][RADIX], bool bIsHorizontal, in uint ScanIdx, uint Loc, uint Stride, uint4 Window)
|
||||
{
|
||||
for (uint i = 0; i < RADIX; ++i) LocalBuffer[0][ i ] = float2(0.f, 0.f);
|
||||
for (uint i = 0; i < RADIX; ++i) LocalBuffer[1][ i ] = float2(0.f, 0.f);
|
||||
{ for (uint i = 0; i < RADIX; ++i) LocalBuffer[0][ i ] = float2(0.f, 0.f); }
|
||||
{ for (uint i = 0; i < RADIX; ++i) LocalBuffer[1][ i ] = float2(0.f, 0.f); }
|
||||
|
||||
if (bIsHorizontal)
|
||||
{
|
||||
@@ -158,8 +158,8 @@ void CopyDataSrcWindowToLocal(inout Complex LocalBuffer[2][RADIX], bool bIsHoriz
|
||||
|
||||
void CopyDataSrcWindowToLocal(inout Complex LocalBuffer[2][RADIX], bool bIsHorizontal, uint ScanIdx, uint Loc, uint Stride, uint2 WindowMin, uint2 WindowMax )
|
||||
{
|
||||
for (uint i = 0; i < RADIX; ++i) LocalBuffer[0][ i ] = float2(0.f, 0.f);
|
||||
for (uint i = 0; i < RADIX; ++i) LocalBuffer[1][ i ] = float2(0.f, 0.f);
|
||||
{ for (uint i = 0; i < RADIX; ++i) LocalBuffer[0][ i ] = float2(0.f, 0.f); }
|
||||
{ for (uint i = 0; i < RADIX; ++i) LocalBuffer[1][ i ] = float2(0.f, 0.f); }
|
||||
|
||||
if (bIsHorizontal)
|
||||
{
|
||||
|
||||
@@ -376,17 +376,19 @@ groupshared float SharedImag[ SCAN_LINE_LENGTH ];
|
||||
|
||||
void CopyLocalToGroupShared(in Complex Local[RADIX], in uint Head, in uint Stride)
|
||||
{
|
||||
|
||||
for (uint r = 0, i = Head; r < RADIX; ++r, i += Stride)
|
||||
{
|
||||
SharedReal[ i ] = Local[ r ].x;
|
||||
for (uint r = 0, i = Head; r < RADIX; ++r, i += Stride)
|
||||
{
|
||||
SharedReal[ i ] = Local[ r ].x;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint r = 0, i = Head; r < RADIX; ++r, i += Stride)
|
||||
{
|
||||
SharedImag[ i ] = Local[ r ].y;
|
||||
for (uint r = 0, i = Head; r < RADIX; ++r, i += Stride)
|
||||
{
|
||||
SharedImag[ i ] = Local[ r ].y;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void CopyLocalToGroupSharedWSync(in Complex Local[RADIX], in uint Head, in uint Stride)
|
||||
@@ -398,15 +400,18 @@ void CopyLocalToGroupSharedWSync(in Complex Local[RADIX], in uint Head, in uint
|
||||
|
||||
void CopyGroupSharedToLocal(inout Complex Local[RADIX], in uint Head, in uint Stride)
|
||||
{
|
||||
|
||||
for (uint r = 0, i = Head; r < RADIX; ++r, i += Stride)
|
||||
{
|
||||
Local[ r ].x = SharedReal[ i ];
|
||||
for (uint r = 0, i = Head; r < RADIX; ++r, i += Stride)
|
||||
{
|
||||
Local[ r ].x = SharedReal[ i ];
|
||||
}
|
||||
}
|
||||
|
||||
for (uint r = 0, i = Head; r < RADIX; ++r, i += Stride)
|
||||
{
|
||||
Local[ r ].y = SharedImag[ i ];
|
||||
for (uint r = 0, i = Head; r < RADIX; ++r, i += Stride)
|
||||
{
|
||||
Local[ r ].y = SharedImag[ i ];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -616,13 +621,18 @@ void GroupSharedFFT(in const bool bIsForward, inout Complex Local[RADIX], in con
|
||||
void Scale(inout Complex LocalBuffer[2][RADIX], in float ScaleValue)
|
||||
{
|
||||
// Scale
|
||||
for (uint r = 0; r < RADIX; ++r)
|
||||
{
|
||||
LocalBuffer[0][r] *= ScaleValue;
|
||||
{
|
||||
for (uint r = 0; r < RADIX; ++r)
|
||||
{
|
||||
LocalBuffer[0][r] *= ScaleValue;
|
||||
}
|
||||
}
|
||||
for (uint r = 0; r < RADIX; ++r)
|
||||
{
|
||||
LocalBuffer[1][r] *= ScaleValue;
|
||||
|
||||
{
|
||||
for (uint r = 0; r < RADIX; ++r)
|
||||
{
|
||||
LocalBuffer[1][r] *= ScaleValue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -709,44 +719,50 @@ void SplitTwoForOne(inout Complex LocalBuffer[RADIX], in uint Head, in uint Stri
|
||||
|
||||
// Construct the transform for the two real signals in the LocalBuffer
|
||||
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
//
|
||||
// K = N/2 - abs(SrcIdx - N/2)
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
//
|
||||
// K = N/2 - abs(SrcIdx - N/2)
|
||||
|
||||
// DstK = SrcIdx % Non2;
|
||||
// N - k
|
||||
// DstK = SrcIdx % Non2;
|
||||
// N - k
|
||||
|
||||
FLATTEN uint NmK = (K > 0) ? ( N - K) : 0;
|
||||
uint NmK = (K > 0) ? ( N - K) : 0;
|
||||
|
||||
// Z_k = LocalBuffer[i]
|
||||
// If k < N/2 : Store F_k = 1/2 (Z_k + Z*_{N-k})
|
||||
// If k > N/2 : Compute I*G_k = 1/2 (Z_k - Z*_{N-k})
|
||||
// Z_k = LocalBuffer[i]
|
||||
// If k < N/2 : Store F_k = 1/2 (Z_k + Z*_{N-k})
|
||||
// If k > N/2 : Compute I*G_k = 1/2 (Z_k - Z*_{N-k})
|
||||
|
||||
// Tmp = {+,-}ComplexConjugate( Z_{N-k})
|
||||
Complex Tmp = Complex( SharedReal[NmK], -SharedImag[NmK] );
|
||||
// Tmp = {+,-}ComplexConjugate( Z_{N-k})
|
||||
Complex Tmp = Complex( SharedReal[NmK], -SharedImag[NmK] );
|
||||
|
||||
Tmp *= (K > Non2)? -1 : 1;
|
||||
Tmp *= (K > Non2)? -1 : 1;
|
||||
|
||||
LocalBuffer[i] += Tmp;
|
||||
LocalBuffer[i] += Tmp;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
for (uint i =0; i < RADIX; ++i) LocalBuffer[ i ] *= 0.5f;
|
||||
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
// If k > N/2 get G_k from I*G_k: G_k = -I * (I G_k)
|
||||
if (K > Non2) LocalBuffer[i] = ComplexMult(Complex(0, -1), LocalBuffer[i] );
|
||||
|
||||
if (K == Non2)
|
||||
{
|
||||
// F_N/2 + I * G_N/2
|
||||
LocalBuffer[i] = Complex(SharedReal[Non2], SharedImag[Non2]);
|
||||
}
|
||||
for (uint i =0; i < RADIX; ++i) LocalBuffer[ i ] *= 0.5f;
|
||||
}
|
||||
|
||||
{
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
// If k > N/2 get G_k from I*G_k: G_k = -I * (I G_k)
|
||||
if (K > Non2) LocalBuffer[i] = ComplexMult(Complex(0, -1), LocalBuffer[i] );
|
||||
|
||||
if (K == Non2)
|
||||
{
|
||||
// F_N/2 + I * G_N/2
|
||||
LocalBuffer[i] = Complex(SharedReal[Non2], SharedImag[Non2]);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (Head == 0)
|
||||
@@ -773,27 +789,29 @@ void SplitTwoForOne(inout Complex LocalBuffer[RADIX], in uint Head, in uint Stri
|
||||
|
||||
// Construct the transform for the two real signals in the LocalBuffer
|
||||
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
//
|
||||
// K = N/2 - abs(SrcIdx - N/2)
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
//
|
||||
// K = N/2 - abs(SrcIdx - N/2)
|
||||
|
||||
// DstK = SrcIdx % Non2;
|
||||
// N - k
|
||||
// DstK = SrcIdx % Non2;
|
||||
// N - k
|
||||
|
||||
FLATTEN uint NmK = (K > 0) ? ( N - K) : 0;
|
||||
uint NmK = (K > 0) ? ( N - K) : 0;
|
||||
|
||||
// Z_k = LocalBuffer[i]
|
||||
// If k < N/2 : Store F_k = 1/2 (Z_k + Z*_{N-k})
|
||||
// If k > N/2 : Compute I*G_k = 1/2 (Z_k - Z*_{N-k})
|
||||
// Z_k = LocalBuffer[i]
|
||||
// If k < N/2 : Store F_k = 1/2 (Z_k + Z*_{N-k})
|
||||
// If k > N/2 : Compute I*G_k = 1/2 (Z_k - Z*_{N-k})
|
||||
|
||||
// Tmp = {+,-}ComplexConjugate( Z_{N-k})
|
||||
float Tmp = SharedReal[NmK];
|
||||
// Tmp = {+,-}ComplexConjugate( Z_{N-k})
|
||||
float Tmp = SharedReal[NmK];
|
||||
|
||||
Tmp *= (K > Non2)? -1 : 1;
|
||||
Tmp *= (K > Non2)? -1 : 1;
|
||||
|
||||
LocalBuffer[i].x += Tmp;
|
||||
LocalBuffer[i].x += Tmp;
|
||||
}
|
||||
}
|
||||
|
||||
if (Head == 0 ) LocalBuffer[0].x = 2.f * SharedReal[0];
|
||||
@@ -804,39 +822,45 @@ void SplitTwoForOne(inout Complex LocalBuffer[RADIX], in uint Head, in uint Stri
|
||||
|
||||
FFTMemoryBarrier();
|
||||
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
//
|
||||
// K = N/2 - abs(SrcIdx - N/2)
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
//
|
||||
// K = N/2 - abs(SrcIdx - N/2)
|
||||
|
||||
// DstK = SrcIdx % Non2;
|
||||
// N - k
|
||||
// DstK = SrcIdx % Non2;
|
||||
// N - k
|
||||
|
||||
FLATTEN uint NmK = (K > 0) ? ( N - K) : 0;
|
||||
uint NmK = (K > 0) ? ( N - K) : 0;
|
||||
|
||||
// Z_k = LocalBuffer[i]
|
||||
// If k < N/2 : Store F_k = 1/2 (Z_k + Z*_{N-k})
|
||||
// If k > N/2 : Compute I*G_k = 1/2 (Z_k - Z*_{N-k})
|
||||
// Z_k = LocalBuffer[i]
|
||||
// If k < N/2 : Store F_k = 1/2 (Z_k + Z*_{N-k})
|
||||
// If k > N/2 : Compute I*G_k = 1/2 (Z_k - Z*_{N-k})
|
||||
|
||||
// Tmp = {+,-}ComplexConjugate( Z_{N-k})
|
||||
float Tmp = -SharedReal[NmK];
|
||||
// Tmp = {+,-}ComplexConjugate( Z_{N-k})
|
||||
float Tmp = -SharedReal[NmK];
|
||||
|
||||
Tmp *= (K < Non2)? 1 : -1;
|
||||
Tmp *= (K < Non2)? 1 : -1;
|
||||
|
||||
LocalBuffer[i].y += Tmp;
|
||||
//LocalBuffer[i] *= 0.5;
|
||||
LocalBuffer[i].y += Tmp;
|
||||
//LocalBuffer[i] *= 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
if (Head == 0) LocalBuffer[0].y = 2.f * SharedReal[0];
|
||||
|
||||
UNROLL for (uint i = 0; i < RADIX; ++i) LocalBuffer[i] *= 0.5;
|
||||
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
// If k > N/2 get G_k from I*G_k: G_k = -I * (I G_k)
|
||||
if (K > Non2) LocalBuffer[i] = ComplexMult(Complex(0, -1), LocalBuffer[i] );
|
||||
UNROLL for (uint i = 0; i < RADIX; ++i) LocalBuffer[i] *= 0.5;
|
||||
}
|
||||
|
||||
{
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
// If k > N/2 get G_k from I*G_k: G_k = -I * (I G_k)
|
||||
if (K > Non2) LocalBuffer[i] = ComplexMult(Complex(0, -1), LocalBuffer[i] );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -880,45 +904,49 @@ void MergeTwoForOne(inout Complex LocalBuffer[RADIX], in uint Head, in uint Stri
|
||||
|
||||
|
||||
// Compose the transform of a single f+ig signal from the two real signals in the LocalBuffer
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
|
||||
// N - k
|
||||
FLATTEN uint NmK = (K > 0) ? (N - K) : 0 ;
|
||||
// N - k
|
||||
uint NmK = (K > 0) ? (N - K) : 0 ;
|
||||
|
||||
|
||||
|
||||
// If k < N/2 : LocalBuffer[i] = F_k,
|
||||
// Shared[N-K] = G[N-K] = Conjugate(G[k]) = (G[k]_r, -G[k]_i) = Complex(G_r, -G_i)
|
||||
// want I G[k] = float(-G[k]_i, G[k]_r)
|
||||
// Tmp = I * G_k
|
||||
// If k > N/2 : LocalBuffer[i] = G_k
|
||||
// Shared[N-K] = F[N-k] = Conjugate(F[k]) = Complex(F[k]_r, -F[k]_i)
|
||||
// want -I F[k] = Complex(F[k]_i, -F[k]_r)
|
||||
// Tmp = -I * F
|
||||
// ComplexConjugate( Z_{N-k})
|
||||
// If k < N/2 : LocalBuffer[i] = F_k,
|
||||
// Shared[N-K] = G[N-K] = Conjugate(G[k]) = (G[k]_r, -G[k]_i) = Complex(G_r, -G_i)
|
||||
// want I G[k] = float(-G[k]_i, G[k]_r)
|
||||
// Tmp = I * G_k
|
||||
// If k > N/2 : LocalBuffer[i] = G_k
|
||||
// Shared[N-K] = F[N-k] = Conjugate(F[k]) = Complex(F[k]_r, -F[k]_i)
|
||||
// want -I F[k] = Complex(F[k]_i, -F[k]_r)
|
||||
// Tmp = -I * F
|
||||
// ComplexConjugate( Z_{N-k})
|
||||
|
||||
// Tmp = (K < Non2) ? I * G_k : -I * F
|
||||
Complex Tmp = Complex( SharedImag[ NmK ], SharedReal[ NmK ] );
|
||||
Tmp *= (K > Non2) ? -1 : 1;
|
||||
// Tmp = (K < Non2) ? I * G_k : -I * F
|
||||
Complex Tmp = Complex( SharedImag[ NmK ], SharedReal[ NmK ] );
|
||||
Tmp *= (K > Non2) ? -1 : 1;
|
||||
|
||||
LocalBuffer[i] += Tmp;
|
||||
LocalBuffer[i] += Tmp;
|
||||
}
|
||||
}
|
||||
// Compose the transform of a single f+ig signal from the two real signals in the LocalBuffer
|
||||
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
// if k > N/2 we have G - I * F. Multiply by I to get F + I * G
|
||||
if (K > Non2) LocalBuffer[ i ] = ComplexMult(Complex(0, 1), LocalBuffer[ i ]);
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
// if k > N/2 we have G - I * F. Multiply by I to get F + I * G
|
||||
if (K > Non2) LocalBuffer[ i ] = ComplexMult(Complex(0, 1), LocalBuffer[ i ]);
|
||||
|
||||
if (K == Non2)
|
||||
{
|
||||
// F_N/2 + I * G_N/2
|
||||
LocalBuffer[ i ] = Complex(SharedReal[ Non2 ], SharedImag[ Non2 ]);
|
||||
}
|
||||
|
||||
if (K == Non2)
|
||||
{
|
||||
// F_N/2 + I * G_N/2
|
||||
LocalBuffer[ i ] = Complex(SharedReal[ Non2 ], SharedImag[ Non2 ]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (Head == 0)
|
||||
@@ -936,37 +964,41 @@ void MergeTwoForOne(inout Complex LocalBuffer[RADIX], in uint Head, in uint Stri
|
||||
// Write the two FFTs into shared memory.
|
||||
|
||||
float TmpX[RADIX];
|
||||
for (uint i = 0; i < RADIX; ++i) TmpX[i] = LocalBuffer[i].x;
|
||||
{
|
||||
for (uint i = 0; i < RADIX; ++i) TmpX[i] = LocalBuffer[i].x;
|
||||
}
|
||||
|
||||
CopyLocalYToGroupShared(LocalBuffer, Head, Stride);
|
||||
|
||||
FFTMemoryBarrier();
|
||||
|
||||
// Compose the transform of a single f+ig signal from the two real signals in the LocalBuffer
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
|
||||
// N - k
|
||||
FLATTEN uint NmK = (K > 0) ? (N - K) : 0 ;
|
||||
// N - k
|
||||
uint NmK = (K > 0) ? (N - K) : 0 ;
|
||||
|
||||
|
||||
|
||||
// If k < N/2 : LocalBuffer[i] = F_k,
|
||||
// Shared[N-K] = G[N-K] = Conjugate(G[k]) = (G[k]_r, -G[k]_i) = Complex(G_r, -G_i)
|
||||
// want I G[k] = float(-G[k]_i, G[k]_r)
|
||||
// Tmp = I * G_k
|
||||
// If k > N/2 : LocalBuffer[i] = G_k
|
||||
// Shared[N-K] = F[N-k] = Conjugate(F[k]) = Complex(F[k]_r, -F[k]_i)
|
||||
// want -I F[k] = Complex(F[k]_i, -F[k]_r)
|
||||
// Tmp = -I * F
|
||||
// ComplexConjugate( Z_{N-k})
|
||||
// If k < N/2 : LocalBuffer[i] = F_k,
|
||||
// Shared[N-K] = G[N-K] = Conjugate(G[k]) = (G[k]_r, -G[k]_i) = Complex(G_r, -G_i)
|
||||
// want I G[k] = float(-G[k]_i, G[k]_r)
|
||||
// Tmp = I * G_k
|
||||
// If k > N/2 : LocalBuffer[i] = G_k
|
||||
// Shared[N-K] = F[N-k] = Conjugate(F[k]) = Complex(F[k]_r, -F[k]_i)
|
||||
// want -I F[k] = Complex(F[k]_i, -F[k]_r)
|
||||
// Tmp = -I * F
|
||||
// ComplexConjugate( Z_{N-k})
|
||||
|
||||
// Tmp = (K < Non2) ? I * G_k : -I * F
|
||||
float Tmp = SharedReal[ NmK ];
|
||||
Tmp *= (K > Non2) ? -1 : 1;
|
||||
// Tmp = (K < Non2) ? I * G_k : -I * F
|
||||
float Tmp = SharedReal[ NmK ];
|
||||
Tmp *= (K > Non2) ? -1 : 1;
|
||||
|
||||
LocalBuffer[i].x += Tmp;
|
||||
LocalBuffer[i].x += Tmp;
|
||||
}
|
||||
}
|
||||
|
||||
float2 FirstElement = float2(0, SharedReal[0]);
|
||||
@@ -985,45 +1017,47 @@ void MergeTwoForOne(inout Complex LocalBuffer[RADIX], in uint Head, in uint Stri
|
||||
FirstElement.x = SharedReal[0];
|
||||
MiddleElement.x = SharedReal[Non2];
|
||||
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
|
||||
// N - k
|
||||
FLATTEN uint NmK = (K > 0) ? (N - K) : 0 ;
|
||||
// N - k
|
||||
uint NmK = (K > 0) ? (N - K) : 0 ;
|
||||
|
||||
|
||||
|
||||
// If k < N/2 : LocalBuffer[i] = F_k,
|
||||
// Shared[N-K] = G[N-K] = Conjugate(G[k]) = (G[k]_r, -G[k]_i) = Complex(G_r, -G_i)
|
||||
// want I G[k] = float(-G[k]_i, G[k]_r)
|
||||
// Tmp = I * G_k
|
||||
// If k > N/2 : LocalBuffer[i] = G_k
|
||||
// Shared[N-K] = F[N-k] = Conjugate(F[k]) = Complex(F[k]_r, -F[k]_i)
|
||||
// want -I F[k] = Complex(F[k]_i, -F[k]_r)
|
||||
// Tmp = -I * F
|
||||
// ComplexConjugate( Z_{N-k})
|
||||
// If k < N/2 : LocalBuffer[i] = F_k,
|
||||
// Shared[N-K] = G[N-K] = Conjugate(G[k]) = (G[k]_r, -G[k]_i) = Complex(G_r, -G_i)
|
||||
// want I G[k] = float(-G[k]_i, G[k]_r)
|
||||
// Tmp = I * G_k
|
||||
// If k > N/2 : LocalBuffer[i] = G_k
|
||||
// Shared[N-K] = F[N-k] = Conjugate(F[k]) = Complex(F[k]_r, -F[k]_i)
|
||||
// want -I F[k] = Complex(F[k]_i, -F[k]_r)
|
||||
// Tmp = -I * F
|
||||
// ComplexConjugate( Z_{N-k})
|
||||
|
||||
// Tmp = (K < Non2) ? I * G_k : -I * F
|
||||
float Tmp = SharedReal[ NmK ];
|
||||
Tmp *= (K > Non2) ? -1 : 1;
|
||||
// Tmp = (K < Non2) ? I * G_k : -I * F
|
||||
float Tmp = SharedReal[ NmK ];
|
||||
Tmp *= (K > Non2) ? -1 : 1;
|
||||
|
||||
LocalBuffer[i].y += Tmp;
|
||||
LocalBuffer[i].y += Tmp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
// if k > N/2 we have G - I * F. Multiply by I to get F + I * G
|
||||
if (K > Non2) LocalBuffer[ i ] = ComplexMult(Complex(0, 1), LocalBuffer[ i ]);
|
||||
UNROLL
|
||||
for (uint i = 0, K = Head; i < RADIX; ++i, K += Stride)
|
||||
{
|
||||
// if k > N/2 we have G - I * F. Multiply by I to get F + I * G
|
||||
if (K > Non2) LocalBuffer[ i ] = ComplexMult(Complex(0, 1), LocalBuffer[ i ]);
|
||||
|
||||
if (K == Non2)
|
||||
{
|
||||
// F_N/2 + I * G_N/2
|
||||
LocalBuffer[ i ] = MiddleElement;
|
||||
if (K == Non2)
|
||||
{
|
||||
// F_N/2 + I * G_N/2
|
||||
LocalBuffer[ i ] = MiddleElement;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (Head == 0) LocalBuffer[ 0 ] = FirstElement;
|
||||
|
||||
@@ -238,26 +238,28 @@ void CompositeObjectDistanceFieldsCS(
|
||||
NumCulledObjects = NumGridCulledObjects;
|
||||
#endif
|
||||
|
||||
for (uint ListObjectIndex = ThreadIndex; ListObjectIndex < NumCulledObjects; ListObjectIndex += COMPOSITE_THREADGORUP_TOTALSIZE)
|
||||
{
|
||||
#if USE_CULL_GRID
|
||||
uint ObjectIndex = CullGridObjectArray[CullTileIndex * MAX_GRID_CULLED_DF_OBJECTS + ListObjectIndex];
|
||||
#else
|
||||
uint ObjectIndex = ListObjectIndex;
|
||||
#endif
|
||||
|
||||
float4 ObjectPositionAndRadius = LoadObjectPositionAndRadius(ObjectIndex);
|
||||
//float BoxDistance = ComputeDistanceFromBoxToPoint(TileBoxCenter - TileBoxExtent, TileBoxCenter + TileBoxExtent, ObjectPositionAndRadius.xyz);
|
||||
|
||||
// ComputeSquaredDistanceFromBoxToPoint is correct but adds a lot to the final cost
|
||||
float BoxDistanceSq = ComputeSquaredDistanceFromBoxToPoint(TileBoxCenter, TileBoxExtent, ObjectPositionAndRadius.xyz);
|
||||
float CombinedDistance = ObjectPositionAndRadius.w + AOGlobalMaxSphereQueryRadius;
|
||||
|
||||
if (BoxDistanceSq < CombinedDistance * CombinedDistance)
|
||||
for (uint ListObjectIndex = ThreadIndex; ListObjectIndex < NumCulledObjects; ListObjectIndex += COMPOSITE_THREADGORUP_TOTALSIZE)
|
||||
{
|
||||
uint DestIndex;
|
||||
InterlockedAdd(NumTileCulledObjects, 1U, DestIndex);
|
||||
SharedCulledObjectList[DestIndex] = ObjectIndex;
|
||||
#if USE_CULL_GRID
|
||||
uint ObjectIndex = CullGridObjectArray[CullTileIndex * MAX_GRID_CULLED_DF_OBJECTS + ListObjectIndex];
|
||||
#else
|
||||
uint ObjectIndex = ListObjectIndex;
|
||||
#endif
|
||||
|
||||
float4 ObjectPositionAndRadius = LoadObjectPositionAndRadius(ObjectIndex);
|
||||
//float BoxDistance = ComputeDistanceFromBoxToPoint(TileBoxCenter - TileBoxExtent, TileBoxCenter + TileBoxExtent, ObjectPositionAndRadius.xyz);
|
||||
|
||||
// ComputeSquaredDistanceFromBoxToPoint is correct but adds a lot to the final cost
|
||||
float BoxDistanceSq = ComputeSquaredDistanceFromBoxToPoint(TileBoxCenter, TileBoxExtent, ObjectPositionAndRadius.xyz);
|
||||
float CombinedDistance = ObjectPositionAndRadius.w + AOGlobalMaxSphereQueryRadius;
|
||||
|
||||
if (BoxDistanceSq < CombinedDistance * CombinedDistance)
|
||||
{
|
||||
uint DestIndex;
|
||||
InterlockedAdd(NumTileCulledObjects, 1U, DestIndex);
|
||||
SharedCulledObjectList[DestIndex] = ObjectIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -277,35 +279,37 @@ void CompositeObjectDistanceFieldsCS(
|
||||
MinDistance = ParentGlobalDistanceFieldTexture[IntCoordinate].r;
|
||||
#endif
|
||||
|
||||
LOOP
|
||||
for (uint ListObjectIndex = 0; ListObjectIndex < NumCulledObjects; ListObjectIndex++)
|
||||
{
|
||||
#if USE_OBJECT_COMPOSITING_TILE_CULLING
|
||||
uint ObjectIndex = SharedCulledObjectList[ListObjectIndex];
|
||||
#else
|
||||
uint ObjectIndex = ListObjectIndex;
|
||||
#endif
|
||||
|
||||
float3 LocalPositionExtent = LoadObjectLocalPositionExtent(ObjectIndex);
|
||||
float4x4 WorldToVolume = LoadObjectWorldToVolume(ObjectIndex);
|
||||
bool bGeneratedAsTwoSided;
|
||||
float4 UVScaleAndVolumeScale = LoadObjectUVScale(ObjectIndex, bGeneratedAsTwoSided);
|
||||
float3 VolumePosition = mul(float4(WorldPosition, 1), WorldToVolume).xyz;
|
||||
float BoxDistance = ComputeDistanceFromBoxToPoint(-LocalPositionExtent, LocalPositionExtent, VolumePosition) * UVScaleAndVolumeScale.w;
|
||||
|
||||
BRANCH
|
||||
if (BoxDistance < AOGlobalMaxSphereQueryRadius)
|
||||
LOOP
|
||||
for (uint ListObjectIndex = 0; ListObjectIndex < NumCulledObjects; ListObjectIndex++)
|
||||
{
|
||||
float3 UVAdd = LoadObjectUVAddAndSelfShadowBias(ObjectIndex).xyz;
|
||||
float2 DistanceFieldMAD = LoadObjectDistanceFieldMAD(ObjectIndex);
|
||||
#if USE_OBJECT_COMPOSITING_TILE_CULLING
|
||||
uint ObjectIndex = SharedCulledObjectList[ListObjectIndex];
|
||||
#else
|
||||
uint ObjectIndex = ListObjectIndex;
|
||||
#endif
|
||||
|
||||
float3 ClampedSamplePosition = clamp(VolumePosition, -LocalPositionExtent, LocalPositionExtent);
|
||||
float DistanceToClamped = length(VolumePosition - ClampedSamplePosition);
|
||||
float3 LocalPositionExtent = LoadObjectLocalPositionExtent(ObjectIndex);
|
||||
float4x4 WorldToVolume = LoadObjectWorldToVolume(ObjectIndex);
|
||||
bool bGeneratedAsTwoSided;
|
||||
float4 UVScaleAndVolumeScale = LoadObjectUVScale(ObjectIndex, bGeneratedAsTwoSided);
|
||||
float3 VolumePosition = mul(float4(WorldPosition, 1), WorldToVolume).xyz;
|
||||
float BoxDistance = ComputeDistanceFromBoxToPoint(-LocalPositionExtent, LocalPositionExtent, VolumePosition) * UVScaleAndVolumeScale.w;
|
||||
|
||||
float3 StepVolumeUV = DistanceFieldVolumePositionToUV(ClampedSamplePosition, UVScaleAndVolumeScale.xyz, UVAdd);
|
||||
float DistanceToOccluder = (SampleMeshDistanceField(StepVolumeUV, DistanceFieldMAD).x + DistanceToClamped) * UVScaleAndVolumeScale.w;
|
||||
BRANCH
|
||||
if (BoxDistance < AOGlobalMaxSphereQueryRadius)
|
||||
{
|
||||
float3 UVAdd = LoadObjectUVAddAndSelfShadowBias(ObjectIndex).xyz;
|
||||
float2 DistanceFieldMAD = LoadObjectDistanceFieldMAD(ObjectIndex);
|
||||
|
||||
MinDistance = min(MinDistance, DistanceToOccluder);
|
||||
float3 ClampedSamplePosition = clamp(VolumePosition, -LocalPositionExtent, LocalPositionExtent);
|
||||
float DistanceToClamped = length(VolumePosition - ClampedSamplePosition);
|
||||
|
||||
float3 StepVolumeUV = DistanceFieldVolumePositionToUV(ClampedSamplePosition, UVScaleAndVolumeScale.xyz, UVAdd);
|
||||
float DistanceToOccluder = (SampleMeshDistanceField(StepVolumeUV, DistanceFieldMAD).x + DistanceToClamped) * UVScaleAndVolumeScale.w;
|
||||
|
||||
MinDistance = min(MinDistance, DistanceToOccluder);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -39,11 +39,13 @@ uint PerBoneMotionBlur;
|
||||
|
||||
#if GPUSKIN_APEX_CLOTH
|
||||
/** Vertex buffer from which to read simulated positions of clothing. */
|
||||
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM4
|
||||
Buffer<float2> ClothSimulVertsPositionsNormals;
|
||||
Buffer<float2> PreviousClothSimulVertsPositionsNormals;
|
||||
/** Transform to apply when positions/normals are loaded */
|
||||
float4x4 ClothLocalToWorld;
|
||||
float4x4 PreviousClothLocalToWorld;
|
||||
#endif // #if FEATURE_LEVEL >= FEATURE_LEVEL_SM4
|
||||
/** blend weight between simulated positions and original key-framed animation */
|
||||
float ClothBlendWeight;
|
||||
#endif// #if GPUSKIN_APEX_CLOTH
|
||||
@@ -301,7 +303,6 @@ float3 GetClothSimulNormal(int Index, bool bPrevious)
|
||||
}
|
||||
|
||||
#endif //FEATURE_LEVEL >= FEATURE_LEVEL_SM4
|
||||
|
||||
}
|
||||
|
||||
float3 ClothingPosition(FClothVertex Input, bool bPrevious)
|
||||
|
||||
@@ -181,7 +181,7 @@ void HZBBuildCS(
|
||||
|
||||
void HZBBuildPS(float4 SvPosition : SV_POSITION, out float4 OutColor : SV_Target0)
|
||||
{
|
||||
float2 BufferUV = SvPosition * DispatchThreadIdToBufferUV.xy + DispatchThreadIdToBufferUV.zw;
|
||||
float2 BufferUV = SvPosition.xy * DispatchThreadIdToBufferUV.xy + DispatchThreadIdToBufferUV.zw;
|
||||
float4 DeviceZ = Gather4(ParentTextureMip, ParentTextureMipSampler, BufferUV);
|
||||
|
||||
float FurthestDeviceZ = min(min(DeviceZ.x, DeviceZ.y), min(DeviceZ.z, DeviceZ.w));
|
||||
|
||||
@@ -37,18 +37,20 @@ void HZBTestPS(float2 InUV : TEXCOORD0, out float4 OutColor : SV_Target0)
|
||||
// Screen rect from bounds
|
||||
float3 RectMin = float3( 1, 1, 1 );
|
||||
float3 RectMax = float3( -1, -1, -1 );
|
||||
UNROLL for( int i = 0; i < 8; i++ )
|
||||
{
|
||||
float3 PointSrc;
|
||||
PointSrc.x = Bounds[ (i >> 0) & 1 ].x;
|
||||
PointSrc.y = Bounds[ (i >> 1) & 1 ].y;
|
||||
PointSrc.z = Bounds[ (i >> 2) & 1 ].z;
|
||||
UNROLL for( int i = 0; i < 8; i++ )
|
||||
{
|
||||
float3 PointSrc;
|
||||
PointSrc.x = Bounds[ (i >> 0) & 1 ].x;
|
||||
PointSrc.y = Bounds[ (i >> 1) & 1 ].y;
|
||||
PointSrc.z = Bounds[ (i >> 2) & 1 ].z;
|
||||
|
||||
float4 PointClip = mul( float4( PointSrc, 1 ), View.TranslatedWorldToClip );
|
||||
float3 PointScreen = PointClip.xyz / PointClip.w;
|
||||
float4 PointClip = mul( float4( PointSrc, 1 ), View.TranslatedWorldToClip );
|
||||
float3 PointScreen = PointClip.xyz / PointClip.w;
|
||||
|
||||
RectMin = min( RectMin, PointScreen );
|
||||
RectMax = max( RectMax, PointScreen );
|
||||
RectMin = min( RectMin, PointScreen );
|
||||
RectMax = max( RectMax, PointScreen );
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
@@ -194,15 +196,17 @@ void HZBTestPS(float2 InUV : TEXCOORD0, out float4 OutColor : SV_Target0)
|
||||
float2 Bias = HZBUvFactor.xy * Rect.xy;
|
||||
|
||||
float4 MinDepth = 1;
|
||||
UNROLL for( int i = 0; i < 4; i++ )
|
||||
{
|
||||
// TODO could vectorize this
|
||||
float4 Depth;
|
||||
Depth.x = HZBTexture.SampleLevel( HZBSampler, float2( i, 0 ) * Scale + Bias, Level ).r;
|
||||
Depth.y = HZBTexture.SampleLevel( HZBSampler, float2( i, 1 ) * Scale + Bias, Level ).r;
|
||||
Depth.z = HZBTexture.SampleLevel( HZBSampler, float2( i, 2 ) * Scale + Bias, Level ).r;
|
||||
Depth.w = HZBTexture.SampleLevel( HZBSampler, float2( i, 3 ) * Scale + Bias, Level ).r;
|
||||
MinDepth = min( MinDepth, Depth );
|
||||
UNROLL for( int i = 0; i < 4; i++ )
|
||||
{
|
||||
// TODO could vectorize this
|
||||
float4 Depth;
|
||||
Depth.x = HZBTexture.SampleLevel( HZBSampler, float2( i, 0 ) * Scale + Bias, Level ).r;
|
||||
Depth.y = HZBTexture.SampleLevel( HZBSampler, float2( i, 1 ) * Scale + Bias, Level ).r;
|
||||
Depth.z = HZBTexture.SampleLevel( HZBSampler, float2( i, 2 ) * Scale + Bias, Level ).r;
|
||||
Depth.w = HZBTexture.SampleLevel( HZBSampler, float2( i, 3 ) * Scale + Bias, Level ).r;
|
||||
MinDepth = min( MinDepth, Depth );
|
||||
}
|
||||
}
|
||||
MinDepth.x = min( min(MinDepth.x, MinDepth.y), min(MinDepth.z, MinDepth.w) );
|
||||
|
||||
|
||||
@@ -423,20 +423,22 @@ void CSPropagate(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID
|
||||
ldsCacheOrigin -= int3(1,1,1);
|
||||
#endif
|
||||
|
||||
for (int i=0;i<4;i++)
|
||||
{
|
||||
// Get the cell index,from 0-215 (6x6x6 block). There will be 4 indices per thread (with some duplication)
|
||||
uint cellCacheIndex = laneID*4+i;
|
||||
if ( cellCacheIndex < (6*6*6) )
|
||||
for (int i=0;i<4;i++)
|
||||
{
|
||||
uint3 offset3;
|
||||
offset3.x = ( cellCacheIndex % 6 );
|
||||
offset3.y = ( cellCacheIndex / 6 ) % 6;
|
||||
offset3.z = cellCacheIndex / ( 6 * 6 );
|
||||
int3 cachedGridPos = ldsCacheOrigin + int3(offset3);
|
||||
LPVCell CachedThreadCell = ReadLpvCell( GetGridAddress(cachedGridPos) );
|
||||
// Get the cell index,from 0-215 (6x6x6 block). There will be 4 indices per thread (with some duplication)
|
||||
uint cellCacheIndex = laneID*4+i;
|
||||
if ( cellCacheIndex < (6*6*6) )
|
||||
{
|
||||
uint3 offset3;
|
||||
offset3.x = ( cellCacheIndex % 6 );
|
||||
offset3.y = ( cellCacheIndex / 6 ) % 6;
|
||||
offset3.z = cellCacheIndex / ( 6 * 6 );
|
||||
int3 cachedGridPos = ldsCacheOrigin + int3(offset3);
|
||||
LPVCell CachedThreadCell = ReadLpvCell( GetGridAddress(cachedGridPos) );
|
||||
|
||||
LDSCachedLPV[cellCacheIndex] = PackLPVCell( CachedThreadCell );
|
||||
LDSCachedLPV[cellCacheIndex] = PackLPVCell( CachedThreadCell );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -86,16 +86,20 @@ void PrefilterPlanarReflectionPS(
|
||||
float AccumulatedWeight = 0;
|
||||
|
||||
// Two texels at a time with bilinear
|
||||
LOOP
|
||||
for (float y = 0; y < KernelRadiusY; y += 2)
|
||||
{
|
||||
VerticalSample(BufferUV, y, KernelRadiusY, CenterDistanceToPlane, CenterReflectionDistance, false, AccumulatedColor, AccumulatedWeight);
|
||||
LOOP
|
||||
for (float y = 0; y < KernelRadiusY; y += 2)
|
||||
{
|
||||
VerticalSample(BufferUV, y, KernelRadiusY, CenterDistanceToPlane, CenterReflectionDistance, false, AccumulatedColor, AccumulatedWeight);
|
||||
}
|
||||
}
|
||||
|
||||
LOOP
|
||||
for (float y = -2; y > -KernelRadiusY; y -= 2)
|
||||
{
|
||||
VerticalSample(BufferUV, y, KernelRadiusY, CenterDistanceToPlane, CenterReflectionDistance, true, AccumulatedColor, AccumulatedWeight);
|
||||
LOOP
|
||||
for (float y = -2; y > -KernelRadiusY; y -= 2)
|
||||
{
|
||||
VerticalSample(BufferUV, y, KernelRadiusY, CenterDistanceToPlane, CenterReflectionDistance, true, AccumulatedColor, AccumulatedWeight);
|
||||
}
|
||||
}
|
||||
|
||||
AccumulatedColor = AccumulatedColor / max(AccumulatedWeight, .0001f);
|
||||
|
||||
@@ -2078,7 +2078,7 @@ FTAAHistoryPayload TemporalAASample(uint2 GroupId, uint2 GroupThreadId, uint Gro
|
||||
bool Dynamic7 = SceneVelocityBuffer.SampleLevel(SceneVelocityBufferSampler, InputParams.NearestBufferUV, 0, int2( 0, 1)).x > 0;
|
||||
|
||||
bool Dynamic = Dynamic1 || Dynamic3 || Dynamic4 || Dynamic5 || Dynamic7;
|
||||
IgnoreHistory = IgnoreHistory || !Dynamic && History.Color.a > 0;
|
||||
IgnoreHistory = IgnoreHistory || (!Dynamic && History.Color.a > 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -443,13 +443,15 @@ void RadixSort_Spine(
|
||||
}
|
||||
|
||||
// Scan raking totals.
|
||||
[unroll]
|
||||
for ( uint RakingOffset = 1; RakingOffset < RAKING_THREADS_PER_DIGIT; RakingOffset <<= 1 )
|
||||
{
|
||||
// Acquire LocalRakingTotals.
|
||||
PPS_BARRIER();
|
||||
Total = Total + LocalRakingTotals[RakingIndex - RakingOffset];
|
||||
LocalRakingTotals[RakingIndex] = Total;
|
||||
[unroll]
|
||||
for ( uint RakingOffset = 1; RakingOffset < RAKING_THREADS_PER_DIGIT; RakingOffset <<= 1 )
|
||||
{
|
||||
// Acquire LocalRakingTotals.
|
||||
PPS_BARRIER();
|
||||
Total = Total + LocalRakingTotals[RakingIndex - RakingOffset];
|
||||
LocalRakingTotals[RakingIndex] = Total;
|
||||
}
|
||||
}
|
||||
|
||||
// Acquire LocalRakingTotals.
|
||||
@@ -459,13 +461,15 @@ void RadixSort_Spine(
|
||||
LocalTotals[ThreadId + DIGIT_SCAN_PADDING] = DigitSum;
|
||||
|
||||
// Scan local totals.
|
||||
[unroll]
|
||||
for ( uint RakingOffset = 1; RakingOffset < DIGIT_COUNT; RakingOffset <<= 1 )
|
||||
{
|
||||
// Acquire LocalTotals.
|
||||
PPS_BARRIER();
|
||||
DigitSum += LocalTotals[ThreadId + DIGIT_SCAN_PADDING - RakingOffset];
|
||||
LocalTotals[ThreadId + DIGIT_SCAN_PADDING] = DigitSum;
|
||||
[unroll]
|
||||
for ( uint RakingOffset = 1; RakingOffset < DIGIT_COUNT; RakingOffset <<= 1 )
|
||||
{
|
||||
// Acquire LocalTotals.
|
||||
PPS_BARRIER();
|
||||
DigitSum += LocalTotals[ThreadId + DIGIT_SCAN_PADDING - RakingOffset];
|
||||
LocalTotals[ThreadId + DIGIT_SCAN_PADDING] = DigitSum;
|
||||
}
|
||||
}
|
||||
|
||||
// Acquire LocalTotals.
|
||||
|
||||
@@ -84,7 +84,7 @@ void RemapClearCoatDiffuseAndSpecularColor(FGBufferData GBuffer, float2 ScreenPo
|
||||
//BaseColor += Dither / 255.f;
|
||||
DiffuseColor = BaseColor - BaseColor * GBuffer.Metallic;
|
||||
|
||||
float3 Specular = lerp(1, RefractionScale, ClearCoat);
|
||||
float Specular = lerp(1, RefractionScale, ClearCoat);
|
||||
SpecularColor = ComputeF0(Specular, BaseColor, GBuffer.Metallic);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,22 +97,26 @@ void AddUIntSymbols(inout float2 ScreenPos, in uint Value)
|
||||
uint SymbolCount = 1;
|
||||
uint Divisor = 1;
|
||||
uint TestValue = Value;
|
||||
for (uint i = 0; i < MaxSymbols-1; ++i)
|
||||
{
|
||||
TestValue /= 10;
|
||||
if (TestValue > 0)
|
||||
for (uint i = 0; i < MaxSymbols-1; ++i)
|
||||
{
|
||||
SymbolCount ++;
|
||||
Divisor *= 10;
|
||||
TestValue /= 10;
|
||||
if (TestValue > 0)
|
||||
{
|
||||
SymbolCount ++;
|
||||
Divisor *= 10;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint i = 0; i < SymbolCount; ++i)
|
||||
{
|
||||
uint Digit = Value / Divisor;
|
||||
AddSymbol(ScreenPos, GetDecimalSymbol((int)Digit));
|
||||
Value = Value - (Digit * Divisor);
|
||||
Divisor /= 10;
|
||||
for (uint i = 0; i < SymbolCount; ++i)
|
||||
{
|
||||
uint Digit = Value / Divisor;
|
||||
AddSymbol(ScreenPos, GetDecimalSymbol((int)Digit));
|
||||
Value = Value - (Digit * Divisor);
|
||||
Divisor /= 10;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ VertexToPixelInterpolants Main(
|
||||
|
||||
{
|
||||
UNROLL
|
||||
for( int CoordinateIndex = 0; CoordinateIndex < NUM_MATERIAL_TEXCOORDS_VERTEX; CoordinateIndex++ )
|
||||
for( uint CoordinateIndex = 0; CoordinateIndex < NUM_MATERIAL_TEXCOORDS_VERTEX; CoordinateIndex++ )
|
||||
{
|
||||
VertexParameters.TexCoords[CoordinateIndex] = UVArrays[CoordinateIndex];
|
||||
}
|
||||
@@ -76,17 +76,19 @@ VertexToPixelInterpolants Main(
|
||||
float2 CustomizedUVs[NUM_SLATE_TEXCOORDS];
|
||||
GetMaterialCustomizedUVs(VertexParameters, CustomizedUVs);
|
||||
|
||||
UNROLL
|
||||
for( uint CoordinateIndex = 0; CoordinateIndex < NUM_SLATE_TEXCOORDS; CoordinateIndex++ )
|
||||
{
|
||||
FLATTEN
|
||||
if (Mod(CoordinateIndex, 2))
|
||||
UNROLL
|
||||
for( uint CoordinateIndex = 0; CoordinateIndex < NUM_SLATE_TEXCOORDS; CoordinateIndex++ )
|
||||
{
|
||||
VOut.TextureCoordinates[CoordinateIndex / 2].zw = CustomizedUVs[CoordinateIndex];
|
||||
}
|
||||
else
|
||||
{
|
||||
VOut.TextureCoordinates[CoordinateIndex / 2].xy = CustomizedUVs[CoordinateIndex];
|
||||
FLATTEN
|
||||
if (Mod(CoordinateIndex, 2))
|
||||
{
|
||||
VOut.TextureCoordinates[CoordinateIndex / 2].zw = CustomizedUVs[CoordinateIndex];
|
||||
}
|
||||
else
|
||||
{
|
||||
VOut.TextureCoordinates[CoordinateIndex / 2].xy = CustomizedUVs[CoordinateIndex];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user