Files
UnrealEngineUWP/Engine/Source/Runtime/Renderer/Private/GPUFastFourierTransform.cpp
guillaume abadie 2e85f24b98 Reland 18322673: Moves the bloom's intensity to be applied before writing to the bloom rdg texture
This allows lens flares to work with FFT bloom without introducing additional memory bandwidth costs in the tonemapper.

#rb none
#preflight 61a63600361aa0b85b3c506b

#ROBOMERGE-AUTHOR: guillaume.abadie
#ROBOMERGE-SOURCE: CL 18324910 via CL 18374616 via CL 18374634
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v895-18170469)

[CL 18374649 by guillaume abadie in ue5-release-engine-test branch]
2021-12-03 17:20:45 -05:00

1365 lines
49 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "GPUFastFourierTransform.h"
#include "SceneUtils.h"
#include "GlobalShader.h"
#include "RenderGraph.h"
#include "ShaderCompilerCore.h"
#include "SystemTextures.h"
/**
 * Tells whether the GPU FFT shaders should be compiled for a shader platform.
 * The answer comes straight from the data-driven platform info's FFT bloom support flag.
 */
bool ShouldCompileGPUFFT(EShaderPlatform Platform)
{
    const bool bPlatformSupportsFFTBloom = FDataDrivenShaderPlatformInfo::GetSupportsFFTBloom(Platform);
    return bPlatformSupportsFFTBloom;
}
/** Returns the maximum scan-line length reported by the GPU FFT module. */
uint32 GPUFFT::MaxScanLineLength()
{
    // Same value as the largest SCAN_LINE_LENGTH entry listed in FPowRadixSignalLengthDim.
    const uint32 MaxLength = 4096;
    return MaxLength;
}
/** True for the two horizontal transform types (forward or inverse), false otherwise. */
bool GPUFFT::IsHorizontal(const FFT_XFORM_TYPE& XFormType)
{
    if (XFormType == FFT_XFORM_TYPE::FORWARD_HORIZONTAL)
    {
        return true;
    }
    return XFormType == FFT_XFORM_TYPE::INVERSE_HORIZONTAL;
}
/** True for the two forward transform types (horizontal or vertical), false otherwise. */
bool GPUFFT::IsForward(const FFT_XFORM_TYPE& XFormType)
{
    if (XFormType == FFT_XFORM_TYPE::FORWARD_HORIZONTAL)
    {
        return true;
    }
    return XFormType == FFT_XFORM_TYPE::FORWARD_VERTICAL;
}
/**
 * Builds the description of a 1D transform applied over a 2D extent.
 *
 * @param XForm        - Direction (horizontal / vertical) and sense (forward / inverse) of the transform.
 * @param XFormExtent  - 2D extent of the data; the component along the transform direction becomes
 *                       SignalLength and the other component becomes NumScanLines.
 */
GPUFFT::FFTDescription::FFTDescription(const GPUFFT::FFT_XFORM_TYPE& XForm, const FIntPoint& XFormExtent)
    : XFormType(XForm)
{
    const bool bTransformAlongX = GPUFFT::IsHorizontal(XFormType);
    SignalLength = bTransformAlongX ? XFormExtent.X : XFormExtent.Y;
    NumScanLines = bTransformAlongX ? XFormExtent.Y : XFormExtent.X;
}
/**
 * Reassembles the 2D extent from SignalLength and NumScanLines:
 * (SignalLength, NumScanLines) for horizontal transforms, (NumScanLines, SignalLength) otherwise.
 */
FIntPoint GPUFFT::FFTDescription::TransformExtent() const
{
    if (GPUFFT::IsHorizontal(XFormType))
    {
        return FIntPoint(SignalLength, NumScanLines);
    }
    return FIntPoint(NumScanLines, SignalLength);
}
/** Convenience wrapper: whether this description's transform type is horizontal. */
bool GPUFFT::FFTDescription::IsHorizontal() const
{
    const bool bHorizontal = GPUFFT::IsHorizontal(XFormType);
    return bHorizontal;
}
/** Convenience wrapper: whether this description's transform type is a forward transform. */
bool GPUFFT::FFTDescription::IsForward() const
{
    const bool bForward = GPUFFT::IsForward(XFormType);
    return bForward;
}
/**
 * Human-readable name of the transform type (used in the render-graph event names in this file).
 * Hits unimplemented() and returns "Error" when the type matches none of the four known values.
 */
const TCHAR* GPUFFT::FFTDescription::FFT_TypeName() const
{
    const TCHAR* TypeName = nullptr;

    if (XFormType == FFT_XFORM_TYPE::FORWARD_HORIZONTAL)
    {
        TypeName = TEXT("Forward Horizontal");
    }
    else if (XFormType == FFT_XFORM_TYPE::INVERSE_HORIZONTAL)
    {
        TypeName = TEXT("Inverse Horizontal");
    }
    else if (XFormType == FFT_XFORM_TYPE::FORWARD_VERTICAL)
    {
        TypeName = TEXT("Forward Vertical");
    }
    else if (XFormType == FFT_XFORM_TYPE::INVERSE_VERTICAL)
    {
        TypeName = TEXT("Inverse Vertical");
    }

    if (TypeName != nullptr)
    {
        return TypeName;
    }

    // None of the known transform types matched.
    unimplemented();
    return TEXT("Error");
}
namespace GPUFFT
{
/**
 * Packs the transform type into the two lowest bits of the result:
 * bit 0 (value 1) is set for horizontal transforms, bit 1 (value 2) for forward transforms.
 */
static uint32 BitEncode(const GPUFFT::FFT_XFORM_TYPE& XFormType)
{
    const uint32 HorizontalBit = GPUFFT::IsHorizontal(XFormType) ? 1u : 0u;
    const uint32 ForwardBit = GPUFFT::IsForward(XFormType) ? 2u : 0u;
    return HorizontalBit | ForwardBit;
}
/**
 * Computes the minimal number of bits required to represent the number N.
 * Returns 0 for N == 0; for a power of two 2^k the result is k + 1.
 */
uint32 BitSize(uint32 N)
{
    uint32 NumBits = 0;
    // Shift the value right until nothing is left, counting one bit per shift.
    for (uint32 Remaining = N; Remaining != 0; Remaining >>= 1)
    {
        ++NumBits;
    }
    return NumBits;
}
/**
 * Decomposes PowTwoLength as PowTwoLength = PowTwoBase x PowTwoBase x .. x PowTwoBase x N,
 * where N is a power of two smaller than PowTwoBase. N is only appended (as the last entry)
 * when log2(PowTwoLength) is not a multiple of log2(PowTwoBase).
 *
 * NB: This assumes PowTwoLength and PowTwoBase are powers of two. An empty list is returned
 * when they are not, when PowTwoLength is zero, or when PowTwoBase is smaller than two.
 *
 * @param PowTwoLength - The power-of-two length to factor.
 * @param PowTwoBase   - The power-of-two factor to use as often as possible (must be >= 2).
 * @return The list of factors; empty for invalid input or when PowTwoLength is 1.
 */
TArray<uint32> GetFactors(const uint32 PowTwoLength, const uint32 PowTwoBase)
{
    TArray<uint32> FactorList;

    // Reject degenerate values explicitly instead of relying on the power-of-two test alone:
    // a zero length would underflow 'BitSize() - 1' below, and a base of one has a log2 of
    // zero, which would be used as a divisor.
    if (PowTwoLength == 0 || PowTwoBase < 2)
    {
        return FactorList;
    }

    // Early out.
    if (!FMath::IsPowerOfTwo(PowTwoLength) || !FMath::IsPowerOfTwo(PowTwoBase))
    {
        return FactorList;
    }

    // For a power of two, BitSize() - 1 is its base-two logarithm.
    const uint32 LogTwoLength = BitSize(PowTwoLength) - 1;
    const uint32 LogTwoBase = BitSize(PowTwoBase) - 1;

    const uint32 BasePower = LogTwoLength / LogTwoBase;
    const uint32 RemainderPower = LogTwoLength % LogTwoBase;

    for (uint32 idx = 0; idx < BasePower; idx++)
    {
        FactorList.Add(PowTwoBase);
    }

    // Whatever power of two is left over becomes one final, smaller factor.
    if (RemainderPower != 0)
    {
        const uint32 Factor = 1u << RemainderPower;
        FactorList.Add(Factor);
    }

    return FactorList;
}
/** Exchanges the two texture references; the textures themselves are not touched. */
void SwapContents(FRDGTextureRef& TmpBuffer, FRDGTextureRef& DstBuffer)
{
    FRDGTextureRef PreviousTmp = TmpBuffer;
    TmpBuffer = DstBuffer;
    DstBuffer = PreviousTmp;
}
}
namespace
{
// Shader permutation dimension named "SCAN_LINE_LENGTH". The sparse value list holds every
// power of two from 2 to 4096; the group-shared FFT dispatch functions in this file select
// the permutation with FFTDesc.SignalLength.
class FPowRadixSignalLengthDim : SHADER_PERMUTATION_SPARSE_INT("SCAN_LINE_LENGTH", 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096);
// Common base class of the GPU FFT compute shaders declared in this file.
class FFFTShader : public FGlobalShader
{
public:
// A permutation is compiled only for platforms on which ShouldCompileGPUFFT() returns true.
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
{
return ShouldCompileGPUFFT(Parameters.Platform);
}
FFFTShader() = default;
FFFTShader(const CompiledShaderInitializerType& Initializer)
: FGlobalShader(Initializer)
{}
};
// Compute shader class for the reorder pass of the multi-pass complex FFT.
// It is dispatched by DispatchReorderFFTPassCS(), which fills the parameters as noted below.
class FReorderFFTPassCS : public FFFTShader
{
DECLARE_GLOBAL_SHADER(FReorderFFTPassCS);
SHADER_USE_PARAMETER_STRUCT(FReorderFFTPassCS, FFFTShader);
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
// Source and destination windows.
SHADER_PARAMETER(FIntRect, SrcRect)
SHADER_PARAMETER(FIntRect, DstRect)
// GPUFFT::BitEncode() of the transform type; the dispatch code additionally sets bit 2 (value 4) when bScrubNaNs is requested.
SHADER_PARAMETER(int32, TransformType)
// BitSize(SignalLength) - 1.
SHADER_PARAMETER(int32, LogTwoLength)
// BitSize(SignalLength / FGroupShardSubFFTPassCS::SubPassLength()) - 1.
SHADER_PARAMETER(int32, BitCount)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, SrcSRV)
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, DstUAV)
END_SHADER_PARAMETER_STRUCT()
// Enables the reorder-pass code path of the shader file via INCLUDE_REORDER_FFT_PASS.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("INCLUDE_REORDER_FFT_PASS"), 1);
}
};
// Compute shader class for the group-shared sub-transform pass of the multi-pass complex FFT.
// It is dispatched by DispatchGSSubComplexFFTPassCS(), which fills the parameters as noted below.
class FGroupShardSubFFTPassCS : public FFFTShader
{
DECLARE_GLOBAL_SHADER(FGroupShardSubFFTPassCS);
SHADER_USE_PARAMETER_STRUCT(FGroupShardSubFFTPassCS, FFFTShader);
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
SHADER_PARAMETER(FIntRect, SrcWindow)
// GPUFFT::BitEncode() of the transform type.
SHADER_PARAMETER(int32, TransformType)
// Full signal length (FFTDesc.SignalLength).
SHADER_PARAMETER(int32, TransformLength)
// TransformLength / SubPassLength().
SHADER_PARAMETER(int32, NumSubRegions)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, SrcTexture)
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, DstTexture)
END_SHADER_PARAMETER_STRUCT()
// Length of one sub-transform; passed to the shader as SCAN_LINE_LENGTH.
static uint32 SubPassLength() { return 2048; }
// Radix passed to the shader as RADIX.
static uint32 Radix() { return 2; }
// Enables the group-shared sub-FFT code path and forwards the two constants above as defines.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("INCLUDE_GROUP_SHARED_SUB_COMPLEX_FFT"), 1);
OutEnvironment.SetDefine(TEXT("SCAN_LINE_LENGTH"), FGroupShardSubFFTPassCS::SubPassLength());
OutEnvironment.SetDefine(TEXT("RADIX"), FGroupShardSubFFTPassCS::Radix());
}
};
// Compute shader class for a single pass of the multi-pass complex FFT.
// It is dispatched by DispatchComplexFFTPassCS(), which fills the parameters as noted below.
class FComplexFFTPassCS : public FFFTShader
{
DECLARE_GLOBAL_SHADER(FComplexFFTPassCS);
SHADER_USE_PARAMETER_STRUCT(FComplexFFTPassCS, FFFTShader);
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
SHADER_PARAMETER(FIntRect, SrcRect)
// Rectangle from (0,0) to FFTDesc.TransformExtent().
SHADER_PARAMETER(FIntRect, DstRect)
// GPUFFT::BitEncode() of the transform type.
SHADER_PARAMETER(int32, TransformType)
// The PassLength argument of the dispatch function.
SHADER_PARAMETER(int32, PowTwoLength)
// BitSize(FFTDesc.SignalLength).
SHADER_PARAMETER(int32, BitCount)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, SrcSRV)
// SRV of the caller's buffer, or of a default float4(1,1,1,1) buffer when the caller passes none.
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<float4>, DstPostFilterParameters)
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, DstUAV)
END_SHADER_PARAMETER_STRUCT()
// Enables the complex-FFT-pass code path of the shader file via INCLUDE_COMPLEX_FFT_PASS.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("INCLUDE_COMPLEX_FFT_PASS"), 1);
}
};
// Compute shader class for the two-for-one split/merge pass.
// It is dispatched by DispatchPackTwoForOneFFTPassCS(), which fills the parameters as noted below.
class FPackTwoForOneFFTPassCS : public FFFTShader
{
DECLARE_GLOBAL_SHADER(FPackTwoForOneFFTPassCS);
SHADER_USE_PARAMETER_STRUCT(FPackTwoForOneFFTPassCS, FFFTShader);
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
// Rectangle starting at (0,0) whose size along the transform direction is the resulting signal length.
SHADER_PARAMETER(FIntRect, DstRect)
// GPUFFT::BitEncode() of the transform type.
SHADER_PARAMETER(int32, TransformType)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, SrcSRV)
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, DstUAV)
END_SHADER_PARAMETER_STRUCT()
// Enables the two-for-one pack code path of the shader file via INCLUDE_PACK_TWOFORONE_FFT_PASS.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("INCLUDE_PACK_TWOFORONE_FFT_PASS"), 1);
}
};
// Compute shader class that copies a window from one texture into another.
// It is dispatched by DispatchCopyWindowCS(), which fills the parameters as noted below.
class FCopyWindowCS : public FFFTShader
{
DECLARE_GLOBAL_SHADER(FCopyWindowCS);
SHADER_USE_PARAMETER_STRUCT(FCopyWindowCS, FFFTShader);
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
SHADER_PARAMETER(FIntRect, SrcRect)
SHADER_PARAMETER(FIntRect, DstRect)
// Receives the GPUFFT::FPreFilter value given to the dispatch function.
SHADER_PARAMETER(FVector3f, BrightPixelGain)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, SrcSRV)
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, DstUAV)
END_SHADER_PARAMETER_STRUCT()
// Thread-group dimensions: exported to the shader as X_THREAD_COUNT / Y_THREAD_COUNT and
// used by the dispatch function to derive the group count from the destination window size.
static FIntPoint ThreadCount() { return FIntPoint(1, 32); }
// Enables the copy-window code path of the shader file and forwards the thread counts.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FFFTShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("INCLUDE_COPY_WINDOW"), 1);
OutEnvironment.SetDefine(TEXT("X_THREAD_COUNT"), ThreadCount().X);
OutEnvironment.SetDefine(TEXT("Y_THREAD_COUNT"), ThreadCount().Y);
}
};
// Compute shader class for the frequency-space multiplication of a source image with a kernel image.
// It is dispatched by DispatchComplexMultiplyImagesCS(), which fills the parameters as noted below.
class FComplexMultiplyImagesCS : public FFFTShader
{
DECLARE_GLOBAL_SHADER(FComplexMultiplyImagesCS);
SHADER_USE_PARAMETER_STRUCT(FComplexMultiplyImagesCS, FFFTShader);
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
SHADER_PARAMETER(FIntRect, SrcRect)
// 1 when the dispatch is made with horizontal scan lines, 0 otherwise.
SHADER_PARAMETER(int32, DataLayout)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, SrcSRV)
// The kernel texture.
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, KnlSRV)
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, DstUAV)
END_SHADER_PARAMETER_STRUCT()
// Enables the complex-multiply code path of the shader file via INCLUDE_COMPLEX_MULTIPLY_IMAGES.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("INCLUDE_COMPLEX_MULTIPLY_IMAGES"), 1);
}
};
// Compute shader class for the group-shared complex FFT, permuted over the signal length.
// It is dispatched by DispatchGSComplexFFTCS(), which fills the parameters as noted below.
class FGSComplexTransformCS : public FFFTShader
{
DECLARE_GLOBAL_SHADER(FGSComplexTransformCS);
SHADER_USE_PARAMETER_STRUCT(FGSComplexTransformCS, FFFTShader);
// One permutation per value of FPowRadixSignalLengthDim.
using FPermutationDomain = TShaderPermutationDomain<FPowRadixSignalLengthDim>;
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
// Min / Max corners of the source rectangle.
SHADER_PARAMETER(FIntPoint, SrcRectMin)
SHADER_PARAMETER(FIntPoint, SrcRectMax)
// FFTDesc.TransformExtent(); DstRect spans (0,0) to that extent.
SHADER_PARAMETER(FIntPoint, DstExtent)
SHADER_PARAMETER(FIntRect, DstRect)
SHADER_PARAMETER(FVector3f, BrightPixelGain)
// GPUFFT::BitEncode() of the transform type; bit 2 (value 4) is set by the dispatch code to turn on the pre-filter.
SHADER_PARAMETER(int32, TransformType)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, SrcTexture)
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, DstTexture)
END_SHADER_PARAMETER_STRUCT()
// Enables the group-shared complex FFT code path; MIXED_RADIX is additionally defined
// for permutations whose signal length is greater than 8.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FPermutationDomain PermutationVector(Parameters.PermutationId);
int32 PowRadixSignalLength = PermutationVector.Get<FPowRadixSignalLengthDim>();
FFFTShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("INCLUDE_GROUP_SHARED_COMPLEX_FFT"), 1);
if (PowRadixSignalLength > 8)
{
OutEnvironment.SetDefine(TEXT("MIXED_RADIX"), 1);
}
}
};
// Compute shader class for the group-shared two-for-one FFT, permuted over the signal length.
// It is dispatched by DispatchGSTwoForOneFFTCS(), which fills the parameters as noted below.
class FGSTwoForOneTransformCS : public FFFTShader
{
DECLARE_GLOBAL_SHADER(FGSTwoForOneTransformCS);
SHADER_USE_PARAMETER_STRUCT(FGSTwoForOneTransformCS, FFFTShader);
// One permutation per value of FPowRadixSignalLengthDim.
using FPermutationDomain = TShaderPermutationDomain<FPowRadixSignalLengthDim>;
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
// Min / Max corners of the source rectangle.
SHADER_PARAMETER(FIntPoint, SrcRectMin)
SHADER_PARAMETER(FIntPoint, SrcRectMax)
// Size of the destination rectangle, followed by the rectangle itself.
SHADER_PARAMETER(FIntPoint, DstExtent)
SHADER_PARAMETER(FIntRect, DstRect)
// Receives the GPUFFT::FPreFilter value given to the dispatch function.
SHADER_PARAMETER(FVector3f, BrightPixelGain)
// GPUFFT::BitEncode() of the transform type; bit 2 (value 4) is set by the dispatch code
// when BrightPixelGain component 0 is less than component 1.
SHADER_PARAMETER(int32, TransformType)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, SrcTexture)
// SRV of the caller's buffer, or of a default float4(1,1,1,1) buffer when the caller passes none.
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<float4>, DstPostFilterParameters)
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, DstTexture)
END_SHADER_PARAMETER_STRUCT()
// Enables the group-shared two-for-one FFT code path; MIXED_RADIX is additionally defined
// for permutations whose signal length is greater than 8.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FPermutationDomain PermutationVector(Parameters.PermutationId);
int32 PowRadixSignalLength = PermutationVector.Get<FPowRadixSignalLengthDim>();
FFFTShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("INCLUDE_GROUP_SHARED_TWO_FOR_ONE_FFT"), 1);
if (PowRadixSignalLength > 8)
{
OutEnvironment.SetDefine(TEXT("MIXED_RADIX"), 1);
}
}
};
// Compute shader class for the group-shared convolution with a filter texture,
// permuted over the signal length. Dispatched by DispatchGSConvolutionWithTextureCS().
class FGSConvolutionWithTextureCS : public FFFTShader
{
DECLARE_GLOBAL_SHADER(FGSConvolutionWithTextureCS);
SHADER_USE_PARAMETER_STRUCT(FGSConvolutionWithTextureCS, FFFTShader);
// One permutation per value of FPowRadixSignalLengthDim.
using FPermutationDomain = TShaderPermutationDomain<FPowRadixSignalLengthDim>;
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
// NOTE(review): presumably the Min / Max corners of the source rectangle, as in the other
// group-shared shaders in this file -- confirm against the dispatch function.
SHADER_PARAMETER(FIntPoint, SrcRectMin)
SHADER_PARAMETER(FIntPoint, SrcRectMax)
SHADER_PARAMETER(FIntPoint, DstExtent)
SHADER_PARAMETER(int32, TransformType)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, SrcTexture)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, FilterTexture)
SHADER_PARAMETER_RDG_TEXTURE_UAV(RWTexture2D, DstTexture)
END_SHADER_PARAMETER_STRUCT()
// Enables the group-shared convolution code path; MIXED_RADIX is additionally defined
// for permutations whose signal length is greater than 8.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FPermutationDomain PermutationVector(Parameters.PermutationId);
int32 PowRadixSignalLength = PermutationVector.Get<FPowRadixSignalLengthDim>();
FFFTShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("INCLUDE_GROUP_SHARED_CONVOLUTION_WITH_TEXTURE"), 1);
if (PowRadixSignalLength > 8)
{
OutEnvironment.SetDefine(TEXT("MIXED_RADIX"), 1);
}
}
};
// Bind every shader class declared above to its compute entry point.
// All entry points live in the same shader file, /Engine/Private/GPUFastFourierTransform.usf.
IMPLEMENT_GLOBAL_SHADER(FReorderFFTPassCS, "/Engine/Private/GPUFastFourierTransform.usf", "ReorderFFTPassCS", SF_Compute);
IMPLEMENT_GLOBAL_SHADER(FGroupShardSubFFTPassCS, "/Engine/Private/GPUFastFourierTransform.usf", "GroupSharedSubComplexFFTCS", SF_Compute);
IMPLEMENT_GLOBAL_SHADER(FComplexFFTPassCS, "/Engine/Private/GPUFastFourierTransform.usf", "ComplexFFTPassCS", SF_Compute);
IMPLEMENT_GLOBAL_SHADER(FPackTwoForOneFFTPassCS, "/Engine/Private/GPUFastFourierTransform.usf", "PackTwoForOneFFTPassCS", SF_Compute);
IMPLEMENT_GLOBAL_SHADER(FCopyWindowCS, "/Engine/Private/GPUFastFourierTransform.usf", "CopyWindowCS", SF_Compute);
IMPLEMENT_GLOBAL_SHADER(FComplexMultiplyImagesCS, "/Engine/Private/GPUFastFourierTransform.usf", "ComplexMultiplyImagesCS", SF_Compute);
IMPLEMENT_GLOBAL_SHADER(FGSComplexTransformCS, "/Engine/Private/GPUFastFourierTransform.usf", "GroupSharedComplexFFTCS", SF_Compute);
IMPLEMENT_GLOBAL_SHADER(FGSTwoForOneTransformCS, "/Engine/Private/GPUFastFourierTransform.usf", "GroupSharedTwoForOneFFTCS", SF_Compute);
IMPLEMENT_GLOBAL_SHADER(FGSConvolutionWithTextureCS, "/Engine/Private/GPUFastFourierTransform.usf", "GSConvolutionWithTextureCS", SF_Compute);
/**
 * Single pass of a multi-pass complex FFT that only reorders the data so that
 * a later group-shared sub-pass can consume it.
 *
 * Assumes the destination texture is large enough to hold the result.
 * The source float4 data is interpreted as a pair of independent complex numbers.
 *
 * @param GraphBuilder - Render graph the pass is added to.
 * @param ShaderMap    - Global shader map the compute shader is fetched from.
 * @param FFTDesc      - Metadata that describes the underlying complex FFT.
 * @param SrcTexture   - The source image.
 * @param SrcWindow    - The region in the source texture where the image to transform lives.
 * @param DstTexture   - The destination texture that will hold the result of the pass.
 * @param DstWindow    - The target window.
 * @param bScrubNaNs   - When true, bit 2 (value 4) is set in the transform type sent to the shader.
 */
void DispatchReorderFFTPassCS(
    FRDGBuilder& GraphBuilder,
    const FGlobalShaderMap* ShaderMap,
    const GPUFFT::FFTDescription& FFTDesc,
    FRDGTextureRef SrcTexture, const FIntRect& SrcWindow,
    FRDGTextureRef DstTexture, const FIntRect& DstWindow,
    const bool bScrubNaNs = false)
{
    // The multi-pass transform is built from radix-two passes.
    const uint32 RadixTwo = 2;
    const uint32 SignalLength = FFTDesc.SignalLength;

    // Number of sub-transforms the later group-shared pass will process.
    const uint32 SubTransformCount = SignalLength / FGroupShardSubFFTPassCS::SubPassLength();

    uint32 EncodedTransformType = GPUFFT::BitEncode(FFTDesc.XFormType);
    if (bScrubNaNs)
    {
        EncodedTransformType |= 4;
    }

    FReorderFFTPassCS::FParameters* Params = GraphBuilder.AllocParameters<FReorderFFTPassCS::FParameters>();
    Params->SrcRect = SrcWindow;
    Params->DstRect = DstWindow;
    Params->TransformType = EncodedTransformType;
    Params->LogTwoLength = GPUFFT::BitSize(SignalLength) - 1;
    Params->BitCount = GPUFFT::BitSize(SubTransformCount) - 1;
    Params->SrcSRV = SrcTexture;
    Params->DstUAV = GraphBuilder.CreateUAV(DstTexture);

    TShaderMapRef<FReorderFFTPassCS> ReorderShader(ShaderMap);

    const FIntVector GroupCount(1, 1, SignalLength / RadixTwo);
    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("FFT Multipass: Complex %s Reorder pass of size %d", FFTDesc.FFT_TypeName(), SignalLength),
        FFTDesc.ComputePassFlags,
        ReorderShader,
        Params,
        GroupCount);
}
/**
 * A group-shared single pass of a multi-pass complex FFT.
 * Assumes the destination texture is large enough to hold the result.
 * The source float4 data is interpreted as a pair of independent complex numbers.
 *
 * @param GraphBuilder - Render graph the pass is added to.
 * @param ShaderMap    - Global shader map the compute shader is fetched from.
 * @param FFTDesc      - Metadata that describes the underlying complex FFT.
 * @param SrcTexture   - The source image.
 * @param SrcWindow    - The region in the source texture where the image to transform lives.
 * @param DstTexture   - The destination texture that will hold the result of the pass.
 */
void DispatchGSSubComplexFFTPassCS(
    FRDGBuilder& GraphBuilder,
    const FGlobalShaderMap* ShaderMap,
    const GPUFFT::FFTDescription& FFTDesc,
    FRDGTextureRef SrcTexture, const FIntRect& SrcWindow,
    FRDGTextureRef DstTexture)
{
    const uint32 SignalLength = FFTDesc.SignalLength;
    const uint32 SubRegionCount = SignalLength / FGroupShardSubFFTPassCS::SubPassLength();
    const bool bHorizontal = FFTDesc.IsHorizontal();

    // Window on which a single sub-transform acts: the source window clipped to
    // SubPassLength() texels along the transform direction.
    FIntRect SingleTransformWindow = SrcWindow;
    if (bHorizontal)
    {
        SingleTransformWindow.Max.X = SingleTransformWindow.Min.X + FGroupShardSubFFTPassCS::SubPassLength();
    }
    else
    {
        SingleTransformWindow.Max.Y = SingleTransformWindow.Min.Y + FGroupShardSubFFTPassCS::SubPassLength();
    }

    // One signal per texel across the transform direction.
    const FIntPoint SingleTransformSize = SingleTransformWindow.Size();
    const uint32 SignalCount = bHorizontal ? SingleTransformSize.Y : SingleTransformSize.X;

    FGroupShardSubFFTPassCS::FParameters* Params = GraphBuilder.AllocParameters<FGroupShardSubFFTPassCS::FParameters>();
    Params->SrcWindow = SrcWindow;
    Params->TransformType = GPUFFT::BitEncode(FFTDesc.XFormType);
    Params->TransformLength = SignalLength;
    Params->NumSubRegions = SubRegionCount;
    Params->SrcTexture = SrcTexture;
    Params->DstTexture = GraphBuilder.CreateUAV(DstTexture);

    TShaderMapRef<FGroupShardSubFFTPassCS> SubFFTShader(ShaderMap);

    const FIntVector GroupCount(1, 1, SignalCount);
    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("FFT Multipass: %d GS Subpasses Complex %s of size %d",
            SubRegionCount, FFTDesc.FFT_TypeName(), FGroupShardSubFFTPassCS::SubPassLength()),
        FFTDesc.ComputePassFlags,
        SubFFTShader,
        Params,
        GroupCount);
}
/**
 * Single pass of a multi-pass complex FFT.
 * Assumes the destination texture is large enough to hold the result.
 * The source float4 data is interpreted as a pair of independent complex numbers.
 *
 * @param GraphBuilder            - Render graph the pass is added to.
 * @param ShaderMap               - Global shader map the compute shader is fetched from.
 * @param FFTDesc                 - Metadata that describes the underlying complex FFT.
 * @param PassLength              - Forwarded to the shader as PowTwoLength.
 * @param SrcTexture              - The source image.
 * @param SrcWindow               - The region in the source texture where the image to transform lives.
 * @param DstPostFilterParameters - Optional buffer; when null a default float4(1,1,1,1) buffer is bound instead.
 * @param DstTexture              - The destination texture that will hold the result of the pass.
 */
void DispatchComplexFFTPassCS(
    FRDGBuilder& GraphBuilder,
    const FGlobalShaderMap* ShaderMap,
    const GPUFFT::FFTDescription& FFTDesc,
    const uint32 PassLength,
    FRDGTextureRef SrcTexture, const FIntRect& SrcWindow,
    FRDGBufferRef DstPostFilterParameters,
    FRDGTextureRef DstTexture)
{
    // The multi-pass transform is built from radix-two passes.
    const uint32 RadixTwo = 2;
    const uint32 SignalLength = FFTDesc.SignalLength;
    const uint32 EncodedTransformType = GPUFFT::BitEncode(FFTDesc.XFormType);

    // The shader always reads the post-filter buffer, so fall back to a default one.
    if (!DstPostFilterParameters)
    {
        DstPostFilterParameters = GSystemTextures.GetDefaultStructuredBuffer(GraphBuilder, sizeof(FVector4f), FVector4f(1.0f, 1.0f, 1.0f, 1.0f));
    }

    FComplexFFTPassCS::FParameters* Params = GraphBuilder.AllocParameters<FComplexFFTPassCS::FParameters>();
    Params->SrcRect = SrcWindow;
    Params->DstRect = FIntRect(FIntPoint(0, 0), FFTDesc.TransformExtent());
    Params->TransformType = EncodedTransformType;
    Params->PowTwoLength = PassLength;
    // NOTE(review): unlike DispatchReorderFFTPassCS, no 1 is subtracted from BitSize() here -- confirm against the shader.
    Params->BitCount = GPUFFT::BitSize(SignalLength);
    Params->SrcSRV = SrcTexture;
    Params->DstPostFilterParameters = GraphBuilder.CreateSRV(DstPostFilterParameters);
    Params->DstUAV = GraphBuilder.CreateUAV(DstTexture);

    TShaderMapRef<FComplexFFTPassCS> PassShader(ShaderMap);

    const FIntVector GroupCount(1, 1, SignalLength / RadixTwo);
    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("FFT Multipass: Pass %d of Complex %s of size %d", PassLength, FFTDesc.FFT_TypeName(), SignalLength),
        FFTDesc.ComputePassFlags,
        PassShader,
        Params,
        GroupCount);
}
/**
 * Single pass that separates or merges the transform of four real signals
 * viewed as the transform of two complex signals.
 *
 * Assumes the destination texture is large enough to hold the result.
 * The source float4 data is interpreted as a pair of independent complex numbers.
 *
 * @param GraphBuilder - Render graph the pass is added to.
 * @param ShaderMap    - Global shader map the compute shader is fetched from.
 * @param FFTDesc      - Metadata that describes the underlying complex FFT.
 * @param SrcTexture   - The source image.
 * @param DstTexture   - The destination texture that will hold the result of the pass.
 *
 * The data in SrcTexture and DstTexture is aligned with (0,0).
 * FFTDesc.IsHorizontal() indicates the transform direction in the buffer that needs to be split/merged.
 * FFTDesc.IsForward() indicates split (true) vs merge (false).
 */
void DispatchPackTwoForOneFFTPassCS(
    FRDGBuilder& GraphBuilder,
    const FGlobalShaderMap* ShaderMap,
    const GPUFFT::FFTDescription& FFTDesc,
    FRDGTextureRef SrcTexture,
    FRDGTextureRef DstTexture)
{
    const uint32 SignalLength = FFTDesc.SignalLength;

    // A real signal of length 'SignalLength' requires only SignalLength / 2 + 1 complex coefficients.
    const uint32 RealCoefficientCount = (SignalLength / 2) + 1;

    // Splitting (forward) yields two real transforms; merging (inverse) yields the full signal again.
    uint32 OutputLength = SignalLength;
    if (FFTDesc.IsForward())
    {
        OutputLength = 2 * RealCoefficientCount;
    }

    // Only the extent along the transform direction changes.
    FIntPoint OutputExtent = FFTDesc.TransformExtent();
    if (FFTDesc.IsHorizontal())
    {
        OutputExtent.X = OutputLength;
    }
    else
    {
        OutputExtent.Y = OutputLength;
    }

    FPackTwoForOneFFTPassCS::FParameters* Params = GraphBuilder.AllocParameters<FPackTwoForOneFFTPassCS::FParameters>();
    Params->DstRect = FIntRect(FIntPoint(0, 0), OutputExtent);
    Params->TransformType = GPUFFT::BitEncode(FFTDesc.XFormType);
    Params->SrcSRV = SrcTexture;
    Params->DstUAV = GraphBuilder.CreateUAV(DstTexture);

    TShaderMapRef<FPackTwoForOneFFTPassCS> PackShader(ShaderMap);

    const FIntVector GroupCount(1, 1, RealCoefficientCount);
    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("FFT Multipass: TwoForOne Combine/split result of %s of size %d", FFTDesc.FFT_TypeName(), SignalLength),
        FFTDesc.ComputePassFlags,
        PackShader,
        Params,
        GroupCount);
}
/**
 * Single pass that copies a sub-region of a texture and potentially
 * boosts the intensity of select pixels.
 *
 * Assumes the destination texture is large enough to hold the result.
 *
 * @param GraphBuilder     - Render graph the pass is added to.
 * @param ComputePassFlags - Flags the compute pass is added with.
 * @param ShaderMap        - Global shader map the compute shader is fetched from.
 * @param SrcTexture       - The source image.
 * @param SrcWindow        - The region of interest to copy.
 * @param DstTexture       - The destination texture that will hold the result of the pass.
 * @param DstWindow        - The target location for the image.
 * @param PreFilter        - Optional filter to boost selected pixels; forwarded to the shader as BrightPixelGain.
 */
void DispatchCopyWindowCS(
    FRDGBuilder& GraphBuilder,
    ERDGPassFlags ComputePassFlags,
    const FGlobalShaderMap* ShaderMap,
    FRDGTextureRef SrcTexture, const FIntRect& SrcWindow,
    FRDGTextureRef DstTexture, const FIntRect& DstWindow,
    const GPUFFT::FPreFilter& PreFilter = GPUFFT::FPreFilter(TNumericLimits<float>::Max(), TNumericLimits<float>::Lowest(), 0.f))
{
    FCopyWindowCS::FParameters* Params = GraphBuilder.AllocParameters<FCopyWindowCS::FParameters>();
    Params->SrcRect = SrcWindow;
    Params->DstRect = DstWindow;
    Params->BrightPixelGain = PreFilter;
    Params->SrcSRV = SrcTexture;
    Params->DstUAV = GraphBuilder.CreateUAV(DstTexture);

    TShaderMapRef<FCopyWindowCS> CopyShader(ShaderMap);

    // Enough thread groups to cover the whole destination window.
    const FIntVector GroupCount = FComputeShaderUtils::GetGroupCount(DstWindow.Size(), FCopyWindowCS::ThreadCount());
    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("FFT Multipass: Copy Subwindow"),
        ComputePassFlags,
        CopyShader,
        Params,
        GroupCount);
}
/**
 * Single pass that computes the frequency-space convolution of two buffers
 * that have already been transformed into frequency space.
 *
 * This really means the complex product of two buffers divided
 * by the correct values to "normalize" the effect of the kernel buffer.
 *
 * In this case, each float4 is viewed as a pair of complex numbers,
 * and the product of float4 Src, Knl is computed
 * as float4(ComplexMult(Src.xy, Knl.xy) / Na, ComplexMult(Src.zw, Knl.zw) / Nb)
 * where Na and Nb are related to the sums of the weights in the kernel buffer.
 *
 * Assumes the destination texture is large enough to hold the result.
 * The Src float4 data is interpreted as a pair of independent complex numbers.
 * The Knl float4 data is interpreted as a pair of independent complex numbers.
 *
 * @param GraphBuilder         - Render graph the pass is added to.
 * @param ComputePassFlags     - Flags the compute pass is added with.
 * @param ShaderMap            - Global shader map the compute shader is fetched from.
 * @param bHorizontalScanlines - Describes the layout of the transformed data.
 *                               true for data that was transformed as
 *                               Vertical::ComplexFFT following Horizontal::TwoForOneRealFFT,
 *                               false for data that was transformed as
 *                               Horizontal::ComplexFFT following Vertical::TwoForOneRealFFT.
 * @param SrcTexture           - The source image.
 * @param SrcWindow            - The region of the source image to process; its size also fixes the number of scan lines.
 * @param KnlTexure            - The kernel image.
 * @param DstTexture           - The destination texture that will hold the result of the pass.
 */
void DispatchComplexMultiplyImagesCS(
    FRDGBuilder& GraphBuilder,
    ERDGPassFlags ComputePassFlags,
    const FGlobalShaderMap* ShaderMap,
    const bool bHorizontalScanlines,
    FRDGTextureRef SrcTexture, const FIntRect& SrcWindow,
    FRDGTextureRef KnlTexure,
    FRDGTextureRef DstTexture)
{
    // The size of the dst.
    const FIntPoint DstExtent = SrcWindow.Size();

    // Align the scan lines in the direction of the first transform; one thread group is dispatched per scan line.
    const uint32 NumScanLines = bHorizontalScanlines ? DstExtent.Y : DstExtent.X;

    FComplexMultiplyImagesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FComplexMultiplyImagesCS::FParameters>();
    PassParameters->SrcRect = SrcWindow;
    PassParameters->DataLayout = bHorizontalScanlines ? 1 : 0;
    PassParameters->SrcSRV = SrcTexture;
    PassParameters->KnlSRV = KnlTexure;
    PassParameters->DstUAV = GraphBuilder.CreateUAV(DstTexture);

    TShaderMapRef<FComplexMultiplyImagesCS> ComputeShader(ShaderMap);
    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("FFT Multipass: Convolution in freq-space"),
        ComputePassFlags,
        ComputeShader,
        PassParameters,
        FIntVector(1, 1, NumScanLines));
}
/**
 * Complex 1D FFT of two independent complex signals.
 * Assumes the destination texture is large enough to hold the result.
 * The source float4 data is interpreted as a pair of independent complex numbers.
 *
 * @param GraphBuilder - Render graph the pass is added to.
 * @param ShaderMap    - Global shader map the compute shader is fetched from.
 * @param FFTDesc      - Metadata that describes the underlying complex FFT.
 * @param SrcTexture   - The source image.
 * @param SrcRect      - The region in the source texture where the image to transform lives.
 * @param DstTexture   - The destination texture that will hold the result of the 1D complex FFT.
 */
void DispatchGSComplexFFTCS(
    FRDGBuilder& GraphBuilder,
    const FGlobalShaderMap* ShaderMap,
    const GPUFFT::FFTDescription& FFTDesc,
    FRDGTextureRef SrcTexture, const FIntRect& SrcRect,
    FRDGTextureRef DstTexture)
{
    const uint32 SignalLength = FFTDesc.SignalLength;
    const FIntPoint OutputExtent = FFTDesc.TransformExtent();

    // The number of signals to transform simultaneously (i.e. number of scan lines).
    const FIntPoint SrcSize = SrcRect.Size();
    const uint32 SignalCount = FFTDesc.IsHorizontal() ? SrcSize.Y : SrcSize.X;

    FGSComplexTransformCS::FParameters* Params = GraphBuilder.AllocParameters<FGSComplexTransformCS::FParameters>();
    Params->SrcRectMin = SrcRect.Min;
    Params->SrcRectMax = SrcRect.Max;
    Params->DstExtent = OutputExtent;
    Params->DstRect = FIntRect(FIntPoint(0, 0), OutputExtent);
    Params->BrightPixelGain = GPUFFT::FPreFilter(TNumericLimits<float>::Max(), TNumericLimits<float>::Lowest(), 0.f);

    // Translate the transform type.
    Params->TransformType = GPUFFT::BitEncode(FFTDesc.XFormType);

    // A pre-filter counts as valid when its min (component 0) is less than its max (component 1);
    // in that case bit 2 (value 4) is set to turn the pre-filter on in the shader.
    const bool bHasValidPreFilter = Params->BrightPixelGain.Component(0) < Params->BrightPixelGain.Component(1);
    if (bHasValidPreFilter)
    {
        Params->TransformType |= 4;
    }

    Params->SrcTexture = SrcTexture;
    Params->DstTexture = GraphBuilder.CreateUAV(DstTexture);

    // Pick the shader permutation compiled for this signal length.
    FGSComplexTransformCS::FPermutationDomain Permutation;
    Permutation.Set<FPowRadixSignalLengthDim>(SignalLength);
    TShaderMapRef<FGSComplexTransformCS> TransformShader(ShaderMap, Permutation);

    const FIntVector GroupCount(1, 1, SignalCount);
    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("FFT: Complex %s of size %d", FFTDesc.FFT_TypeName(), SignalLength),
        FFTDesc.ComputePassFlags,
        TransformShader,
        Params,
        GroupCount);
}
/**
 * Real 1D FFT of four independent real signals.
 * Assumes the destination texture is large enough to hold the result.
 * The source float4 data is interpreted as 4 independent real numbers.
 * The destination float4 data will be two complex numbers.
 *
 * @param GraphBuilder            - Render graph the pass is added to.
 * @param ShaderMap               - Global shader map the compute shader is fetched from.
 * @param FFTDesc                 - Metadata that describes the underlying complex FFT.
 * @param SrcTexture              - The source image.
 * @param SrcRect                 - The region in the source texture where the image to transform lives.
 * @param DstTexture              - The destination texture that will hold the result of the 1D FFT.
 * @param DstRect                 - Where to write the transform data in the destination texture.
 * @param PreFilter               - Used to boost the intensity of already bright pixels.
 * @param DstPostFilterParameters - Optional buffer; when null a default float4(1,1,1,1) buffer is bound instead.
 */
void DispatchGSTwoForOneFFTCS(
    FRDGBuilder& GraphBuilder,
    const FGlobalShaderMap* ShaderMap,
    const GPUFFT::FFTDescription& FFTDesc,
    FRDGTextureRef SrcTexture, const FIntRect& SrcRect,
    FRDGTextureRef DstTexture, const FIntRect& DstRect,
    const GPUFFT::FPreFilter& PreFilter,
    FRDGBufferRef DstPostFilterParameters)
{
    const uint32 SignalLength = FFTDesc.SignalLength;

    // The number of signals to transform simultaneously (i.e. number of scan lines).
    const FIntPoint SrcSize = SrcRect.Size();
    const uint32 ScanLineCount = FFTDesc.IsHorizontal() ? SrcSize.Y : SrcSize.X;

    // The shader always reads the post-filter buffer, so fall back to a default one.
    if (!DstPostFilterParameters)
    {
        DstPostFilterParameters = GSystemTextures.GetDefaultStructuredBuffer(GraphBuilder, sizeof(FVector4f), FVector4f(1.0f, 1.0f, 1.0f, 1.0f));
    }

    FGSTwoForOneTransformCS::FParameters* Params = GraphBuilder.AllocParameters<FGSTwoForOneTransformCS::FParameters>();
    Params->SrcRectMin = SrcRect.Min;
    Params->SrcRectMax = SrcRect.Max;
    Params->DstExtent = DstRect.Size();
    Params->DstRect = DstRect;
    Params->BrightPixelGain = PreFilter;

    // Translate the transform type.
    Params->TransformType = GPUFFT::BitEncode(FFTDesc.XFormType);

    // A pre-filter counts as valid when its min (component 0) is less than its max (component 1);
    // in that case bit 2 (value 4) is set to turn the pre-filter on in the shader.
    const bool bHasValidPreFilter = Params->BrightPixelGain.Component(0) < Params->BrightPixelGain.Component(1);
    if (bHasValidPreFilter)
    {
        Params->TransformType |= 4;
    }

    Params->SrcTexture = SrcTexture;
    Params->DstPostFilterParameters = GraphBuilder.CreateSRV(DstPostFilterParameters);
    Params->DstTexture = GraphBuilder.CreateUAV(DstTexture);

    // Pick the shader permutation compiled for this signal length.
    FGSTwoForOneTransformCS::FPermutationDomain Permutation;
    Permutation.Set<FPowRadixSignalLengthDim>(SignalLength);
    TShaderMapRef<FGSTwoForOneTransformCS> TransformShader(ShaderMap, Permutation);

    const FIntVector GroupCount(1, 1, ScanLineCount);
    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("FFT: Two-For-One %s of size %d of buffer %d x %d", FFTDesc.FFT_TypeName(), SignalLength, SrcSize.X, SrcSize.Y),
        FFTDesc.ComputePassFlags,
        TransformShader,
        Params,
        GroupCount);
}
/**
* Complex 1D FFT followed by multiplication with a kernel texture and the inverse transform,
* recorded as a single compute pass.
*
* @param GraphBuilder - Render graph builder the compute pass is added to
* @param ShaderMap - Global shader map used to look up the compute shader
* @param FFTDesc - Metadata that describes the underlying complex FFT
* @param SrcTexture - The source image texture
* @param SrcRect - The region in the Src texture where the image to transform lives;
*                  its size is also used as the destination extent.
* @param PreTransformedKernel - Pre-transformed kernel used in the convolution.
* @param DstTexture - The destination texture (bound as a UAV) that receives the result
*
*/
void DispatchGSConvolutionWithTextureCS(
	FRDGBuilder& GraphBuilder,
	const FGlobalShaderMap* ShaderMap,
	const GPUFFT::FFTDescription& FFTDesc,
	FRDGTextureRef SrcTexture, const FIntRect& SrcRect,
	FRDGTextureRef PreTransformedKernel,
	FRDGTextureRef DstTexture)
{
	const FIntPoint SrcSize = SrcRect.Size();
	const uint32 TransformLength = FFTDesc.SignalLength;

	// The number of signals to transform simultaneously (i.e. number of scan lines).
	// NB: This may be different from the FFTDesc.NumScanlines.
	const uint32 SignalCount = FFTDesc.IsHorizontal() ? SrcSize.Y : SrcSize.X;

	FGSConvolutionWithTextureCS::FParameters* PassParameters =
		GraphBuilder.AllocParameters<FGSConvolutionWithTextureCS::FParameters>();

	PassParameters->SrcRectMin = SrcRect.Min;
	PassParameters->SrcRectMax = SrcRect.Max;
	PassParameters->DstExtent = SrcSize;

	// Encode the transform type; the "use alpha" flag (value 8) is always set.
	PassParameters->TransformType = GPUFFT::BitEncode(FFTDesc.XFormType);
	PassParameters->TransformType |= 8;

	PassParameters->SrcTexture = SrcTexture;
	PassParameters->FilterTexture = PreTransformedKernel;
	PassParameters->DstTexture = GraphBuilder.CreateUAV(DstTexture);

	// The shader permutation is selected by the signal length.
	FGSConvolutionWithTextureCS::FPermutationDomain PermutationVector;
	PermutationVector.Set<FPowRadixSignalLengthDim>(TransformLength);
	TShaderMapRef<FGSConvolutionWithTextureCS> ComputeShader(ShaderMap, PermutationVector);

	// One thread group along Z per signal.
	FComputeShaderUtils::AddPass(
		GraphBuilder,
		RDG_EVENT_NAME("FFT: Apply %s Transform, Multiply Texture, and InverseTransform size %d of buffer %d x %d", FFTDesc.FFT_TypeName(), TransformLength, SrcSize.X, SrcSize.Y),
		FFTDesc.ComputePassFlags,
		ComputeShader,
		PassParameters,
		FIntVector(1, 1, SignalCount));
}
/** Returns true when a signal of the given length does not exceed GPUFFT::MaxScanLineLength(). */
bool FitsInGroupSharedMemory(const uint32& Length)
{
	return Length <= GPUFFT::MaxScanLineLength();
}
/** Convenience overload: applies the length test to the signal length stored in the FFT description. */
bool FitsInGroupSharedMemory(const GPUFFT::FFTDescription& FFTDesc)
{
	const uint32 SignalLength = FFTDesc.SignalLength;
	return FitsInGroupSharedMemory(SignalLength);
}
} // namespace
/**
 * Public entry point for the windowed image copy.
 * Forwards every argument, unchanged and in order, to DispatchCopyWindowCS.
 */
void GPUFFT::CopyImage2D(
	FRDGBuilder& GraphBuilder,
	ERDGPassFlags ComputePassFlags,
	const FGlobalShaderMap* ShaderMap,
	FRDGTextureRef SrcTexture, const FIntRect& SrcWindow,
	FRDGTextureRef DstTexture, const FIntRect& DstWindow,
	const FPreFilter& PreFilter)
{
	DispatchCopyWindowCS(
		GraphBuilder, ComputePassFlags, ShaderMap,
		/* SrcTexture = */ SrcTexture, SrcWindow,
		/* DstTexture = */ DstTexture, DstWindow,
		PreFilter);
}
/**
 * Complex 1D FFT of an image, recorded as one or more render-graph compute passes.
 *
 * If the signal length passes FitsInGroupSharedMemory() a single group-shared pass is dispatched.
 * Otherwise the transform is split into a reorder pass, a group-shared sub-FFT pass and then one
 * pass per doubling of Ns from FGroupShardSubFFTPassCS::SubPassLength() up to the signal length.
 * Intermediate results go through transient "FFT.Spectral" textures; the last pass writes DstTexture.
 *
 * @param GraphBuilder - Render graph builder the passes are added to
 * @param ShaderMap - Global shader map used to look up the compute shaders
 * @param FFTDesc - Describes the transform (type, signal length, number of scan lines)
 * @param SrcTexture - Source texture
 * @param SrcWindow - Region of SrcTexture that holds the data to transform
 * @param DstTexture - Texture that receives the transformed data
 * @param PostFilterParameters - Forwarded to the final pass of the multi-pass path only.
 * @param bScrubNaNs - NOTE(review): only referenced by the disabled reference branch below; no
 *                     enabled code in this function forwards it. Confirm this is intended.
 */
void GPUFFT::ComplexFFTImage1D::MultiPass(
	FRDGBuilder& GraphBuilder,
	const FGlobalShaderMap* ShaderMap,
	const FFTDescription& FFTDesc,
	FRDGTextureRef SrcTexture, const FIntRect& SrcWindow,
	FRDGTextureRef DstTexture,
	FRDGBufferRef PostFilterParameters,
	const bool bScrubNaNs)
{
	// Short signals: a single group-shared pass does the whole transform.
	// NOTE(review): PostFilterParameters is not forwarded on this path - confirm this is intended.
	if (FitsInGroupSharedMemory(FFTDesc))
	{
		DispatchGSComplexFFTCS(
			GraphBuilder, ShaderMap,
			FFTDesc,
			SrcTexture, SrcWindow,
			DstTexture);
		return;
	}

	RDG_EVENT_SCOPE(GraphBuilder, "ComplexFFTImage1D");

	const uint32 TransformLength = FFTDesc.SignalLength;

	// The direction of the transform must be a power of two.
	check(FMath::IsPowerOfTwo(TransformLength));

	// The number of iterations required (only consumed by the disabled reference branch below).
	const uint32 Log2TransformLength = BitSize(TransformLength) - 1;

	FIntPoint DstExtent = FFTDesc.TransformExtent();
	const FIntRect XFormWindow(FIntPoint(0, 0), DstExtent);

	// Testing code branch: Breaks the transform into Log2(TransformLength) number of passes.
	// this is the slowest algorithm, and uses no group-shared storage.
#if 0
	{
		DispatchComplexFFTPassCS(Context, FFTDesc, 1, SrcTexture, Window, Targets.DstTarget().UAV, bScrubNaNs);
		for (uint32 Ns = 2; Ns < TransformLength; Ns *= 2)
		{
			Targets.Swap();
			auto HasValidTargets = [&Targets, &DstExtent]()->bool
			{
				// Verify that the buffers being used are big enough. Note that we are checking the "src" buffer, but due
				// to the double buffering we will end up testing both buffers.
				FIntPoint SrcBufferSize = Targets.SrcTarget().ShaderResourceTexture->GetTexture2D()->GetSizeXY();
				bool Fits = !(SrcBufferSize.X < DstExtent.X) && !(SrcBufferSize.Y < DstExtent.Y);
				return Fits;
			};
			checkf(HasValidTargets(), TEXT("FFT: Allocated Buffers too small."));
			DispatchComplexFFTPassCS(Context, FFTDesc, Ns, Targets.SrcTarget().ShaderResourceTexture, XFormWindow, Targets.DstTarget().UAV);
		}
		// If this transform requires an even number of passes
		// this swap will insure that the "DstBuffer" is filled last.
		if (Log2TransformLength % 2 == 0)
		{
			SwapContents(TmpBuffer, DstBuffer);
		}
	}
#endif

	// Reorder, followed by a High-level group-shared pass followed by Log2(TransformLength / SubPassLength() ) simple passes.
	// In total 2 + Log2(TransformLength / SubPassLength() ) passes. This will be on the order of 3 or 4 passes
	// compared with 12 or more ..

	// Intermediate textures use the destination description on forward transforms and the
	// source description otherwise.
	const FRDGTextureDesc SpectralDesc = FFTDesc.IsForward() ? DstTexture->Desc : SrcTexture->Desc;

	// Re-order the data so we can do a pass of group-shared transforms
	FRDGTextureRef SpectralTexture = GraphBuilder.CreateTexture(SpectralDesc, TEXT("FFT.Spectral"));
	DispatchReorderFFTPassCS(
		GraphBuilder, ShaderMap, FFTDesc,
		/* SrcTexture = */ SrcTexture, SrcWindow,
		/* DstTexture = */ SpectralTexture, XFormWindow);

	// Group-shared sub-transforms.
	FRDGTextureRef PrevSpectralTexture = GraphBuilder.CreateTexture(SpectralDesc, TEXT("FFT.Spectral"));
	DispatchGSSubComplexFFTPassCS(
		GraphBuilder, ShaderMap, FFTDesc,
		/* SrcTexture = */ SpectralTexture, XFormWindow,
		/* DstTexture = */ PrevSpectralTexture);

	// Remaining passes: each one reads the previous pass's output. Only the last pass
	// targets DstTexture and receives the post-filter parameters.
	for (uint32 Ns = FGroupShardSubFFTPassCS::SubPassLength(); Ns < TransformLength; Ns *= 2)
	{
		const bool bIsFinalPass = !(Ns * 2 < TransformLength);

		FRDGTextureRef OpDstTexture = DstTexture;
		FRDGBufferRef DstPostFilterParameters = PostFilterParameters;
		if (!bIsFinalPass)
		{
			FIntPoint SrcBufferSize = PrevSpectralTexture->Desc.Extent;
			checkf(!(SrcBufferSize.X < DstExtent.X) && !(SrcBufferSize.Y < DstExtent.Y), TEXT("FFT: Allocated Buffers too small."));

			OpDstTexture = GraphBuilder.CreateTexture(SpectralDesc, TEXT("FFT.Spectral"));
			DstPostFilterParameters = nullptr;
		}

		DispatchComplexFFTPassCS(
			GraphBuilder, ShaderMap, FFTDesc,
			Ns,
			/* SrcTexture = */ PrevSpectralTexture, XFormWindow,
			/* DstPostFilterParameters = */ DstPostFilterParameters,
			/* DstTexture = */ OpDstTexture);

		PrevSpectralTexture = OpDstTexture;
	}

	// The final pass must have written the caller's destination.
	check(PrevSpectralTexture == DstTexture);
}
/**
 * Two-for-one real 1D FFT of an image, recorded as one or more render-graph compute passes.
 *
 * If the signal length passes FitsInGroupSharedMemory() a single group-shared pass is dispatched.
 * Otherwise:
 *  - forward: optional pre-filter copy, complex FFT, then a pack/unpack pass into DstTexture;
 *  - inverse: pack pass into a transient texture, then complex FFT into DstTexture.
 *
 * @param GraphBuilder - Render graph builder the passes are added to
 * @param ShaderMap - Global shader map used to look up the compute shaders
 * @param FFTDesc - Describes the transform (type, signal length, number of scan lines)
 * @param SrcTexture - Source texture
 * @param SrcWindow - Region of SrcTexture that holds the data to transform
 * @param DstTexture - Texture that receives the result
 * @param DstWindow - Destination region; the multi-pass paths expect its Min to be zero
 * @param PreFilter - Pre-filter; on the multi-pass path it is applied on forward transforms only
 * @param PostFilterParameters - Forwarded to the group-shared pass, or to the complex FFT of the
 *                               multi-pass inverse path; unused by the multi-pass forward path
 */
void GPUFFT::TwoForOneRealFFTImage1D::MultiPass(
	FRDGBuilder& GraphBuilder,
	const FGlobalShaderMap* ShaderMap,
	const FFTDescription& FFTDesc,
	FRDGTextureRef SrcTexture, const FIntRect& SrcWindow,
	FRDGTextureRef DstTexture, const FIntRect& DstWindow,
	const FPreFilter& PreFilter,
	FRDGBufferRef PostFilterParameters)
{
	// Short signals: the whole transform is done by one group-shared pass.
	if (FitsInGroupSharedMemory(FFTDesc))
	{
		DispatchGSTwoForOneFFTCS(
			GraphBuilder, ShaderMap, FFTDesc,
			SrcTexture, SrcWindow,
			DstTexture, DstWindow,
			PreFilter,
			PostFilterParameters);
		return;
	}

	if (FFTDesc.IsForward())
	{
		RDG_EVENT_SCOPE(GraphBuilder, "TwoForOneRealFFTImage1D(Forward)");

		// Texture fed to the complex FFT: the source itself, or a pre-filtered copy of it.
		FRDGTextureRef TransformInput = SrcTexture;

		// Filter only on forward transform through image copy pass.
		if (IsActive(PreFilter))
		{
			const FRDGTextureDesc PreFilteredDesc = FRDGTextureDesc::Create2D(
				SrcTexture->Desc.Extent,
				SrcTexture->Desc.Format,
				FClearValueBinding::None,
				TexCreate_ShaderResource | TexCreate_UAV);

			FRDGTextureRef PreFilteredTexture = GraphBuilder.CreateTexture(PreFilteredDesc, TEXT("FFT.PreFilter"));
			CopyImage2D(
				GraphBuilder, FFTDesc.ComputePassFlags, ShaderMap,
				/* SrcTexture = */ SrcTexture, SrcWindow,
				/* DstTexture = */ PreFilteredTexture, SrcWindow,
				PreFilter);

			TransformInput = PreFilteredTexture;
		}

		// Transform as complex data into a transient texture.
		FRDGTextureRef SpectralTexture = GraphBuilder.CreateTexture(DstTexture->Desc, TEXT("FFT.Spectral"));
		ComplexFFTImage1D::MultiPass(
			GraphBuilder, ShaderMap, FFTDesc,
			/* SrcTexture = */ TransformInput, SrcWindow,
			/* DstTexture = */ SpectralTexture,
			/* PostFilterParameters = */ nullptr,
			/* bScrubNaNs = */ true);

		// Unpack the complex transform into transform of real data
		ensure(DstWindow.Min == FIntPoint::ZeroValue);
		DispatchPackTwoForOneFFTPassCS(
			GraphBuilder, ShaderMap, FFTDesc,
			/* SrcTexture = */ SpectralTexture,
			/* DstTexture = */ DstTexture);
	}
	else
	{
		// Inverse transform.
		RDG_EVENT_SCOPE(GraphBuilder, "TwoForOneRealFFTImage1D(Inverse)");

		// Pack the 4 transforms of real data as 2 transforms of complex data
		FRDGTextureRef SpectralTexture = GraphBuilder.CreateTexture(SrcTexture->Desc, TEXT("FFT.Spectral"));
		ensure(SrcWindow.Min == FIntPoint::ZeroValue);
		DispatchPackTwoForOneFFTPassCS(
			GraphBuilder, ShaderMap, FFTDesc,
			/* SrcTexture = */ SrcTexture,
			/* DstTexture = */ SpectralTexture);

		// Transform as complex data
		ensure(DstWindow.Min == FIntPoint::ZeroValue);
		ComplexFFTImage1D::MultiPass(
			GraphBuilder, ShaderMap, FFTDesc,
			/* SrcTexture = */ SpectralTexture, SrcWindow,
			/* DstTexture = */ DstTexture,
			/* PostFilterParameters = */ PostFilterParameters,
			/* bScrubNaNs = */ false);
	}
}
/**
 * Forward 2D FFT of an image: a two-for-one real transform along the first direction into a
 * temporary texture, followed by a complex transform along the other direction into DstTexture.
 *
 * @param GraphBuilder - Render graph builder the passes are added to
 * @param ShaderMap - Global shader map used to look up the compute shaders
 * @param FrequencySize - Extent handed to both FFT descriptions
 * @param bHorizontalFirst - When true the two-for-one transform is horizontal and the complex
 *                           transform vertical; otherwise the roles are swapped
 * @param SrcTexture - Source texture
 * @param ROIRect - Region of SrcTexture to transform
 * @param DstTexture - Receives the result; its description and name are reused for the temporary texture
 */
void GPUFFT::FFTImage2D(
	FRDGBuilder& GraphBuilder,
	const FGlobalShaderMap* ShaderMap,
	const FIntPoint& FrequencySize, bool bHorizontalFirst,
	FRDGTextureRef SrcTexture, const FIntRect& ROIRect,
	FRDGTextureRef DstTexture)
{
	using GPUFFT::FFTDescription;

	const FIntPoint ImageSpaceExtent = ROIRect.Size();

	// Set up the transform descriptions: the two passes run along perpendicular directions.
	const FFT_XFORM_TYPE FirstXForm = bHorizontalFirst ? FFT_XFORM_TYPE::FORWARD_HORIZONTAL : FFT_XFORM_TYPE::FORWARD_VERTICAL;
	const FFT_XFORM_TYPE SecondXForm = bHorizontalFirst ? FFT_XFORM_TYPE::FORWARD_VERTICAL : FFT_XFORM_TYPE::FORWARD_HORIZONTAL;

	FFTDescription TwoForOneFFTDesc(FirstXForm, FrequencySize);
	FFTDescription ComplexFFTDesc(SecondXForm, FrequencySize);

	// The two-for-one transform data storage has two additional elements.
	const uint32 FrequencyPadding = 2;

	// This additional elements translate to two additional scanlines that need to be transformed by the complex fft
	ComplexFFTDesc.NumScanLines += FrequencyPadding;

	// Window of the intermediate result: padded along the first transform direction.
	const FIntPoint TmpExent = (bHorizontalFirst) ? FIntPoint(FrequencySize.X + FrequencyPadding, ImageSpaceExtent.Y) : FIntPoint(ImageSpaceExtent.X, FrequencySize.Y + FrequencyPadding);
	const FIntRect TmpRect = FIntRect(FIntPoint(0, 0), TmpExent);

	// Two-for-one transform: SrcTexture fills the temporary texture.
	FRDGTextureRef TempTexture = GraphBuilder.CreateTexture(DstTexture->Desc, DstTexture->Name);
	TwoForOneRealFFTImage1D::MultiPass(
		GraphBuilder, ShaderMap,
		TwoForOneFFTDesc,
		SrcTexture, ROIRect,
		TempTexture, TmpRect);

	// Complex transform in the other direction: the temporary texture fills DstTexture.
	ComplexFFTImage1D::MultiPass(
		GraphBuilder, ShaderMap,
		ComplexFFTDesc,
		TempTexture, TmpRect,
		DstTexture);
}
/**
 * 1D convolution with a pre-transformed kernel: forward complex FFT, multiplication with the
 * kernel, inverse complex FFT.
 *
 * If the signal length passes FitsInGroupSharedMemory() all three steps are done by a single
 * group-shared pass; otherwise they are recorded separately with transient "FFT.Spectral"
 * textures (created from SrcTexture's description) in between.
 *
 * @param GraphBuilder - Render graph builder the passes are added to
 * @param ShaderMap - Global shader map used to look up the compute shaders
 * @param FFTDesc - Describes the forward transform; the inverse is derived from it
 * @param SrcTexture - Source texture
 * @param SrcWindow - Region of SrcTexture that holds the data to transform
 * @param TransformedKernel - Pre-transformed convolution kernel
 * @param DstTexture - Texture that receives the result
 */
void GPUFFT::ConvolutionWithTextureImage1D::MultiPass(
	FRDGBuilder& GraphBuilder,
	const FGlobalShaderMap* ShaderMap,
	const FFTDescription& FFTDesc,
	FRDGTextureRef SrcTexture, const FIntRect& SrcWindow,
	FRDGTextureRef TransformedKernel,
	FRDGTextureRef DstTexture)
{
	// Short signals: transform, multiply and inverse transform in one group-shared pass.
	if (FitsInGroupSharedMemory(FFTDesc))
	{
		DispatchGSConvolutionWithTextureCS(
			GraphBuilder, ShaderMap, FFTDesc,
			SrcTexture, SrcWindow, TransformedKernel, DstTexture);
		return;
	}

	RDG_EVENT_SCOPE(GraphBuilder, "ConvolutionWithTextureImage1D");

	// Frequency space window, anchored at the origin.
	const FIntRect TargetRect(FIntPoint(0, 0), FFTDesc.TransformExtent());

	// Forward transform into a transient spectral texture.
	FRDGTextureRef ForwardSpectrum = GraphBuilder.CreateTexture(SrcTexture->Desc, TEXT("FFT.Spectral"));
	ComplexFFTImage1D::MultiPass(
		GraphBuilder, ShaderMap, FFTDesc,
		/* SrcTexture = */ SrcTexture, SrcWindow,
		/* DstTexture = */ ForwardSpectrum);

	// Apply convolution kernel.
	FRDGTextureRef FilteredSpectrum = GraphBuilder.CreateTexture(SrcTexture->Desc, TEXT("FFT.Spectral"));
	DispatchComplexMultiplyImagesCS(
		GraphBuilder, FFTDesc.ComputePassFlags, ShaderMap, FFTDesc.IsHorizontal(),
		/* SrcTexture = */ ForwardSpectrum, TargetRect,
		TransformedKernel,
		/* DstTexture = */ FilteredSpectrum);

	// Backward transform: same description with the inverse transform type.
	FFTDescription InverseDesc = FFTDesc;
	InverseDesc.XFormType = GetInverseOfXForm(FFTDesc.XFormType);
	ComplexFFTImage1D::MultiPass(
		GraphBuilder, ShaderMap, InverseDesc,
		/* SrcTexture = */ FilteredSpectrum, TargetRect,
		/* DstTexture = */ DstTexture);
}
/**
 * Computes the extent of the intermediate buffers used by the 2D convolution.
 *
 * @param FrequencySize - Signal lengths in X and Y used by the transforms
 * @param bHorizontalFirst - True when the two-for-one transform runs horizontally and the
 *                           convolution vertically; false for the opposite order
 * @param ROIExtent - Extent of the region of interest in the source image
 * @return The two-for-one extent with its signal length padded by 2 when the convolution
 *         passes FitsInGroupSharedMemory(); otherwise the convolution's transform extent.
 */
FIntPoint GPUFFT::Convolution2DBufferSize(const FIntPoint& FrequencySize, const bool bHorizontalFirst, const FIntPoint& ROIExtent)
{
	using GPUFFT::FFTDescription;
	using GPUFFT::FFT_XFORM_TYPE;

	// First pass: two-for-one transform along the first direction.
	FFTDescription TwoForOneFFTDesc;
	TwoForOneFFTDesc.XFormType = bHorizontalFirst ? FFT_XFORM_TYPE::FORWARD_HORIZONTAL : FFT_XFORM_TYPE::FORWARD_VERTICAL;
	TwoForOneFFTDesc.SignalLength = bHorizontalFirst ? FrequencySize.X : FrequencySize.Y;
	TwoForOneFFTDesc.NumScanLines = (bHorizontalFirst) ? ROIExtent.Y : ROIExtent.X;

	// Second pass: convolution along the other direction. The two-for-one output has two
	// additional elements, which become two additional scan lines here.
	FFTDescription ConvolutionFFTDesc;
	ConvolutionFFTDesc.XFormType = bHorizontalFirst ? FFT_XFORM_TYPE::FORWARD_VERTICAL : FFT_XFORM_TYPE::FORWARD_HORIZONTAL;
	ConvolutionFFTDesc.SignalLength = bHorizontalFirst ? FrequencySize.Y : FrequencySize.X;
	ConvolutionFFTDesc.NumScanLines = TwoForOneFFTDesc.SignalLength + 2;

	// a larger buffer is needed when the convolution can't be done in group-shared
	if (!FitsInGroupSharedMemory(ConvolutionFFTDesc))
	{
		return ConvolutionFFTDesc.TransformExtent();
	}

	// Otherwise the padded two-for-one output is large enough.
	FFTDescription PaddedTwoForOneDesc = TwoForOneFFTDesc;
	PaddedTwoForOneDesc.SignalLength += 2;
	return PaddedTwoForOneDesc.TransformExtent();
}
/**
 * Builds the 2D texture description for the frequency-space buffers of the 2D convolution:
 * extent from GPUFFT::Convolution2DBufferSize(), format from GPUFFT::PixelFormat(), no clear
 * value, usable both as shader resource and as UAV.
 */
FRDGTextureDesc CreateFrequencyDesc(const FIntPoint& FrequencySize, const bool bHorizontalFirst, const FIntPoint& SrcExtent)
{
	return FRDGTextureDesc::Create2D(
		GPUFFT::Convolution2DBufferSize(FrequencySize, bHorizontalFirst, SrcExtent),
		GPUFFT::PixelFormat(),
		FClearValueBinding::None,
		TexCreate_ShaderResource | TexCreate_UAV);
}
/**
 * 2D convolution of an image with a pre-transformed kernel, performed in frequency space.
 *
 * Three stages are recorded in order:
 *  1. forward two-for-one real FFT along the first direction,
 *  2. 1D convolution with the kernel along the other direction,
 *  3. inverse two-for-one FFT along the first direction into DstTexture.
 *
 * @param GraphBuilder - Render graph builder the passes are added to
 * @param ComputePassFlags - Pass flags copied into every FFT description
 * @param ShaderMap - Global shader map used to look up the compute shaders
 * @param FrequencySize - Signal lengths in X and Y used by the transforms
 * @param bHorizontalFirst - True to run stages 1 and 3 horizontally and stage 2 vertically
 * @param TransformedKernel - Pre-transformed convolution kernel
 * @param SrcTexture - Source texture
 * @param ROIRect - Region of SrcTexture to convolve
 * @param DstTexture - Texture that receives the result
 * @param DstRect - Destination region in DstTexture
 * @param PreFilter - Pre-filter forwarded to both two-for-one stages
 * @param PostFilterParameters - Forwarded to the inverse stage only
 */
void GPUFFT::ConvolutionWithTextureImage2D(
	FRDGBuilder& GraphBuilder,
	ERDGPassFlags ComputePassFlags,
	const FGlobalShaderMap* ShaderMap,
	const FIntPoint& FrequencySize, bool bHorizontalFirst,
	FRDGTextureRef TransformedKernel,
	FRDGTextureRef SrcTexture, const FIntRect& ROIRect,
	FRDGTextureRef DstTexture, const FIntRect& DstRect,
	const FPreFilter& PreFilter,
	FRDGBufferRef PostFilterParameters)
{
	const FIntPoint ROISize = ROIRect.Size();

	// Description shared by both intermediate textures.
	FRDGTextureDesc SpectralDesc = CreateFrequencyDesc(FrequencySize, bHorizontalFirst, ROISize);

	// Set up the transform descriptions
	FFTDescription TwoForOneFFTDesc;
	TwoForOneFFTDesc.XFormType = bHorizontalFirst ? FFT_XFORM_TYPE::FORWARD_HORIZONTAL : FFT_XFORM_TYPE::FORWARD_VERTICAL;
	TwoForOneFFTDesc.SignalLength = bHorizontalFirst ? FrequencySize.X : FrequencySize.Y;
	TwoForOneFFTDesc.NumScanLines = (bHorizontalFirst) ? ROISize.Y : ROISize.X;
	TwoForOneFFTDesc.ComputePassFlags = ComputePassFlags;

	// The output has two more elements
	FFTDescription PaddedTwoForOneDesc = TwoForOneFFTDesc;
	PaddedTwoForOneDesc.SignalLength += 2;
	const FIntRect SpectralOutputRect(FIntPoint(0, 0), PaddedTwoForOneDesc.TransformExtent());

	FFTDescription ConvolutionFFTDesc;
	ConvolutionFFTDesc.XFormType = bHorizontalFirst ? FFT_XFORM_TYPE::FORWARD_VERTICAL : FFT_XFORM_TYPE::FORWARD_HORIZONTAL;
	ConvolutionFFTDesc.SignalLength = bHorizontalFirst ? FrequencySize.Y : FrequencySize.X;
	ConvolutionFFTDesc.NumScanLines = TwoForOneFFTDesc.SignalLength + 2; // two-for-one transform generated two additional elements
	ConvolutionFFTDesc.ComputePassFlags = ComputePassFlags;

	// Same as the forward two-for-one description, with the inverse transform type.
	FFTDescription TwoForOneIvnFFTDesc = TwoForOneFFTDesc;
	TwoForOneIvnFFTDesc.XFormType = GetInverseOfXForm(TwoForOneFFTDesc.XFormType);
	TwoForOneIvnFFTDesc.ComputePassFlags = ComputePassFlags;

	// ---- Two For One Transform ---
	FRDGTextureRef TwoForOneFFTOutput = GraphBuilder.CreateTexture(SpectralDesc, TEXT("FFT.TwoForOne"));
	TwoForOneRealFFTImage1D::MultiPass(
		GraphBuilder, ShaderMap, TwoForOneFFTDesc,
		SrcTexture, ROIRect,
		TwoForOneFFTOutput, SpectralOutputRect,
		PreFilter,
		/* PostFilterParameters = */ nullptr);

	// ---- 1 D Convolution ---
	FRDGTextureRef ConvolutionOutput = GraphBuilder.CreateTexture(SpectralDesc, TEXT("FFT.Convolution"));
	ConvolutionWithTextureImage1D::MultiPass(
		GraphBuilder, ShaderMap,
		ConvolutionFFTDesc,
		TwoForOneFFTOutput, SpectralOutputRect,
		TransformedKernel,
		ConvolutionOutput);

	// ---- Inverse Two For One ---
	// NOTE(review): PreFilter is forwarded to the inverse transform as well. The multi-pass
	// inverse path does not read it, but the group-shared path still encodes it into the
	// transform type - confirm the shader only honors it on forward transforms.
	TwoForOneRealFFTImage1D::MultiPass(
		GraphBuilder, ShaderMap,
		TwoForOneIvnFFTDesc,
		ConvolutionOutput, SpectralOutputRect,
		DstTexture, DstRect,
		PreFilter,
		PostFilterParameters);
}