Files
UnrealEngineUWP/Engine/Source/Runtime/SignalProcessing/Private/VoiceProcessing.cpp

380 lines
14 KiB
C++
Raw Normal View History

// Copyright Epic Games, Inc. All Rights Reserved.
#include "DSP/VoiceProcessing.h"
#include "DSP/AudioDebuggingUtilities.h"
#include "DSP/FloatArrayMath.h"
namespace Audio
{
// Creates an averager whose history buffer holds NumSamples zeroed entries.
// A request for zero samples is raised to one so the buffer is never empty.
FMovingAverager::FMovingAverager(uint32 NumSamples)
	: BufferCursor(0)
	, AccumulatedSum(0.0f)
{
	const uint32 HistoryLength = FMath::Max<uint32>(NumSamples, 1);

	AudioBuffer.Reset();
	AudioBuffer.AddZeroed(HistoryLength);
}
// Pushes one sample into the history buffer and pops the oldest one.
//
// @param Input   Newest sample to store.
// @param Output  Receives the sample that is being evicted from the buffer.
// @return        Mean absolute amplitude over the whole history buffer.
//
// The call is serialized against SetNumSamples() through ProcessCriticalSection.
float FMovingAverager::ProcessInput(const float& Input, float& Output)
{
	FScopeLock ScopeLock(&ProcessCriticalSection);

	// Copy the input first: Input and Output are both references, and a caller may pass the same variable for both.
	const float IncomingSample = Input;

	float* BufferPtr = AudioBuffer.GetData();
	Output = BufferPtr[BufferCursor];
	BufferPtr[BufferCursor] = IncomingSample;
	BufferCursor = (BufferCursor + 1) % AudioBuffer.Num();

	// Instead of summing our entire buffer every tick, we simply add the incoming sample amplitude, and subtract the outgoing amplitude.
	// TODO: With this approach AccumulatedSum will start to slowly drift over time from accumulated rounding error. Every so often we will need to
	// reset AccumulatedSum to the actual sum of AudioBuffer.
	AccumulatedSum += (FMath::Abs(IncomingSample) - FMath::Abs(Output));

	// Normalize by the window length so the result is an average, matching FMovingVectorAverager::ProcessAudio.
	return AccumulatedSum / static_cast<float>(AudioBuffer.Num());
}
// Resizes the history buffer to NumSamples entries and clears all accumulated state.
//
// @param NumSamples  New history length. Zero is raised to one, as in the constructor,
//                    because ProcessInput() wraps its cursor with a modulo on the buffer size.
//
// The call is serialized against ProcessInput() through ProcessCriticalSection.
void FMovingAverager::SetNumSamples(uint32 NumSamples)
{
	FScopeLock ScopeLock(&ProcessCriticalSection);

	AudioBuffer.Reset();
	AudioBuffer.AddZeroed(FMath::Max<uint32>(NumSamples, 1));

	// The old cursor may point past the end of a smaller buffer, so restart from the beginning.
	BufferCursor = 0;
	AccumulatedSum = 0.0f;
}
// Creates a vectorized averager whose history covers NumSamples samples, stored as 4-float vectors.
//
// @param NumSamples  History length in samples. Must be divisible by 4. Values below 4 still
//                    allocate a single vector, because ProcessAudio() wraps its cursor with a
//                    modulo on the buffer size and an empty buffer would divide by zero.
FMovingVectorAverager::FMovingVectorAverager(uint32 NumSamples)
	: BufferCursor(0)
	, AccumulatedSum(VectorZero())
{
	checkf(NumSamples % 4 == 0, TEXT("NumSamples must be divisible by 4!"));

	const uint32 NumVectors = FMath::Max<uint32>(NumSamples / 4, 1);
	AudioBuffer.Reset();
	AudioBuffer.AddZeroed(NumVectors);
}
float FMovingVectorAverager::ProcessAudio(const VectorRegister4Float& Input, VectorRegister4Float& Output)
{
VectorRegister4Float* BufferPtr = AudioBuffer.GetData();
Output = BufferPtr[BufferCursor];
BufferPtr[BufferCursor] = Input;
BufferCursor = (BufferCursor + 1) % AudioBuffer.Num();
// Instead of summing our entire buffer every tick, we simply add the incoming sample amplitude, and subtract the outgoing amplitude.
// TODO: With this approach AccumulatedSum will start to slowly drift over time from accumulated rounding error. Every so often we will need to
// reset AccumulatedSum to the actual sum of AudioBuffer.
const VectorRegister4Float AbsInput = VectorAbs(Input);
const VectorRegister4Float AbsOutput = VectorAbs(Output);
const VectorRegister4Float TotalAccumulation = VectorSubtract(AbsInput, AbsOutput);
AccumulatedSum = VectorAdd(AccumulatedSum, TotalAccumulation);
alignas(16) float PartionedSums[4];
VectorStore(AccumulatedSum, PartionedSums);
return (PartionedSums[0] + PartionedSums[1] + PartionedSums[2] + PartionedSums[3]) / (AudioBuffer.Num() * 4);
}
// Builds a silence detector.
//
// @param InOnsetThreshold          Amplitude that must be exceeded to start outputting audio.
// @param InReleaseThreshold        Amplitude below which output stops; expected to be lower than the onset threshold.
// @param AttackDurationInSamples   Length, in samples, of the averaging window handed to the vector averager.
// @param ReleaseDurationInSamples  Release time, in samples, used to derive the smoothing coefficient.
//
// ProcessBuffer() applies the release smoothing once per 4-sample vector, so the duration is converted to
// vectors by dividing by 4. The division is done in floating point so durations that are not a multiple
// of 4 are not truncated (and durations below 4 do not collapse to zero).
FSilenceDetection::FSilenceDetection(float InOnsetThreshold, float InReleaseThreshold, int32 AttackDurationInSamples, int32 ReleaseDurationInSamples)
	: Averager(AttackDurationInSamples)
	, ReleaseTau(FMath::Exp(-1.0f / (static_cast<float>(ReleaseDurationInSamples) / 4.0f)))
	, OnsetThreshold(InOnsetThreshold)
	, ReleaseThreshold(InReleaseThreshold)
	, CurrentAmplitude(0.0f)
	, bOnsetWasInLastBuffer(false)
{
	ensureMsgf(InOnsetThreshold > InReleaseThreshold, TEXT("The onset threshold should always be louder than the release threshold."));
}
// Runs one buffer of audio through the detector.
//
// @param InAudio     Input samples; read four at a time.
// @param OutAudio    Destination for the (delayed) samples coming out of the averager.
// @param NumSamples  Number of input samples. The loops step by 4 — NOTE(review): callers presumably pass a multiple of 4.
// @return            Number of samples written to OutAudio.
int32 FSilenceDetection::ProcessBuffer(const float* InAudio, float* OutAudio, int32 NumSamples)
{
	const bool bWasOutputting = bOnsetWasInLastBuffer || CurrentAmplitude > ReleaseThreshold;

	if (!bWasOutputting)
	{
		// We entered this callback silent: keep feeding the averager, and only start emitting
		// its output once an onset has been seen.
		int32 NumWritten = 0;
		bool bOnsetDetected = false;
		float LatestAmplitude = 0.0f;

		for (int32 ReadIndex = 0; ReadIndex < NumSamples; ReadIndex += 4)
		{
			const VectorRegister4Float Incoming = VectorLoad(InAudio + ReadIndex);
			VectorRegister4Float Delayed;
			LatestAmplitude = Averager.ProcessAudio(Incoming, Delayed);

			if (!bOnsetDetected)
			{
				// The vector that trips the threshold is not itself written out.
				bOnsetDetected = LatestAmplitude > OnsetThreshold;
				continue;
			}

			VectorStore(Delayed, OutAudio + NumWritten);
			NumWritten += 4;
		}

		CurrentAmplitude = LatestAmplitude;
		bOnsetWasInLastBuffer = bOnsetDetected;
		check(CurrentAmplitude < 100.0f);
		return NumWritten;
	}

	// We were already outputting audio, so we keep doing so for the whole of this buffer.
	// Whether we stop is decided from the amplitude at the end of the callback.
	bOnsetWasInLastBuffer = false;

	for (int32 SampleIndex = 0; SampleIndex < NumSamples; SampleIndex += 4)
	{
		const VectorRegister4Float Incoming = VectorLoad(InAudio + SampleIndex);
		VectorRegister4Float Delayed;
		const float LatestAmplitude = Averager.ProcessAudio(Incoming, Delayed);

		// One-pole smoothing of the tracked amplitude towards the latest averager reading.
		CurrentAmplitude = ReleaseTau * (CurrentAmplitude - LatestAmplitude) + LatestAmplitude;
		VectorStore(Delayed, OutAudio + SampleIndex);
	}

	// Dropping back to silence: fade the tail of this buffer out.
	if (CurrentAmplitude < ReleaseThreshold)
	{
		constexpr int32 MaxFadeLength = 32;
		const int32 FadeLength = FMath::Min(NumSamples, MaxFadeLength);
		const int32 FadeStart = NumSamples - FadeLength;

		TArrayView<float> FadeRegion(OutAudio + FadeStart, FadeLength);
		Audio::ArrayFade(FadeRegion, 1.0f, 0.0f);
	}

	return NumSamples;
}
void FSilenceDetection::SetThreshold(float InThreshold)
{
OnsetThreshold = InThreshold;
}
// Returns the amplitude estimate left behind by the most recent ProcessBuffer() call.
float FSilenceDetection::GetCurrentAmplitude()
{
	const float LatestAmplitude = CurrentAmplitude;
	return LatestAmplitude;
}
// Builds a gain control that steers towards InGainTarget.
//
// @param InGainTarget    Value divided by the estimated amplitude to obtain the gain.
// @param InAdaptiveRate  Window length handed to the internal peak detector.
// @param InGainMin       Lower clamp for the applied gain (declared default: 0.5f).
// @param InGainMax       Upper clamp for the applied gain (declared default: 2.0f).
FSlowAdaptiveGainControl::FSlowAdaptiveGainControl(
	float InGainTarget,
	int32 InAdaptiveRate,
	float InGainMin /*= 0.5f*/,
	float InGainMax /*= 2.0f*/)
	: PeakDetector(InAdaptiveRate)
	, GainTarget(InGainTarget)
	, PreviousGain(1.0f) // Start at unity gain.
	, GainMin(InGainMin)
	, GainMax(InGainMax)
{
}
float FSlowAdaptiveGainControl::ProcessAudio(float* InAudio, int32 NumSamples, float InAmplitude)
{
float PeakDetectorOutput = 0.0f; // unused
const float EstimatedPeak = PeakDetector.ProcessInput(InAmplitude, PeakDetectorOutput);
const float TargetGain = GetTargetGain(EstimatedPeak);
TArrayView<float> InAudioView(InAudio, NumSamples);
Audio::ArrayFade(InAudioView, PreviousGain, TargetGain);
PreviousGain = TargetGain;
return TargetGain;
}
// Forwards a new window length to the internal peak detector.
void FSlowAdaptiveGainControl::SetAdaptiveRate(int32 InAdaptiveRate)
{
	auto& Detector = PeakDetector;
	Detector.SetNumSamples(InAdaptiveRate);
}
// Computes the gain that would bring InAmplitude to GainTarget, clamped to [GainMin, GainMax].
//
// @param InAmplitude  Estimated signal amplitude.
// @return             Clamped gain. A non-positive or NaN amplitude is treated as silence and yields GainMax.
float FSlowAdaptiveGainControl::GetTargetGain(float InAmplitude)
{
	// Avoid dividing by zero, and avoid the sign flip a slightly negative estimate would cause:
	// a tiny positive amplitude clamps to GainMax, so zero and below should as well.
	// The negated comparison also catches NaN.
	if (!(InAmplitude > 0.0f))
	{
		return GainMax;
	}

	const float UnclampedGain = GainTarget / InAmplitude;
	return FMath::Clamp(UnclampedGain, GainMin, GainMax);
}
// Builds an adaptive filter of the given length.
//
// @param FilterLength       Length of the filter; also stored as the window size.
// @param AudioCallbackSize  Not used by this constructor.
FAdaptiveFilter_DEPRECATED::FAdaptiveFilter_DEPRECATED(int32 FilterLength, int32 AudioCallbackSize)
	: WindowSize(FilterLength)
	, CurrentStepsUntilConvergence(0)
{
	const int32 NumFrequencyBins = FFTIntrinsics::NextPowerOf2(FilterLength * 2 - 1);

	// Every frequency buffer starts out zeroed.
	WeightDeltas.InitZeroed(NumFrequencyBins);
	CurrentWeights.InitZeroed(NumFrequencyBins);
	InputFrequencies.InitZeroed(NumFrequencyBins);

	// For passthrough, set the real part of every current weight to 1.
	float* RealWeights = CurrentWeights.Real.GetData();
	const int32 NumRealWeights = CurrentWeights.Real.Num();
	for (int32 BinIndex = 0; BinIndex < NumRealWeights; ++BinIndex)
	{
		RealWeights[BinIndex] = 1.0f;
	}

	Convolver.SetFilter(CurrentWeights, FilterLength);
}
// Moves the current weights one step towards the target and pushes them to the convolver.
// Does nothing once the remaining step count has reached zero.
void FAdaptiveFilter_DEPRECATED::AdaptFilter()
{
	const bool bStillConverging = CurrentStepsUntilConvergence > 0;
	if (bStillConverging)
	{
		IncrementWeights();
		--CurrentStepsUntilConvergence;
		Convolver.SetFilter(CurrentWeights, WindowSize);
	}
}
// Computes the per-step weight increments needed to move the current weights to a new target.
//
// @param InWeightsReal   Real parts of the target weights.
// @param InWeightsImag   Imaginary parts of the target weights.
// @param NumWeights      Number of weights; must equal the size of the internal weight buffers and be divisible by 4.
// @param InLearningRate  Fraction of the remaining distance covered per step. The number of steps is
//                        ceil(1 / InLearningRate). A non-positive (or NaN) rate disables adaptation.
void FAdaptiveFilter_DEPRECATED::SetWeightDeltas(const float* InWeightsReal, const float* InWeightsImag, int32 NumWeights, float InLearningRate)
{
	// NumWeights needs to match the number of weights the internal buffers were allocated with.
	checkSlow(NumWeights == WeightDeltas.Real.Num());

	// If NumWeights is not divisible by four, something is terribly wrong, since we are using an FFT with this.
	checkSlow(NumWeights % 4 == 0);

	// 1 / 0 is infinite and converting that to an integer is undefined, so bail out early.
	// With no steps remaining AdaptFilter() will not touch the weights.
	if (!(InLearningRate > 0.0f))
	{
		CurrentStepsUntilConvergence = 0;
		return;
	}

	const float* CurrentRealBuffer = CurrentWeights.Real.GetData();
	const float* CurrentImagBuffer = CurrentWeights.Imag.GetData();

	float* DeltaRealBuffer = WeightDeltas.Real.GetData();
	float* DeltaImagBuffer = WeightDeltas.Imag.GetData();

	// Our convergence rate is 1 / our NumConvergence steps.
	const VectorRegister4Float ConvergenceRate = VectorSetFloat1(InLearningRate);
	CurrentStepsUntilConvergence = FMath::CeilToInt(1.0f / InLearningRate);

	for (int32 WeightIndex = 0; WeightIndex < NumWeights; WeightIndex += 4)
	{
		// Delta = (Target - Current) * LearningRate, for the real parts...
		const VectorRegister4Float TargetReal = VectorLoad(&InWeightsReal[WeightIndex]);
		const VectorRegister4Float CurrentReal = VectorLoad(&CurrentRealBuffer[WeightIndex]);
		const VectorRegister4Float DeltaReal = VectorMultiply(VectorSubtract(TargetReal, CurrentReal), ConvergenceRate);
		VectorStore(DeltaReal, &DeltaRealBuffer[WeightIndex]);

		// ...and for the imaginary parts.
		const VectorRegister4Float TargetImag = VectorLoad(&InWeightsImag[WeightIndex]);
		const VectorRegister4Float CurrentImag = VectorLoad(&CurrentImagBuffer[WeightIndex]);
		const VectorRegister4Float DeltaImag = VectorMultiply(VectorSubtract(TargetImag, CurrentImag), ConvergenceRate);
		VectorStore(DeltaImag, &DeltaImagBuffer[WeightIndex]);
	}
}
void FAdaptiveFilter_DEPRECATED::IncrementWeights()
{
const int32 NumWeights = CurrentWeights.Real.Num();
float* CurrentRealBuffer = CurrentWeights.Real.GetData();
float* CurrentImagBuffer = CurrentWeights.Imag.GetData();
const float* DeltasRealBuffer = WeightDeltas.Real.GetData();
const float* DeltasImagBuffer = WeightDeltas.Imag.GetData();
for (int32 WeightIndex = 0; WeightIndex < NumWeights; WeightIndex += 4)
{
VectorRegister4Float DeltaReal = VectorLoad(&DeltasRealBuffer[WeightIndex]);
VectorRegister4Float CurrentReal = VectorLoad(&CurrentRealBuffer[WeightIndex]);
CurrentReal = VectorAdd(CurrentReal, DeltaReal);
VectorStore(CurrentReal, &CurrentRealBuffer[WeightIndex]);
VectorRegister4Float DeltaImag = VectorLoad(&DeltasImagBuffer[WeightIndex]);
VectorRegister4Float CurrentImag = VectorLoad(&CurrentImagBuffer[WeightIndex]);
CurrentImag = VectorAdd(CurrentImag, DeltaImag);
VectorStore(CurrentImag, &CurrentImagBuffer[WeightIndex]);
}
}
void FAdaptiveFilter_DEPRECATED::ProcessAudio(float* InAudio, int32 NumSamples)
{
AdaptFilter();
Convolver.ProcessAudio(InAudio, NumSamples);
}
void FAdaptiveFilter_DEPRECATED::SetWeights(const FrequencyBuffer& InFilterWeights, int32 FilterLength, float InLearningRate)
{
SetWeightDeltas(InFilterWeights.Real.GetData(), InFilterWeights.Imag.GetData(), InFilterWeights.Real.Num(), InLearningRate);
}
// Nothing to set up explicitly; all members are default-constructed.
FFDAPFilterComputer_DEPRECATED::FFDAPFilterComputer_DEPRECATED() = default;
void FFDAPFilterComputer_DEPRECATED::GenerateWeights(const float* IncomingSignal, int32 NumIncomingSamples, const float* OutgoingSignal, int32 NumOutgoingSamples, FrequencyBuffer& OutWeights)
{
int32 FFTSize = FFTIntrinsics::NextPowerOf2(NumIncomingSamples + NumOutgoingSamples - 1);
// zero pad inputs appropriately:
ZeroPaddedIncomingBuffer.Reset();
ZeroPaddedIncomingBuffer.AddZeroed(FFTSize);
FMemory::Memcpy(ZeroPaddedIncomingBuffer.GetData(), IncomingSignal, NumIncomingSamples * sizeof(float));
ZeroPaddedOutgoingBuffer.Reset();
ZeroPaddedOutgoingBuffer.AddZeroed(FFTSize);
FMemory::Memcpy(ZeroPaddedOutgoingBuffer.GetData(), OutgoingSignal, NumOutgoingSamples * sizeof(float));
int32 FilterLength = FMath::Max(NumIncomingSamples, NumOutgoingSamples);
PRAGMA_DISABLE_DEPRECATION_WARNINGS
CrossCorrelate(ZeroPaddedIncomingBuffer.GetData(), ZeroPaddedOutgoingBuffer.GetData(), FilterLength, FFTSize, IncomingFrequencies, OutgoingFrequencies, OutWeights);
PRAGMA_ENABLE_DEPRECATION_WARNINGS
// TODO: Perform linear phase conversion on OutWeights.
}
// Builds an echo canceller.
//
// @param InConvergenceRate   Learning rate handed to the adaptive filter whenever new weights are set.
// @param CallbackSize        Forwarded to the adaptive filter's constructor.
// @param InFilterLength      Filter length, also the block size used by ProcessAudio(); expected to be a power of two.
// @param InFilterUpdateRate  Value the update counter is reloaded from (declared default: 1).
FAcousticEchoCancellation_DEPRECATED::FAcousticEchoCancellation_DEPRECATED(
	float InConvergenceRate,
	int32 CallbackSize,
	int32 InFilterLength,
	int32 InFilterUpdateRate /*= 1*/)
	: AdaptiveFilter(InFilterLength, CallbackSize)
	, ConvergenceRate(InConvergenceRate)
	, FilterLength(InFilterLength)
	, FilterUpdateRate(InFilterUpdateRate)
	, FitlerUpdateCounter(InFilterUpdateRate)
{
	checkSlow(FMath::IsPowerOfTwo(FilterLength));
}
void FAcousticEchoCancellation_DEPRECATED::ProcessAudio(float* InAudio, int32 NumSamples)
{
checkSlow(FMath::IsPowerOfTwo(NumSamples));
int32 NumBlocks = NumSamples / FilterLength;
for (int32 Index = 0; Index < NumBlocks; Index++)
{
float* BlockAudio = &InAudio[Index * FilterLength];
if (FitlerUpdateCounter == 0)
{
FilterComputerInput.Reset();
FilterComputerInput.AddZeroed(FilterLength);
// Since we are likely sampling audio buffers much less often than we are rendering them,
// we always skip ahead to the latest audio to compute our filter coefficients.
const bool bUseLatestAudio = true;
// Grab our audio from our taps:
int32 PopResult = PatchMixer.PopAudio(FilterComputerInput.GetData(), FilterLength, bUseLatestAudio);
if (PopResult != 0)
{
// Compute our new filter target and pass it to the adaptive filter:
FilterComputer.GenerateWeights(FilterComputerInput.GetData(), FilterLength, BlockAudio, FilterLength, FilterComputerOutput);
AdaptiveFilter.ProcessAudio(BlockAudio, NumSamples);
AdaptiveFilter.SetWeights(FilterComputerOutput, FilterLength, ConvergenceRate);
}
else
{
// If our taps were empty, don't both updating the filter.
AdaptiveFilter.ProcessAudio(BlockAudio, FilterLength);
}
FitlerUpdateCounter = FilterUpdateRate - 1;
}
else
{
AdaptiveFilter.ProcessAudio(BlockAudio, FilterLength);
FitlerUpdateCounter--;
}
}
}
// Registers a new input on the internal patch mixer and returns its handle.
//
// @param ExpectedLatency  Forwarded to the patch mixer.
// @param Gain             Forwarded to the patch mixer (declared default: 1.0f).
Audio::FPatchInput FAcousticEchoCancellation_DEPRECATED::AddNewSignalPatch(int32 ExpectedLatency, float Gain /*= 1.0f*/)
{
	Audio::FPatchInput NewPatch = PatchMixer.AddNewInput(ExpectedLatency, Gain);
	return NewPatch;
}
// Removes a previously added input from the internal patch mixer.
//
// @param Patch  Handle of the input to remove.
void FAcousticEchoCancellation_DEPRECATED::RemoveSignalPatch(const FPatchInput& Patch)
{
	auto& Mixer = PatchMixer;
	Mixer.RemovePatch(Patch);
}
}