Files
UnrealEngineUWP/Engine/Plugins/Runtime/Metasound/Source/MetasoundEngine/Private/MetasoundWaveProxyReader.cpp
helen yang 24bf2e8fc0 Read sample rate and num frames from file header in MetaSound wave proxy reader to account for incorrect sample rate when using Resample for Device platform cook override
#jira UE-183237
#rb phil.popp, jimmy.smith
#preflight 645acead6c35ad81e6a237b2

[CL 25413497 by helen yang in ue5-main branch]
2023-05-10 15:14:06 -04:00

547 lines
19 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "MetasoundWaveProxyReader.h"
#include "Containers/Array.h"
#include "Containers/ArrayView.h"
#include "DecoderInputFactory.h"
#include "DSP/BufferVectorOperations.h"
#include "DSP/Dsp.h"
#include "HAL/IConsoleManager.h"
#include "HAL/Platform.h"
#include "HAL/UnrealMemory.h"
#include "IAudioCodec.h"
#include "IAudioCodecRegistry.h"
#include "Math/UnrealMathUtility.h"
#include "MetasoundLog.h"
#include "MetasoundTrace.h"
#include "Misc/AssertionMacros.h"
#include "Sound/SoundWave.h"
#include "Templates/SharedPointer.h"
#include "Templates/UniquePtr.h"
static int32 GMetaSoundWaveProxyReaderSimulateSeekOnNonSeekable = 0;
FAutoConsoleVariableRef CVarMetaSoundWaveProxyReaderSimulateSeekOnNonSeekable(
TEXT("au.MetaSound.WavePlayer.SimulateSeek"),
GMetaSoundWaveProxyReaderSimulateSeekOnNonSeekable ,
TEXT("If true, SoundWaves which are not of a seekable format will simulate seek calls by reading and discarding samples.\n")
TEXT("0: Do not simulate seek, !0: Simulate seek"),
ECVF_Default);
namespace Metasound
{
namespace MetasoundWaveProxyReaderPrivate
{
/** Construct a FDecoderOutput
*
* @param InNumFramesPerDecode - Maximum number of frames pushed when
* audio is decoded. The buffer will be
* able to hold twice the number of frames.
*/
FDecoderOutput::FDecoderOutput(uint32 InNumFramesPerDecode)
: NumFramesPerDecode(FMath::Max(InNumFramesPerDecode, MinNumFramesPerDecode))
{
Init();
}
/** Set the number of channels in the audio being decoded. */
void FDecoderOutput::SetNumChannels(uint32 InNumChannels)
{
NumChannels = FMath::Max(MinNumChannels, InNumChannels);
Init();
}
/** Removes all samples from the buffer. */
void FDecoderOutput::Reset()
{
Buffer.SetNum(0);
}
/** Returns requirements used by the audio codec system. */
Audio::IDecoderOutput::FRequirements FDecoderOutput::GetRequirements(const Audio::FDecodedFormatInfo& InFormat) const
{
FRequirements Requirements;
Requirements.DownstreamFormat = Audio::EBitRepresentation::Float32_Interleaved;
Requirements.NumSampleFramesWanted = NumFramesPerDecode;
Requirements.NumSampleFramesPerSecond = InFormat.NumFramesPerSec;
Requirements.NumChannels = InFormat.NumChannels;
return Requirements;
}
/** Adds samples to the buffer.
*
* This is called by the Decoder and should not be called otherwise.
*/
int32 FDecoderOutput::PushAudio(const Audio::IDecoderOutput::FPushedAudioDetails& InDetails, TArrayView<const int16> In16BitInterleave)
{
if (InDetails.NumChannels != NumChannels)
{
SetNumChannels(InDetails.NumChannels);
}
return PushAudioInternal(InDetails, In16BitInterleave);
}
/** Adds samples to the buffer.
*
* This is called by the Decoder and should not be called otherwise.
*/
int32 FDecoderOutput::PushAudio(const FPushedAudioDetails& InDetails, TArrayView<const float> InFloat32Interleave)
{
if (InDetails.NumChannels != NumChannels)
{
SetNumChannels(InDetails.NumChannels);
}
return PushAudioInternal(InDetails, InFloat32Interleave);
}
/** This should not be called. It removes 16 bit PCM samples from the buffer
* which is an unsupported operation of this class.
*/
int32 FDecoderOutput::PopAudio(TArrayView<int16> InExternalInt16Buffer, FPushedAudioDetails& OutDetails)
{
// This buffer cannot produce 16 bit PCM audio.
checkNoEntry();
return 0;
}
/** Copy samples to OutBuffer and remove them from this objects internal
* buffer.
*
*
* @param OutBuffer - A destination array to copy samples to.
* @param OutDetails - Unused.
*
* @return The actual number of samples copied.
*/
int32 FDecoderOutput::PopAudio(TArrayView<float> OutBuffer, FPushedAudioDetails& OutDetails)
{
return Buffer.Pop(OutBuffer.GetData(), OutBuffer.Num());
}
// Initialize buffer size.
void FDecoderOutput::Init()
{
// Allow it to hold two decode buffers max.
uint32 MinBufferCapacity = NumChannels * NumFramesPerDecode * 2;
constexpr bool bRetainExistingSamples = false;
Buffer.Reserve(MinBufferCapacity, bRetainExistingSamples);
}
int32 FDecoderOutput::PushAudioInternal(const FPushedAudioDetails& InDetails, TArrayView<const float> InBuffer)
{
return Buffer.Push(InBuffer.GetData(), InBuffer.Num());
}
int32 FDecoderOutput::PushAudioInternal(const FPushedAudioDetails& InDetails, TArrayView<const int16> InBuffer)
{
SampleConversionBuffer.SetNumUninitialized(InBuffer.Num(), false /* bAllowShrinking */);
// Convert 16 bit pcm to 32 bit float.
constexpr float Scalar = 1.f / 32768.f;
const int16* Src = InBuffer.GetData();
float* Dst = SampleConversionBuffer.GetData();
int32 Num = InBuffer.Num();
// Convert 1 sample at a time, slow.
for (int32 i = 0; i < Num; ++i)
{
*Dst++ = static_cast<float>(*Src++) * Scalar;
}
return PushAudioInternal(InDetails, SampleConversionBuffer);
}
}
uint32 FWaveProxyReader::ConformDecodeSize(uint32 InMaxDesiredDecodeSizeInFrames)
{
static_assert(DefaultMinDecodeSizeInFrames >= DecodeSizeQuantizationInFrames, "Min decode size less than decode size quantization");
static_assert((DefaultMinDecodeSizeInFrames % DecodeSizeQuantizationInFrames) == 0, "Min decode size must be equally divisible by decode size quantization");
const uint32 QuantizedDecodeSizeInFrames = (InMaxDesiredDecodeSizeInFrames / DecodeSizeQuantizationInFrames) * DecodeSizeQuantizationInFrames;
const uint32 ConformedDecodeSizeInFrames = FMath::Max(QuantizedDecodeSizeInFrames, DefaultMinDecodeSizeInFrames);
check((ConformedDecodeSizeInFrames % QuantizedDecodeSizeInFrames) == 0);
check(ConformedDecodeSizeInFrames > 0);
return ConformedDecodeSizeInFrames;
}
/** Construct a wave proxy reader.
*
* @param InWaveProxy - A TSharedRef of a FSoundWaveProxy which is to be played.
* @param InSettings - Reader settings.
*/
FWaveProxyReader::FWaveProxyReader(FSoundWaveProxyRef InWaveProxy, const FSettings& InSettings)
: WaveProxy(InWaveProxy)
, DecoderOutput(ConformDecodeSize(InSettings.MaxDecodeSizeInFrames))
, Settings(InSettings)
{
// Get local copies of some values from the proxy.
SampleRate = WaveProxy->GetSampleRate();
NumChannels = WaveProxy->GetNumChannels();
NumFramesInWave = WaveProxy->GetNumFrames();
DurationInSeconds = WaveProxy->GetDuration();
MaxLoopStartTimeInSeconds = FMath::Max(0, DurationInSeconds - MinLoopDurationInSeconds);
// Clamp times
Settings.StartTimeInSeconds = FMath::Clamp(Settings.StartTimeInSeconds, 0.f, DurationInSeconds);
Settings.LoopStartTimeInSeconds = ClampLoopStartTime(Settings.LoopStartTimeInSeconds);
Settings.LoopDurationInSeconds = ClampLoopDuration(Settings.LoopDurationInSeconds);
// Setup frame indices
CurrentFrameIndex = 0;
UpdateLoopBoundaries();
// Determine max size of decode buffer
Settings.MaxDecodeSizeInFrames = FMath::Max(DefaultMinDecodeSizeInFrames, Settings.MaxDecodeSizeInFrames);
DecoderOutput.SetNumChannels(NumChannels);
// Prepare to read audio
bIsDecoderValid = InitializeDecoder(Settings.StartTimeInSeconds);
}
/** Create a wave proxy reader.
*
* @param InWaveProxy - A TSharedRef of a FSoundWaveProxy which is to be played.
* @param InSettings - Reader settings.
*/
TUniquePtr<FWaveProxyReader> FWaveProxyReader::Create(FSoundWaveProxyRef InWaveProxy, const FSettings& InSettings)
{
if (InWaveProxy->GetSampleRate() <= 0.f)
{
UE_LOG(LogMetaSound, Warning, TEXT("Cannot create FWaveProxy reader due to invalid sample rate (%f). Package: %s"), InWaveProxy->GetSampleRate(), *InWaveProxy->GetPackageName().ToString());
return TUniquePtr<FWaveProxyReader>();
}
if (InWaveProxy->GetNumChannels() <= 0)
{
UE_LOG(LogMetaSound, Warning, TEXT("Cannot create FWaveProxy reader due to invalid num channels (%d). Package: %s"), InWaveProxy->GetNumChannels(), *InWaveProxy->GetPackageName().ToString());
return TUniquePtr<FWaveProxyReader>();
}
if (InWaveProxy->GetNumFrames() <= 0)
{
UE_LOG(LogMetaSound, Warning, TEXT("Cannot create FWaveProxy reader due to invalid num frames (%d). Package: %s"), InWaveProxy->GetNumFrames(), *InWaveProxy->GetPackageName().ToString());
return TUniquePtr<FWaveProxyReader>();
}
return TUniquePtr<FWaveProxyReader>(new FWaveProxyReader(InWaveProxy, InSettings));
}
/** Set whether the reader should loop the audio or not. */
void FWaveProxyReader::SetIsLooping(bool bInIsLooping)
{
if (Settings.bIsLooping != bInIsLooping)
{
Settings.bIsLooping = bInIsLooping;
UpdateLoopBoundaries();
}
}
/** Sets the beginning position of the loop. */
void FWaveProxyReader::SetLoopStartTime(float InLoopStartTimeInSeconds)
{
InLoopStartTimeInSeconds = ClampLoopStartTime(InLoopStartTimeInSeconds);
if (!FMath::IsNearlyEqual(Settings.LoopStartTimeInSeconds, InLoopStartTimeInSeconds))
{
Settings.LoopStartTimeInSeconds = InLoopStartTimeInSeconds;
UpdateLoopBoundaries();
}
}
/** Sets the duration of the loop in seconds. If the value is negative, the
* loop duration consists of the entire file. */
void FWaveProxyReader::SetLoopDuration(float InLoopDurationInSeconds)
{
InLoopDurationInSeconds = ClampLoopDuration(InLoopDurationInSeconds);
if (!FMath::IsNearlyEqual(Settings.LoopDurationInSeconds, InLoopDurationInSeconds))
{
Settings.LoopDurationInSeconds = InLoopDurationInSeconds;
UpdateLoopBoundaries();
}
}
bool FWaveProxyReader::SeekToTime(float InSeconds)
{
// Direct seeking is not supported. A new decoder must be created.
bIsDecoderValid = InitializeDecoder(InSeconds);
return bIsDecoderValid;
}
/** Copies audio into OutBuffer. It returns the number of samples copied.
* Samples not written to will be set to zero.
*/
int32 FWaveProxyReader::PopAudio(Audio::FAlignedFloatBuffer& OutBuffer)
{
using namespace Audio;
METASOUND_TRACE_CPUPROFILER_EVENT_SCOPE(Metasound::FWaveProxyReader::PopAudio);
if ((0 == NumChannels) || !bIsDecoderValid)
{
if (OutBuffer.Num() > 0)
{
FMemory::Memset(OutBuffer.GetData(), 0, sizeof(float) * OutBuffer.Num());
}
return 0;
}
checkf(0 == (OutBuffer.Num() % NumChannels), TEXT("Output buffer size must be evenly divisible by the number of channels"));
TArrayView<float> OutBufferView{OutBuffer};
int32 NumSamplesUnset = OutBuffer.Num();
int32 NumSamplesCopied = 0;
bool bDecoderOutputHasMoreData = DecoderOutput.Num() > 0;
bool bDecoderCanDecodeMoreData = bIsDecoderValid && (EDecodeResult::MoreDataRemaining == DecodeResult);
bool bCanProduceMoreAudio = bDecoderOutputHasMoreData || bDecoderCanDecodeMoreData;
while ((NumSamplesUnset > 0) && bCanProduceMoreAudio)
{
int32 NumSamplesCopiedThisLoop = PopAudioFromDecoderOutput(OutBufferView);
// Update sample counters and output buffer view
NumSamplesCopied += NumSamplesCopiedThisLoop;
NumSamplesUnset -= NumSamplesCopiedThisLoop;
OutBufferView = OutBufferView.Slice(NumSamplesCopiedThisLoop, NumSamplesUnset);
if (Settings.bIsLooping)
{
// Seek to loop start if we have used up all our decodable samples or
// are at the loop boundary.
if (0 == DecoderOutput.Num())
{
if ((!bDecoderCanDecodeMoreData) || (CurrentFrameIndex >= LoopEndFrameIndex))
{
SeekToTime(Settings.LoopStartTimeInSeconds);
bDecoderCanDecodeMoreData = bIsDecoderValid && (EDecodeResult::MoreDataRemaining == DecodeResult);
}
}
}
// Determine if we can / should decode more data.
if ((NumSamplesUnset > 0) && (DecoderOutput.Num() == 0) && bDecoderCanDecodeMoreData)
{
// Looping is handle within the FWaveProxyReader instead of
// within the decoder.
DecodeResult = Decoder->Decode(false /* bIsLooping */);
bDecoderCanDecodeMoreData = EDecodeResult::MoreDataRemaining == DecodeResult;
}
bDecoderOutputHasMoreData = DecoderOutput.Num() > 0;
bCanProduceMoreAudio = bDecoderOutputHasMoreData || bDecoderCanDecodeMoreData;
}
// Zero pad any unset samples.
if (OutBufferView.Num() > 0)
{
check(!bCanProduceMoreAudio);
// Zero out audio that was not set.
FMemory::Memset(OutBufferView.GetData(), 0, sizeof(float) * OutBufferView.Num());
CurrentFrameIndex += OutBufferView.Num() / NumChannels;
}
return NumSamplesCopied;
}
int32 FWaveProxyReader::PopAudioFromDecoderOutput(TArrayView<float> OutBufferView)
{
using namespace MetasoundWaveProxyReaderPrivate;
check(NumChannels > 0);
int32 NumSamplesCopied = 0;
if (DecoderOutput.Num() > 0)
{
// Get samples from the decoder buffer.
FDecoderOutput::FPushedAudioDetails UnusedDetails;
NumSamplesCopied = DecoderOutput.PopAudio(OutBufferView, UnusedDetails);
int32 NumFramesCopied = NumSamplesCopied / NumChannels;
CurrentFrameIndex += NumFramesCopied;
// Check whether the samples copied from the decoder extend
// past the end of the loop.
if (Settings.bIsLooping)
{
const bool bDidOvershoot = CurrentFrameIndex >= LoopEndFrameIndex;
if (bDidOvershoot)
{
// Rewind sample counters if the loop boundary was overshot.
NumFramesCopied -= (CurrentFrameIndex - LoopEndFrameIndex);
CurrentFrameIndex = LoopEndFrameIndex;
// If Settings.bIsLooping info was altered, NumFramesCopied can end up
// negative if the current frame index is past the loop end frame.
NumFramesCopied = FMath::Max(0, NumFramesCopied);
NumSamplesCopied = NumFramesCopied * NumChannels;
// Remove any remaining samples in the decoder because they
// are past the end of the loop.
DecoderOutput.Reset();
}
}
}
return NumSamplesCopied;
}
bool FWaveProxyReader::InitializeDecoder(float InStartTimeInSeconds)
{
using namespace Audio;
METASOUND_TRACE_CPUPROFILER_EVENT_SCOPE(MetaSound::FWaveProxyReader::InitializeDecoder);
// Create decoder input
FName Format = WaveProxy->GetRuntimeFormat();
DecoderInput = Audio::CreateBackCompatDecoderInput(Format, WaveProxy);
if (!DecoderInput.IsValid())
{
UE_LOG(LogMetaSound, Error, TEXT("Failed to create decoder input (format:%s) for wave (package:%s)"), *Format.ToString(), *WaveProxy->GetPackageName().ToString());
return false;
}
// Seek input to start time.
if (!FMath::IsNearlyEqual(0.0f, InStartTimeInSeconds))
{
if (WaveProxy->IsSeekable())
{
const bool bSeekSucceeded = DecoderInput->SeekToTime(InStartTimeInSeconds);
if (!bSeekSucceeded)
{
UE_LOG(LogMetaSound, Warning, TEXT("Failed to seek decoder input during initialization: (format:%s) for wave (package:%s) to time '%.6f'"),
*Format.ToString(),
*WaveProxy->GetPackageName().ToString(),
InStartTimeInSeconds);
}
}
else
{
UE_LOG(LogMetaSound, Warning, TEXT("Attempt to seek on non-seekable wave: (format:%s) for wave (package:%s) to time '%.6f'"),
*Format.ToString(),
*WaveProxy->GetPackageName().ToString(),
InStartTimeInSeconds);
}
}
// Get codec ptr by reading the header info from the decoder input.
ICodecRegistry::FCodecPtr Codec = ICodecRegistry::Get().FindCodecByParsingInput(DecoderInput.Get());
if (nullptr == Codec)
{
UE_LOG(LogMetaSound, Error, TEXT("Failed to find codec (format:%s) for wave (package:%s)"), *Format.ToString(), *WaveProxy->GetPackageName().ToString());
return false;
}
// Read the sample rate and number of frames from the header
// Similar to refreshing the wave data in FMixerBuffer::CreateStreamingBuffer
// This is a runtime hack to address incorrect sample rate on soundwaves
// on platforms with Resample for Device enabled (UE-183237)
Audio::FFormatDescriptorSection FormatDesc;
DecoderInput->FindSection(FormatDesc);
SampleRate = FormatDesc.NumFramesPerSec;
NumFramesInWave = FormatDesc.NumFrames;
// end hack
CurrentFrameIndex = FMath::Clamp(static_cast<int32>(InStartTimeInSeconds * GetSampleRate()), 0, GetNumFramesInWave());
// Create the decoder
Decoder = Codec->CreateDecoder(DecoderInput.Get(), &DecoderOutput);
if (!Decoder.IsValid())
{
UE_LOG(LogMetaSound, Error, TEXT("Failed to create decoder (format:%s) for wave (package:%s)"), *Format.ToString(), *WaveProxy->GetPackageName().ToString());
DecodeResult = EDecodeResult::Fail;
return false;
}
else
{
// The DecodeResult needs to be set to a valid state incase the prior
// decoder finished or failed.
DecodeResult = EDecodeResult::MoreDataRemaining;
}
// For non-seekable streaming waves, use a fallback method to seek
// to the start time.
const bool bUseFallbackSeekMethod = (GMetaSoundWaveProxyReaderSimulateSeekOnNonSeekable != 0) && (InStartTimeInSeconds != 0.f) && (!WaveProxy->IsSeekable());
if (bUseFallbackSeekMethod)
{
if (!bFallbackSeekMethodWarningLogged)
{
UE_LOG(LogMetaSound, Warning, TEXT("Simulating seeking in wave which is not seekable (package:%s). For better performance, set wave to a seekable format"), *WaveProxy->GetPackageName().ToString());
bFallbackSeekMethodWarningLogged = true;
}
int32 NumSamplesToDiscard = CurrentFrameIndex * NumChannels;
DiscardSamples(NumSamplesToDiscard);
}
// return true if all the components were successfully create
return true;
}
void FWaveProxyReader::DiscardSamples(int32 InNumSamplesToDiscard)
{
while (InNumSamplesToDiscard > 0)
{
DecodeResult = Decoder->Decode(false /* bIsLooping */);
int32 NumSamplesToDiscardThisLoop = FMath::Min(DecoderOutput.Num(), InNumSamplesToDiscard);
if (NumSamplesToDiscardThisLoop < 1)
{
UE_LOG(LogMetaSound, Error, TEXT("Failed to decode samples (package: %s)"), *WaveProxy->GetPackageName().ToString());
break;
}
int32 ActualNumSamplesDiscarded = DecoderOutput.PopAudio(NumSamplesToDiscardThisLoop);
InNumSamplesToDiscard -= ActualNumSamplesDiscarded;
if ((InNumSamplesToDiscard > 0) && (DecodeResult != EDecodeResult::MoreDataRemaining))
{
UE_LOG(LogMetaSound, Error, TEXT("Failed to simulate seek (seek to frame: %d, package: %s)"), CurrentFrameIndex, *WaveProxy->GetPackageName().ToString());
break;
}
}
}
float FWaveProxyReader::ClampLoopStartTime(float InStartTimeInSeconds)
{
return FMath::Clamp(InStartTimeInSeconds, 0.f, MaxLoopStartTimeInSeconds);
}
float FWaveProxyReader::ClampLoopDuration(float InDurationInSeconds)
{
if (InDurationInSeconds <= 0.f)
{
return MaxLoopDurationInSeconds;
}
return FMath::Max(InDurationInSeconds, MinLoopDurationInSeconds);
}
void FWaveProxyReader::UpdateLoopBoundaries()
{
if (Settings.bIsLooping)
{
const int32 MinLoopDurationInFrames = FMath::CeilToInt(MinLoopDurationInSeconds * SampleRate);
const float LoopEndTime = Settings.LoopStartTimeInSeconds + Settings.LoopDurationInSeconds;
const int32 MinLoopEndFrameIndex = LoopStartFrameIndex + MinLoopDurationInFrames;
const int32 MaxLoopEndFrameIndex = NumFramesInWave;
LoopStartFrameIndex = FMath::FloorToInt(Settings.LoopStartTimeInSeconds * SampleRate);
LoopEndFrameIndex = FMath::Clamp(LoopEndTime * SampleRate, MinLoopEndFrameIndex, MaxLoopEndFrameIndex);
}
else
{
LoopStartFrameIndex = 0;
LoopEndFrameIndex = NumFramesInWave;
}
}
}