gecko/dom/media/fmp4/apple/AppleATDecoder.cpp
Jean-Yves Avenard 1afe38d07f Bug 1075332 - Refactor mac audio decoder. Properly calculate timestamps. r=rillian
Also, in order to prevent the MediaDecoderStateMachine from stalling while waiting for audio data,
feed back as many decoded audio frames as were first submitted to the decoder in one go.
2014-11-12 15:13:02 +11:00

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include <AudioToolbox/AudioToolbox.h>
#include "AppleUtils.h"
#include "MP4Reader.h"
#include "MP4Decoder.h"
#include "mozilla/RefPtr.h"
#include "mp4_demuxer/Adts.h"
#include "mp4_demuxer/DecoderData.h"
#include "nsIThread.h"
#include "AppleATDecoder.h"
#include "prlog.h"

#ifdef PR_LOGGING
PRLogModuleInfo* GetAppleMediaLog();
#define LOG(...) PR_LOG(GetAppleMediaLog(), PR_LOG_DEBUG, (__VA_ARGS__))
#else
#define LOG(...)
#endif

namespace mozilla {

AppleATDecoder::AppleATDecoder(const mp4_demuxer::AudioDecoderConfig& aConfig,
MediaTaskQueue* aAudioTaskQueue,
MediaDataDecoderCallback* aCallback)
: mConfig(aConfig)
, mTaskQueue(aAudioTaskQueue)
, mCallback(aCallback)
, mConverter(nullptr)
, mStream(nullptr)
, mCurrentAudioTimestamp(-1)
, mNextAudioTimestamp(-1)
, mSamplePosition(0)
, mSizeDecoded(0)
, mLastError(noErr)
{
MOZ_COUNT_CTOR(AppleATDecoder);
LOG("Creating Apple AudioToolbox decoder");
LOG("Audio Decoder configuration: %s %d Hz %d channels %d bits per channel",
mConfig.mime_type,
mConfig.samples_per_second,
mConfig.channel_count,
mConfig.bits_per_sample);
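// Map the container MIME type to the AudioFileTypeID hint passed to
// AudioFileStreamOpen() below.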
if (!strcmp(aConfig.mime_type, "audio/mpeg")) {
mFileType = kAudioFileMP3Type;
} else if (!strcmp(aConfig.mime_type, "audio/mp4a-latm")) {
mFileType = kAudioFileAAC_ADTSType;
} else {
mFileType = 0;
}
}

AppleATDecoder::~AppleATDecoder()
{
MOZ_COUNT_DTOR(AppleATDecoder);
MOZ_ASSERT(!mConverter);
MOZ_ASSERT(!mStream);
}

static void
_MetadataCallback(void* aDecoder,
AudioFileStreamID aStream,
AudioFileStreamPropertyID aProperty,
UInt32* aFlags)
{
LOG("AppleATDecoder metadata callback");
AppleATDecoder* decoder = static_cast<AppleATDecoder*>(aDecoder);
decoder->MetadataCallback(aStream, aProperty, aFlags);
}

static void
_SampleCallback(void* aDecoder,
UInt32 aNumBytes,
UInt32 aNumPackets,
const void* aData,
AudioStreamPacketDescription* aPackets)
{
LOG("AppleATDecoder sample callback %u bytes %u packets",
aNumBytes, aNumPackets);
AppleATDecoder* decoder = static_cast<AppleATDecoder*>(aDecoder);
decoder->SampleCallback(aNumBytes, aNumPackets, aData, aPackets);
}

nsresult
AppleATDecoder::Init()
{
if (!mFileType) {
NS_ERROR("Non recognised format");
return NS_ERROR_FAILURE;
}
LOG("Initializing Apple AudioToolbox decoder");
OSStatus rv = AudioFileStreamOpen(this,
_MetadataCallback,
_SampleCallback,
mFileType,
&mStream);
if (rv) {
NS_ERROR("Couldn't open AudioFileStream");
return NS_ERROR_FAILURE;
}
return NS_OK;
}

nsresult
AppleATDecoder::Input(mp4_demuxer::MP4Sample* aSample)
{
LOG("mp4 input sample %p %lld us %lld pts%s %llu bytes audio",
aSample,
aSample->duration,
aSample->composition_timestamp,
aSample->is_sync_point ? " keyframe" : "",
(unsigned long long)aSample->size);
// Queue a task to perform the actual decoding on a separate thread.
mTaskQueue->Dispatch(
NS_NewRunnableMethodWithArg<nsAutoPtr<mp4_demuxer::MP4Sample>>(
this,
&AppleATDecoder::SubmitSample,
nsAutoPtr<mp4_demuxer::MP4Sample>(aSample)));
return NS_OK;
}

nsresult
AppleATDecoder::Flush()
{
LOG("Flushing AudioToolbox AAC decoder");
mTaskQueue->Flush();
OSStatus rv = AudioConverterReset(mConverter);
if (rv) {
LOG("Error %d resetting AudioConverter", rv);
return NS_ERROR_FAILURE;
}
// Notify our task queue of the coming input discontinuity.
mTaskQueue->Dispatch(
NS_NewRunnableMethod(this, &AppleATDecoder::SignalFlush));
return NS_OK;
}

nsresult
AppleATDecoder::Drain()
{
LOG("Draining AudioToolbox AAC decoder");
mTaskQueue->AwaitIdle();
mCallback->DrainComplete();
return Flush();
}

nsresult
AppleATDecoder::Shutdown()
{
LOG("Shutdown: Apple AudioToolbox AAC decoder");
OSStatus rv1 = AudioConverterDispose(mConverter);
if (rv1) {
LOG("error %d disposing of AudioConverter", rv1);
} else {
mConverter = nullptr;
}
OSStatus rv2 = AudioFileStreamClose(mStream);
if (rv2) {
LOG("error %d closing AudioFileStream", rv2);
} else {
mStream = nullptr;
}
return (rv1 && rv2) ? NS_OK : NS_ERROR_FAILURE;
}

void
AppleATDecoder::MetadataCallback(AudioFileStreamID aFileStream,
AudioFileStreamPropertyID aPropertyID,
UInt32* aFlags)
{
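// ReadyToProducePackets fires once the parser has seen enough data to
// describe the audio stream, so we can now create the converter.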
if (aPropertyID == kAudioFileStreamProperty_ReadyToProducePackets) {
SetupDecoder();
}
}

struct PassthroughUserData {
AppleATDecoder* mDecoder;
UInt32 mNumPackets;
UInt32 mDataSize;
const void* mData;
AudioStreamPacketDescription* mPacketDesc;
bool mDone;
};

// Error value we pass through the decoder to signal that nothing
// has gone wrong during decoding, but more data is needed.
const uint32_t kNeedMoreData = 'MOAR';

static OSStatus
_PassthroughInputDataCallback(AudioConverterRef aAudioConverter,
UInt32* aNumDataPackets /* in/out */,
AudioBufferList* aData /* in/out */,
AudioStreamPacketDescription** aPacketDesc,
void* aUserData)
{
PassthroughUserData* userData = (PassthroughUserData*)aUserData;
if (userData->mDone) {
// We make sure this callback is run _once_, with all the data we received
// from |AudioFileStreamParseBytes|. When we return an error, the decoder
// simply passes the return value on to the calling method,
// |SampleCallback|; and flushes all of the audio frames it had
// buffered. It does not change the decoder's state.
LOG("requested too much data; returning\n");
*aNumDataPackets = 0;
return kNeedMoreData;
}
userData->mDone = true;
LOG("AudioConverter wants %u packets of audio data\n", *aNumDataPackets);
*aNumDataPackets = userData->mNumPackets;
*aPacketDesc = userData->mPacketDesc;
aData->mBuffers[0].mNumberChannels = userData->mDecoder->mConfig.channel_count;
aData->mBuffers[0].mDataByteSize = userData->mDataSize;
aData->mBuffers[0].mData = const_cast<void*>(userData->mData);
return noErr;
}

void
AppleATDecoder::SampleCallback(uint32_t aNumBytes,
uint32_t aNumPackets,
const void* aData,
AudioStreamPacketDescription* aPackets)
{
// Pick a multiple of the frame size close to a power of two
// for efficient allocation.
const uint32_t MAX_AUDIO_FRAMES = 128;
const uint32_t maxDecodedSamples = MAX_AUDIO_FRAMES * mConfig.channel_count;
// Descriptions for _decompressed_ audio packets. Ignored.
nsAutoArrayPtr<AudioStreamPacketDescription>
packets(new AudioStreamPacketDescription[MAX_AUDIO_FRAMES]);
// This API insists on having packets spoon-fed to it from a callback.
// This structure exists only to pass our state and the result of the
// parser on to the callback above.
PassthroughUserData userData =
{ this, aNumPackets, aNumBytes, aData, aPackets, false };
// Decompressed audio buffer
nsAutoArrayPtr<AudioDataValue> decoded(new AudioDataValue[maxDecodedSamples]);
do {
AudioBufferList decBuffer;
decBuffer.mNumberBuffers = 1;
decBuffer.mBuffers[0].mNumberChannels = mOutputFormat.mChannelsPerFrame;
decBuffer.mBuffers[0].mDataByteSize =
maxDecodedSamples * sizeof(AudioDataValue);
decBuffer.mBuffers[0].mData = decoded.get();
// in: the max number of packets we can handle from the decoder.
// out: the number of packets the decoder is actually returning.
UInt32 numFrames = MAX_AUDIO_FRAMES;
OSStatus rv = AudioConverterFillComplexBuffer(mConverter,
_PassthroughInputDataCallback,
&userData,
&numFrames /* in/out */,
&decBuffer,
packets.get());
if (rv && rv != kNeedMoreData) {
LOG("Error decoding audio stream: %d\n", rv);
mLastError = rv;
break;
}
mOutputData.AppendElements(decoded.get(),
numFrames * mConfig.channel_count);
if (rv == kNeedMoreData) {
// No error; we just need more data.
LOG("FillComplexBuffer out of data\n");
break;
}
LOG("%d frames decoded", numFrames);
} while (true);
mSizeDecoded += aNumBytes;
}

void
AppleATDecoder::SetupDecoder()
{
LOG("Setting up Apple AudioToolbox decoder.");
AudioStreamBasicDescription inputFormat;
nsresult rv = AppleUtils::GetRichestDecodableFormat(mStream, inputFormat);
if (NS_FAILED(rv)) {
mCallback->Error();
return;
}
// Fill in the output format manually.
PodZero(&mOutputFormat);
mOutputFormat.mFormatID = kAudioFormatLinearPCM;
mOutputFormat.mSampleRate = inputFormat.mSampleRate;
mOutputFormat.mChannelsPerFrame = inputFormat.mChannelsPerFrame;
#if defined(MOZ_SAMPLE_TYPE_FLOAT32)
mOutputFormat.mBitsPerChannel = 32;
mOutputFormat.mFormatFlags =
kLinearPCMFormatFlagIsFloat |
0;
#else
# error Unknown audio sample type
#endif
// Set up the output format so the converter gives us one frame per packet.
mOutputFormat.mFramesPerPacket = 1;
mOutputFormat.mBytesPerPacket = mOutputFormat.mBytesPerFrame
= mOutputFormat.mChannelsPerFrame * mOutputFormat.mBitsPerChannel / 8;
OSStatus status =
AudioConverterNew(&inputFormat, &mOutputFormat, &mConverter);
if (status) {
LOG("Error %d constructing AudioConverter", rv);
mConverter = nullptr;
mCallback->Error();
}
}

void
AppleATDecoder::SubmitSample(nsAutoPtr<mp4_demuxer::MP4Sample> aSample)
{
// Prepend ADTS header to AAC audio.
if (!strcmp(mConfig.mime_type, "audio/mp4a-latm")) {
bool rv = mp4_demuxer::Adts::ConvertSample(mConfig.channel_count,
mConfig.frequency_index,
mConfig.aac_profile,
aSample);
if (!rv) {
NS_ERROR("Failed to apply ADTS header");
mCallback->Error();
return;
}
}
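// Compare the sample's timestamp against the one predicted from the previous
// sample (within a small fuzz). A mismatch means a seek or gap, so we reset
// our bookkeeping and flag the discontinuity to the parser.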
const Microseconds fuzz = 5;
CheckedInt<Microseconds> upperFuzz = mNextAudioTimestamp + fuzz;
CheckedInt<Microseconds> lowerFuzz = mNextAudioTimestamp - fuzz;
bool discontinuity =
!mNextAudioTimestamp.isValid() || mNextAudioTimestamp.value() < 0 ||
!upperFuzz.isValid() || lowerFuzz.value() < 0 ||
upperFuzz.value() < aSample->composition_timestamp ||
lowerFuzz.value() > aSample->composition_timestamp;
if (discontinuity) {
LOG("Discontinuity detected, expected %lld got %lld\n",
mNextAudioTimestamp.value(), aSample->composition_timestamp);
mCurrentAudioTimestamp = aSample->composition_timestamp;
mSamplePosition = aSample->byte_offset;
}
uint32_t flags = discontinuity ? kAudioFileStreamParseFlag_Discontinuity : 0;
OSStatus rv = AudioFileStreamParseBytes(mStream,
aSample->size,
aSample->data,
flags);
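// Parsing synchronously drives SampleCallback (and the converter), so any
// frames decoded from this sample are now accumulated in mOutputData.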
if (!mOutputData.IsEmpty()) {
int rate = mOutputFormat.mSampleRate;
int channels = mOutputFormat.mChannelsPerFrame;
size_t numFrames = mOutputData.Length() / channels;
CheckedInt<Microseconds> duration = FramesToUsecs(numFrames, rate);
if (!duration.isValid()) {
NS_ERROR("Invalid count of accumulated audio samples");
mCallback->Error();
return;
}
LOG("pushed audio at time %lfs; duration %lfs\n",
(double)mCurrentAudioTimestamp.value() / USECS_PER_S,
(double)duration.value() / USECS_PER_S);
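// Hand the accumulated interleaved PCM off as a single AudioData block.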
nsAutoArrayPtr<AudioDataValue>
data(new AudioDataValue[mOutputData.Length()]);
PodCopy(data.get(), &mOutputData[0], mOutputData.Length());
mOutputData.Clear();
AudioData* audio = new AudioData(mSamplePosition,
mCurrentAudioTimestamp.value(),
duration.value(),
numFrames,
data.forget(),
channels,
rate);
mCallback->Output(audio);
mCurrentAudioTimestamp += duration.value();
if (!mCurrentAudioTimestamp.isValid()) {
NS_ERROR("Invalid count of accumulated audio samples");
mCallback->Error();
return;
}
mSamplePosition += mSizeDecoded;
mSizeDecoded = 0;
}
// This is the timestamp of the next sample we should be receiving
mNextAudioTimestamp =
CheckedInt<Microseconds>(aSample->composition_timestamp) + aSample->duration;
if (rv != noErr) {
LOG("Error %d parsing audio data", rv);
mCallback->Error();
return;
}
if (mLastError != noErr) {
LOG("Error %d during decoding", mLastError);
mCallback->Error();
mLastError = noErr;
return;
}
if (mTaskQueue->IsEmpty()) {
mCallback->InputExhausted();
}
}

void
AppleATDecoder::SignalFlush()
{
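// Runs on the task queue after Flush(): drop any decoded frames that were
// never delivered and reset the expected timestamp so that the next sample
// is treated as a discontinuity.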
mOutputData.Clear();
mNextAudioTimestamp = -1;
mSizeDecoded = 0;
}

} // namespace mozilla