/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <AudioToolbox/AudioToolbox.h>
#include "AppleUtils.h"
#include "MP4Reader.h"
#include "MP4Decoder.h"
#include "mozilla/RefPtr.h"
#include "mp4_demuxer/Adts.h"
#include "mp4_demuxer/DecoderData.h"
#include "nsIThread.h"
#include "AppleATDecoder.h"
#include "prlog.h"

#ifdef PR_LOGGING
PRLogModuleInfo* GetAppleMediaLog();
#define LOG(...) PR_LOG(GetAppleMediaLog(), PR_LOG_DEBUG, (__VA_ARGS__))
#else
#define LOG(...)
#endif

namespace mozilla {

AppleATDecoder::AppleATDecoder(const mp4_demuxer::AudioDecoderConfig& aConfig,
                               MediaTaskQueue* aAudioTaskQueue,
                               MediaDataDecoderCallback* aCallback)
  : mConfig(aConfig)
  , mTaskQueue(aAudioTaskQueue)
  , mCallback(aCallback)
  , mConverter(nullptr)
  , mStream(nullptr)
  , mCurrentAudioTimestamp(-1)
  , mNextAudioTimestamp(-1)
  , mSamplePosition(0)
  , mSizeDecoded(0)
  , mLastError(noErr)
{
  MOZ_COUNT_CTOR(AppleATDecoder);
  LOG("Creating Apple AudioToolbox decoder");
  LOG("Audio Decoder configuration: %s %d Hz %d channels %d bits per channel",
      mConfig.mime_type,
      mConfig.samples_per_second,
      mConfig.channel_count,
      mConfig.bits_per_sample);

  if (!strcmp(aConfig.mime_type, "audio/mpeg")) {
    mFileType = kAudioFileMP3Type;
  } else if (!strcmp(aConfig.mime_type, "audio/mp4a-latm")) {
    mFileType = kAudioFileAAC_ADTSType;
  } else {
    mFileType = 0;
  }
}

AppleATDecoder::~AppleATDecoder()
{
  MOZ_COUNT_DTOR(AppleATDecoder);
  MOZ_ASSERT(!mConverter);
  MOZ_ASSERT(!mStream);
}

static void
_MetadataCallback(void* aDecoder,
                  AudioFileStreamID aStream,
                  AudioFileStreamPropertyID aProperty,
                  UInt32* aFlags)
{
  LOG("AppleATDecoder metadata callback");
  AppleATDecoder* decoder = static_cast<AppleATDecoder*>(aDecoder);
  decoder->MetadataCallback(aStream, aProperty, aFlags);
}

static void
_SampleCallback(void* aDecoder,
                UInt32 aNumBytes,
                UInt32 aNumPackets,
                const void* aData,
                AudioStreamPacketDescription* aPackets)
{
  LOG("AppleATDecoder sample callback %u bytes %u packets",
      aNumBytes, aNumPackets);
  AppleATDecoder* decoder = static_cast<AppleATDecoder*>(aDecoder);
  decoder->SampleCallback(aNumBytes, aNumPackets, aData, aPackets);
}

nsresult
AppleATDecoder::Init()
{
  if (!mFileType) {
    NS_ERROR("Unrecognized audio format");
    return NS_ERROR_FAILURE;
  }
  LOG("Initializing Apple AudioToolbox decoder");
  OSStatus rv = AudioFileStreamOpen(this,
                                    _MetadataCallback,
                                    _SampleCallback,
                                    mFileType,
                                    &mStream);
  if (rv) {
    NS_ERROR("Couldn't open AudioFileStream");
    return NS_ERROR_FAILURE;
  }

  return NS_OK;
}

nsresult
AppleATDecoder::Input(mp4_demuxer::MP4Sample* aSample)
{
  LOG("mp4 input sample %p %lld us %lld pts%s %llu bytes audio",
      aSample,
      aSample->duration,
      aSample->composition_timestamp,
      aSample->is_sync_point ? " keyframe" : "",
      (unsigned long long)aSample->size);

  // Queue a task to perform the actual decoding on a separate thread.
  mTaskQueue->Dispatch(
      NS_NewRunnableMethodWithArg<nsAutoPtr<mp4_demuxer::MP4Sample>>(
          this,
          &AppleATDecoder::SubmitSample,
          nsAutoPtr<mp4_demuxer::MP4Sample>(aSample)));

  return NS_OK;
}

nsresult
AppleATDecoder::Flush()
{
  LOG("Flushing AudioToolbox AAC decoder");
  mTaskQueue->Flush();
  OSStatus rv = AudioConverterReset(mConverter);
  if (rv) {
    LOG("Error %d resetting AudioConverter", rv);
    return NS_ERROR_FAILURE;
  }
  // Notify our task queue of the coming input discontinuity.
  mTaskQueue->Dispatch(
      NS_NewRunnableMethod(this, &AppleATDecoder::SignalFlush));
  return NS_OK;
}
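// For orientation, a rough sketch of the decode path through this file
// (summarized from the code below, not an exhaustive trace):
//
//   Input()                        - caller's thread; dispatches to mTaskQueue
//     SubmitSample()               - task queue; feeds the stream parser
//       AudioFileStreamParseBytes()
//         _SampleCallback() -> SampleCallback()
//           AudioConverterFillComplexBuffer()
//             _PassthroughInputDataCallback()  - hands parsed packets over
//       mCallback->Output()        - decoded AudioData pushed back to caller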
nsresult
AppleATDecoder::Drain()
{
  LOG("Draining AudioToolbox AAC decoder");
  mTaskQueue->AwaitIdle();
  mCallback->DrainComplete();
  return Flush();
}

nsresult
AppleATDecoder::Shutdown()
{
  LOG("Shutdown: Apple AudioToolbox AAC decoder");
  OSStatus rv1 = AudioConverterDispose(mConverter);
  if (rv1) {
    LOG("error %d disposing of AudioConverter", rv1);
  } else {
    mConverter = nullptr;
  }

  OSStatus rv2 = AudioFileStreamClose(mStream);
  if (rv2) {
    LOG("error %d closing AudioFileStream", rv2);
  } else {
    mStream = nullptr;
  }

  // Report failure if either teardown call failed.
  return (rv1 || rv2) ? NS_ERROR_FAILURE : NS_OK;
}

void
AppleATDecoder::MetadataCallback(AudioFileStreamID aFileStream,
                                 AudioFileStreamPropertyID aPropertyID,
                                 UInt32* aFlags)
{
  if (aPropertyID == kAudioFileStreamProperty_ReadyToProducePackets) {
    SetupDecoder();
  }
}

struct PassthroughUserData {
  AppleATDecoder* mDecoder;
  UInt32 mNumPackets;
  UInt32 mDataSize;
  const void* mData;
  AudioStreamPacketDescription* mPacketDesc;
  bool mDone;
};

// Error value we pass through the decoder to signal that nothing
// has gone wrong during decoding, but more data is needed.
const uint32_t kNeedMoreData = 'MOAR';

static OSStatus
_PassthroughInputDataCallback(AudioConverterRef aAudioConverter,
                              UInt32* aNumDataPackets /* in/out */,
                              AudioBufferList* aData /* in/out */,
                              AudioStreamPacketDescription** aPacketDesc,
                              void* aUserData)
{
  PassthroughUserData* userData = (PassthroughUserData*)aUserData;
  if (userData->mDone) {
    // We make sure this callback is run _once_, with all the data we received
    // from |AudioFileStreamParseBytes|. When we return an error, the decoder
    // simply passes the return value on to the calling method,
    // |SampleCallback|, and flushes all of the audio frames it had
    // buffered. It does not change the decoder's state.
    LOG("requested too much data; returning\n");
    *aNumDataPackets = 0;
    return kNeedMoreData;
  }

  userData->mDone = true;

  LOG("AudioConverter wants %u packets of audio data\n", *aNumDataPackets);

  *aNumDataPackets = userData->mNumPackets;
  *aPacketDesc = userData->mPacketDesc;

  aData->mBuffers[0].mNumberChannels =
      userData->mDecoder->mConfig.channel_count;
  aData->mBuffers[0].mDataByteSize = userData->mDataSize;
  aData->mBuffers[0].mData = const_cast<void*>(userData->mData);

  return noErr;
}

void
AppleATDecoder::SampleCallback(uint32_t aNumBytes,
                               uint32_t aNumPackets,
                               const void* aData,
                               AudioStreamPacketDescription* aPackets)
{
  // Pick a multiple of the frame size close to a power of two
  // for efficient allocation.
  const uint32_t MAX_AUDIO_FRAMES = 128;
  const uint32_t maxDecodedSamples = MAX_AUDIO_FRAMES * mConfig.channel_count;

  // Descriptions for _decompressed_ audio packets. Ignored.
  nsAutoArrayPtr<AudioStreamPacketDescription>
      packets(new AudioStreamPacketDescription[MAX_AUDIO_FRAMES]);

  // This API insists on having packets spoon-fed to it from a callback.
  // This structure exists only to pass our state and the result of the
  // parser on to the callback above.
  PassthroughUserData userData =
      { this, aNumPackets, aNumBytes, aData, aPackets, false };

  // Decompressed audio buffer
  nsAutoArrayPtr<AudioDataValue> decoded(new AudioDataValue[maxDecodedSamples]);

  do {
    AudioBufferList decBuffer;
    decBuffer.mNumberBuffers = 1;
    decBuffer.mBuffers[0].mNumberChannels = mOutputFormat.mChannelsPerFrame;
    decBuffer.mBuffers[0].mDataByteSize =
        maxDecodedSamples * sizeof(AudioDataValue);
    decBuffer.mBuffers[0].mData = decoded.get();

    // in: the max number of packets we can handle from the decoder.
    // out: the number of packets the decoder is actually returning.
    UInt32 numFrames = MAX_AUDIO_FRAMES;

    OSStatus rv = AudioConverterFillComplexBuffer(mConverter,
                                                  _PassthroughInputDataCallback,
                                                  &userData,
                                                  &numFrames /* in/out */,
                                                  &decBuffer,
                                                  packets.get());

    if (rv && rv != kNeedMoreData) {
      LOG("Error decoding audio stream: %d\n", rv);
      mLastError = rv;
      break;
    }

    mOutputData.AppendElements(decoded.get(),
                               numFrames * mConfig.channel_count);

    if (rv == kNeedMoreData) {
      // No error; we just need more data.
      LOG("FillComplexBuffer out of data\n");
      break;
    }
    LOG("%u frames decoded", numFrames);
  } while (true);

  mSizeDecoded += aNumBytes;
}
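// Sizing note: with MAX_AUDIO_FRAMES = 128, each pass through the loop above
// drains at most 128 frames from the converter, i.e. 128 * channel_count
// interleaved AudioDataValue samples (256 for stereo). Assuming the usual
// 1024-frame AAC packet, one parsed packet takes about eight passes before
// the converter reports kNeedMoreData and the loop exits.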
void
AppleATDecoder::SetupDecoder()
{
  LOG("Setting up Apple AudioToolbox decoder.");
  AudioStreamBasicDescription inputFormat;
  nsresult rv = AppleUtils::GetRichestDecodableFormat(mStream, inputFormat);
  if (NS_FAILED(rv)) {
    mCallback->Error();
    return;
  }

  // Fill in the output format manually.
  PodZero(&mOutputFormat);
  mOutputFormat.mFormatID = kAudioFormatLinearPCM;
  mOutputFormat.mSampleRate = inputFormat.mSampleRate;
  mOutputFormat.mChannelsPerFrame = inputFormat.mChannelsPerFrame;
#if defined(MOZ_SAMPLE_TYPE_FLOAT32)
  mOutputFormat.mBitsPerChannel = 32;
  mOutputFormat.mFormatFlags = kLinearPCMFormatFlagIsFloat | 0;
#else
# error Unknown audio sample type
#endif
  // Set up the decoder so it gives us one sample per frame
  mOutputFormat.mFramesPerPacket = 1;
  mOutputFormat.mBytesPerPacket = mOutputFormat.mBytesPerFrame =
      mOutputFormat.mChannelsPerFrame * mOutputFormat.mBitsPerChannel / 8;

  OSStatus status = AudioConverterNew(&inputFormat, &mOutputFormat, &mConverter);
  if (status) {
    LOG("Error %d constructing AudioConverter", status);
    mConverter = nullptr;
    mCallback->Error();
  }
}
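// For example, with a stereo float32 build (MOZ_SAMPLE_TYPE_FLOAT32),
// mBytesPerFrame works out to 2 channels * 32 bits / 8 = 8 bytes, and with
// mFramesPerPacket = 1 each LPCM "packet" is a single 8-byte frame. The
// sample rate and channel count are passed through from the input format
// unchanged.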
void
AppleATDecoder::SubmitSample(nsAutoPtr<mp4_demuxer::MP4Sample> aSample)
{
  // Prepend an ADTS header to AAC audio.
  if (!strcmp(mConfig.mime_type, "audio/mp4a-latm")) {
    bool rv = mp4_demuxer::Adts::ConvertSample(mConfig.channel_count,
                                               mConfig.frequency_index,
                                               mConfig.aac_profile,
                                               aSample);
    if (!rv) {
      NS_ERROR("Failed to apply ADTS header");
      mCallback->Error();
      return;
    }
  }

  // Treat the stream as discontinuous if this sample's timestamp is more
  // than |fuzz| microseconds away from the one we predicted.
  const Microseconds fuzz = 5;
  CheckedInt<Microseconds> upperFuzz = mNextAudioTimestamp + fuzz;
  CheckedInt<Microseconds> lowerFuzz = mNextAudioTimestamp - fuzz;
  bool discontinuity =
      !mNextAudioTimestamp.isValid() || mNextAudioTimestamp.value() < 0 ||
      !upperFuzz.isValid() || lowerFuzz.value() < 0 ||
      upperFuzz.value() < aSample->composition_timestamp ||
      lowerFuzz.value() > aSample->composition_timestamp;

  if (discontinuity) {
    LOG("Discontinuity detected, expected %lld got %lld\n",
        mNextAudioTimestamp.value(), aSample->composition_timestamp);
    mCurrentAudioTimestamp = aSample->composition_timestamp;
    mSamplePosition = aSample->byte_offset;
  }

  uint32_t flags = discontinuity ?
      kAudioFileStreamParseFlag_Discontinuity : 0;

  OSStatus rv = AudioFileStreamParseBytes(mStream,
                                          aSample->size,
                                          aSample->data,
                                          flags);

  if (!mOutputData.IsEmpty()) {
    int rate = mOutputFormat.mSampleRate;
    int channels = mOutputFormat.mChannelsPerFrame;
    size_t numFrames = mOutputData.Length() / channels;
    CheckedInt<Microseconds> duration = FramesToUsecs(numFrames, rate);
    if (!duration.isValid()) {
      NS_ERROR("Invalid count of accumulated audio samples");
      mCallback->Error();
      return;
    }

    LOG("pushed audio at time %lfs; duration %lfs\n",
        (double)mCurrentAudioTimestamp.value() / USECS_PER_S,
        (double)duration.value() / USECS_PER_S);

    nsAutoArrayPtr<AudioDataValue>
        data(new AudioDataValue[mOutputData.Length()]);
    PodCopy(data.get(), &mOutputData[0], mOutputData.Length());
    mOutputData.Clear();
    AudioData* audio = new AudioData(mSamplePosition,
                                     mCurrentAudioTimestamp.value(),
                                     duration.value(),
                                     numFrames,
                                     data.forget(),
                                     channels,
                                     rate);
    mCallback->Output(audio);
    mCurrentAudioTimestamp += duration.value();
    if (!mCurrentAudioTimestamp.isValid()) {
      NS_ERROR("Invalid count of accumulated audio samples");
      mCallback->Error();
      return;
    }
    mSamplePosition += mSizeDecoded;
    mSizeDecoded = 0;
  }

  // This is the timestamp of the next sample we should be receiving.
  mNextAudioTimestamp =
      CheckedInt<Microseconds>(aSample->composition_timestamp) +
      aSample->duration;

  if (rv != noErr) {
    LOG("Error %d parsing audio data", rv);
    mCallback->Error();
    return;
  }

  if (mLastError != noErr) {
    LOG("Error %d during decoding", mLastError);
    mCallback->Error();
    mLastError = noErr;
    return;
  }

  if (mTaskQueue->IsEmpty()) {
    mCallback->InputExhausted();
  }
}

void
AppleATDecoder::SignalFlush()
{
  mOutputData.Clear();
  mNextAudioTimestamp = -1;
  mSizeDecoded = 0;
}

} // namespace mozilla
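// For reference, a rough sketch of how the owning reader is expected to
// drive this decoder (simplified; the actual MP4Reader also manages task
// queue lifetime, error handling, and flushing on seek):
//
//   nsRefPtr<AppleATDecoder> decoder =
//       new AppleATDecoder(config, audioTaskQueue, callback);
//   if (NS_FAILED(decoder->Init())) { /* bail */ }
//   decoder->Input(sample);  // repeatedly; decoded AudioData arrives
//                            // asynchronously via callback->Output()
//   decoder->Drain();        // at end of stream; fires DrainComplete()
//   decoder->Shutdown();     // dispose of the converter and parser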