gecko/content/media/omx/MP3FrameParser.cpp
Thomas Zimmermann 0668f3f25a Bug 831224: Added MP3 frame parser, r=padenot
MP3 streams consist of small frames, with each frame containing the
audio data of a few hundred milliseconds. The actual duration of the
encoded audio can vary among frames.

Each frame consists of a 4-byte frame header, some optional extra
information, and the audio data. The MP3 frame parser walks over the
content of an MP3 stream, computes the duration of each frame from
the frame header, and sums them up to the stream's complete duration.

The MP3 frame parser does not decode the actual audio data.
2013-05-03 09:44:02 +02:00

488 lines
13 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include <algorithm>
#include "nsMemory.h"
#include "MP3FrameParser.h"
namespace mozilla {
// An ID3Buffer contains data of an ID3v2 header. The supplied buffer must
// point to an ID3 header and be at least ID3_HEADER_LENGTH bytes long. Run
// the Parse method to read in the header's values.
class ID3Buffer
{
public:
enum {
// Number of header bytes that Parse requires; GetMP3Offset adds it to the
// decoded size.
ID3_HEADER_LENGTH = 10
};
// Stores the buffer pointer and its length; the data is neither copied nor
// read here. A null buffer is only accepted together with a zero length.
ID3Buffer(const uint8_t* aBuffer, uint32_t aLength)
: mBuffer(aBuffer),
mLength(aLength),
mSize(0)
{
MOZ_ASSERT(mBuffer || !mLength);
}
// Validates the header and decodes its size field into mSize. Returns
// NS_ERROR_INVALID_ARG if the buffer is too short or fails validation.
nsresult Parse();
// Returns ID3_HEADER_LENGTH plus the size decoded by Parse. Until Parse has
// succeeded, mSize is still 0 and only the header length is returned.
int64_t GetMP3Offset() const {
return ID3_HEADER_LENGTH + mSize;
}
private:
// Start of the header data supplied to the constructor (not copied).
const uint8_t* mBuffer;
// Number of bytes available at mBuffer.
uint32_t mLength;
// Size value decoded from header bytes 6-9 by Parse; 0 until then.
uint32_t mSize;
};
// Checks that the buffer starts with an ID3 header and decodes the header's
// size field into mSize.
//
// Returns NS_OK on success. Returns NS_ERROR_INVALID_ARG if the buffer is
// null or shorter than ID3_HEADER_LENGTH, if it does not start with "ID3",
// or if any of the four size bytes has its top bit set. mSize is only
// written on success.
nsresult ID3Buffer::Parse()
{
  NS_ENSURE_TRUE(mBuffer && mLength >= ID3_HEADER_LENGTH, NS_ERROR_INVALID_ARG);

  const bool hasTag = (mBuffer[0] == 'I') &&
                      (mBuffer[1] == 'D') &&
                      (mBuffer[2] == '3');
  if (!hasTag) {
    return NS_ERROR_INVALID_ARG;
  }

  // Bytes 6..9 hold the size, most significant byte first, with only the
  // lower 7 bits of each byte carrying data. A set top bit is invalid.
  uint32_t decodedSize = 0;
  for (uint32_t pos = 6; pos <= 9; ++pos) {
    const uint8_t sizeByte = mBuffer[pos];
    if (sizeByte & 0x80) {
      return NS_ERROR_INVALID_ARG;
    }
    decodedSize = (decodedSize << 7) | static_cast<uint32_t>(sizeByte);
  }

  mSize = decodedSize;
  return NS_OK;
}
// The MP3Buffer contains MP3 frame data. The supplied buffer must point
// to a frame header. Call the method Parse to extract information from
// the MP3 frame headers in the supplied buffer.
class MP3Buffer
{
public:
  enum {
    // Number of bytes in an MP3 frame header.
    MP3_HEADER_LENGTH   = 4,
    // Factor of the frame-size formula in DecodeFrameHeader:
    //   frameSize = MP3_FRAMESIZE_CONST * bitRate / sampleRate + padding
    MP3_FRAMESIZE_CONST = 144000,
    // Factor of the duration formula:
    //   duration = MP3_DURATION_CONST * bytes / bitRate
    MP3_DURATION_CONST  = 8000
  };

  // Stores the buffer pointer and its length and zeroes all counters. The
  // data is not copied. A null buffer is only accepted with a zero length.
  MP3Buffer(const uint8_t* aBuffer, uint32_t aLength)
  : mBuffer(aBuffer),
    mLength(aLength),
    mDurationUs(0),
    mNumFrames(0),
    mBitRateSum(0),
    mFrameSizeSum(0),
    mTrailing(0)
  {
    MOZ_ASSERT(mBuffer || !mLength);
  }

  // Scans forward from aBuffer and returns a pointer to the first position
  // holding a valid frame header, or to the position where fewer than
  // MP3_HEADER_LENGTH bytes remain if none was found.
  static const uint8_t* FindNextHeader(const uint8_t* aBuffer, uint32_t aLength);

  // Walks over all frames in the buffer and accumulates their values in the
  // counters below. Fails as soon as an invalid frame header is met.
  nsresult Parse();

  int64_t GetDuration() const {
    return mDurationUs;
  }

  int64_t GetNumberOfFrames() const {
    return mNumFrames;
  }

  int64_t GetBitRateSum() const {
    return mBitRateSum;
  }

  int64_t GetFrameSizeSum() const {
    return mFrameSizeSum;
  }

  int64_t GetTrailing() const {
    return mTrailing;
  }

private:
  // Fields of the 32-bit frame header, used as index into the bit-layout
  // table of ExtractFrameHeaderField.
  enum MP3FrameHeaderField {
    MP3_HDR_FIELD_SYNC,
    MP3_HDR_FIELD_VERSION,
    MP3_HDR_FIELD_LAYER,
    MP3_HDR_FIELD_BITRATE,
    MP3_HDR_FIELD_SAMPLERATE,
    MP3_HDR_FIELD_PADDING,
    MP3_HDR_FIELDS // Must be last enumerator value
  };

  // Field values a header must carry to be accepted by ExtractFrameHeader.
  // NOTE(review): version 3 / layer 1 presumably select MPEG-1 Layer III;
  // confirm against the MPEG audio header specification.
  enum {
    MP3_HDR_CONST_FRAMESYNC = 0x7ff,
    MP3_HDR_CONST_VERSION   = 3,
    MP3_HDR_CONST_LAYER     = 1
  };

  // Returns the aBits-wide bit field of aValue that starts at bit aOffset.
  static uint32_t ExtractBits(uint32_t aValue, uint32_t aOffset,
                              uint32_t aBits);

  // Returns the value of the given field of a 32-bit frame header.
  static uint32_t ExtractFrameHeaderField(uint32_t aHeader,
                                          enum MP3FrameHeaderField aField);

  // Reads 4 bytes at aBuffer as a big-endian frame header and returns it,
  // or 0 if the header fields are not valid.
  static uint32_t ExtractFrameHeader(const uint8_t* aBuffer);

  // Decodes the frame header at aBuffer into frame size, bit rate and
  // duration. The frame size is a uint32_t*, matching the out-of-line
  // definition and the caller in Parse; it was previously declared as
  // size_t*, which only compiles where size_t and uint32_t are the same type.
  static nsresult DecodeFrameHeader(const uint8_t* aBuffer,
                                    uint32_t* aFrameSize,
                                    uint32_t* aBitRate,
                                    uint64_t* aDuration);

  static const uint16_t sBitRate[16];
  static const uint16_t sSampleRate[4];

  // Start and length of the data supplied to the constructor (not copied).
  const uint8_t* mBuffer;
  uint32_t       mLength;

  // The duration of this parser's data in microseconds.
  int64_t mDurationUs;

  // The number of frames in the range.
  int64_t mNumFrames;

  // The sum of all frames' bit rates.
  int64_t mBitRateSum;

  // The sum of all frames' sizes in bytes.
  int32_t mFrameSizeSum;

  // The number of trailing bytes.
  int32_t mTrailing;
};
// Bit-rate lookup table, indexed by the 4-bit MP3_HDR_FIELD_BITRATE value of
// a frame header. An entry of 0 makes ExtractFrameHeader reject the header.
// NOTE(review): values are presumably kbit/s for MPEG-1 Layer III — confirm.
const uint16_t MP3Buffer::sBitRate[16] = {
0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0
};
// Sample-rate lookup table, indexed by the 2-bit MP3_HDR_FIELD_SAMPLERATE
// value of a frame header. An entry of 0 makes ExtractFrameHeader reject the
// header.
// NOTE(review): values are presumably in Hz — confirm.
const uint16_t MP3Buffer::sSampleRate[4] = {
44100, 48000, 32000, 0
};
// Returns the bit field of aValue that is aBits wide and whose least
// significant bit sits at position aOffset.
uint32_t MP3Buffer::ExtractBits(uint32_t aValue, uint32_t aOffset, uint32_t aBits)
{
  // Mask with the lowest aBits bits set.
  const unsigned long lowBitsMask = (0x1ul << aBits) - 1;
  const uint32_t shifted = aValue >> aOffset;
  return shifted & lowBitsMask;
}
uint32_t MP3Buffer::ExtractFrameHeaderField(uint32_t aHeader, enum MP3FrameHeaderField aField)
{
static const uint8_t sField[MP3_HDR_FIELDS][2] = {
{21, 11}, {19, 2}, {17, 2}, {12, 4}, {10, 2}, {9, 1}
};
MOZ_ASSERT(aField < MP3_HDR_FIELDS);
return ExtractBits(aHeader, sField[aField][0], sField[aField][1]);
}
uint32_t MP3Buffer::ExtractFrameHeader(const uint8_t* aBuffer)
{
MOZ_ASSERT(aBuffer);
uint32_t header = (static_cast<uint32_t>(aBuffer[0])<<24) |
(static_cast<uint32_t>(aBuffer[1])<<16) |
(static_cast<uint32_t>(aBuffer[2])<<8) |
static_cast<uint32_t>(aBuffer[3]);
uint32_t frameSync = ExtractFrameHeaderField(header, MP3_HDR_FIELD_SYNC);
uint32_t version = ExtractFrameHeaderField(header, MP3_HDR_FIELD_VERSION);
uint32_t layer = ExtractFrameHeaderField(header, MP3_HDR_FIELD_LAYER);
uint32_t bitRate = sBitRate[ExtractFrameHeaderField(header, MP3_HDR_FIELD_BITRATE)];
uint32_t sampleRate = sSampleRate[ExtractFrameHeaderField(header, MP3_HDR_FIELD_SAMPLERATE)];
// branch-less implementation of
//
// if (fields-are-valid)
// return header;
// else
// return 0;
//
return (frameSync == uint32_t(MP3_HDR_CONST_FRAMESYNC)) *
(version == uint32_t(MP3_HDR_CONST_VERSION)) *
(layer == uint32_t(MP3_HDR_CONST_LAYER)) * !!bitRate * !!sampleRate * header;
}
// Searches aBuffer byte by byte for a position at which ExtractFrameHeader
// accepts the next 4 bytes as a frame header.
//
// Returns a pointer to the first such position. If there is none, returns
// the first position at which fewer than MP3_HEADER_LENGTH bytes remain
// (aBuffer itself if aLength is already too short).
const uint8_t* MP3Buffer::FindNextHeader(const uint8_t* aBuffer, uint32_t aLength)
{
  MOZ_ASSERT(aBuffer || !aLength);

  const uint8_t* cursor = aBuffer;

  // Only probe positions that still have room for a complete header.
  for (uint32_t remaining = aLength;
       remaining >= MP3_HEADER_LENGTH;
       --remaining, ++cursor) {
    if (ExtractFrameHeader(cursor)) {
      return cursor;
    }
  }

  return cursor;
}
// Decodes the frame header at aBuffer.
//
// On success, stores the frame's bit rate (the sBitRate table entry) in
// *aBitRate, its size including the padding bit in *aFrameSize, and
// MP3_DURATION_CONST * frameSize / bitRate in *aDuration, then returns
// NS_OK. Returns NS_ERROR_INVALID_ARG without touching the out-parameters
// if ExtractFrameHeader rejects the header.
nsresult MP3Buffer::DecodeFrameHeader(const uint8_t* aBuffer,
                                      uint32_t* aFrameSize,
                                      uint32_t* aBitRate,
                                      uint64_t* aDuration)
{
  const uint32_t header = ExtractFrameHeader(aBuffer);
  if (header == 0) {
    return NS_ERROR_INVALID_ARG;
  }

  // A valid header guarantees non-zero table entries, so the divisions
  // below cannot divide by zero.
  const uint32_t bitRateIndex = ExtractFrameHeaderField(header, MP3_HDR_FIELD_BITRATE);
  const uint32_t sampleRateIndex = ExtractFrameHeaderField(header, MP3_HDR_FIELD_SAMPLERATE);
  const uint32_t rate = sBitRate[bitRateIndex];
  const uint32_t samples = sSampleRate[sampleRateIndex];
  const uint32_t padBit = ExtractFrameHeaderField(header, MP3_HDR_FIELD_PADDING);

  // Computed in 64 bit to avoid overflow of the intermediate product.
  const uint64_t paddedSize = (uint64_t(MP3_FRAMESIZE_CONST) * rate) / samples + padBit;
  const uint32_t size = paddedSize;

  MOZ_ASSERT(aBitRate);
  *aBitRate = rate;

  MOZ_ASSERT(aFrameSize);
  *aFrameSize = size;

  MOZ_ASSERT(aDuration);
  *aDuration = (uint64_t(MP3_DURATION_CONST) * size) / rate;

  return NS_OK;
}
// Walks over the frames in the buffer and sums up their bit rates, sizes
// and durations.
//
// Every position visited must hold a valid frame header; otherwise the
// error from DecodeFrameHeader is returned and the counters hold the
// partial sums of the frames seen so far. A final frame that is cut off by
// the end of the buffer is still counted with its full size and duration.
// On success, mTrailing holds the number of bytes (fewer than
// MP3_HEADER_LENGTH) that were left over after the last frame.
nsresult MP3Buffer::Parse()
{
  const uint8_t* buffer = mBuffer;
  uint32_t length = mLength;

  while (length >= MP3_HEADER_LENGTH) {

    uint32_t frameSize;
    uint32_t bitRate;
    uint64_t duration;

    nsresult rv = DecodeFrameHeader(buffer, &frameSize, &bitRate, &duration);
    NS_ENSURE_SUCCESS(rv, rv);

    mBitRateSum += bitRate;
    mDurationUs += duration;
    ++mNumFrames;

    mFrameSizeSum += frameSize;

    // Never advance past the end of the supplied data: if the frame is cut
    // off, only skip the bytes that are actually there. Adding the full
    // frame size would form a pointer beyond the buffer, which is undefined
    // behavior even if the pointer is never dereferenced.
    const uint32_t consumed = std::min(frameSize, length);
    buffer += consumed;
    length -= consumed;
  }

  mTrailing = length;

  return NS_OK;
}
// Creates a parser with all counters and offsets at zero and an empty
// temporary buffer. aLength is stored in mLength, which is later compared
// against the byte offset mOffset and raised to it whenever mOffset grows
// beyond it. mIsMP3 starts out true; the parsing methods set it to false
// when the data cannot be parsed.
MP3FrameParser::MP3FrameParser(int64_t aLength)
: mBufferLength(0),
mLock("MP3FrameParser.mLock"),
mDurationUs(0),
mBitRateSum(0),
mNumFrames(0),
mOffset(0),
mUnhandled(0),
mLength(aLength),
mTrailing(0),
mIsMP3(true)
{ }
// Tries to parse newly arrived data through the temporary buffer mBuffer,
// which holds bytes left over from earlier calls.
//
// aBuffer/aLength describe the new data and aOffset its position in the
// stream. If the data does not continue at mOffset, the temporary buffer is
// discarded. Otherwise, if the temporary buffer is non-empty or the stream
// is at its very beginning, as much new data as fits is appended and the
// buffer is parsed, first as an ID3 header and then as MP3 frames.
//
// Returns the number of bytes of aBuffer that were appended to the
// temporary buffer but could not be parsed yet, so that the caller skips
// them. Returns 0 if nothing was appended or if parsing succeeded; in the
// latter case mOffset has been advanced instead.
size_t MP3FrameParser::ParseInternalBuffer(const uint8_t* aBuffer, uint32_t aLength, int64_t aOffset)
{
if (mOffset != aOffset) {
// If we don't append, we throw away our temporary buffer.
mBufferLength = 0;
return 0;
}
size_t copyLength = 0;
if (mBufferLength || !mOffset) {
// We have some data in our temporary buffer and append to it, or
// we are at the beginning of the stream. In both cases, we append
// some data to our temporary buffer and try to parse it.
copyLength = std::min<size_t>(NS_ARRAY_LENGTH(mBuffer)-mBufferLength, aLength);
memcpy(mBuffer+mBufferLength, aBuffer, copyLength*sizeof(*mBuffer));
mBufferLength += copyLength;
}
if ((mBufferLength >= ID3Buffer::ID3_HEADER_LENGTH) && (mOffset < ID3Buffer::ID3_HEADER_LENGTH)) {
// There might be an ID3 header at the very beginning of the stream.
ID3Buffer id3Buffer(mBuffer, mBufferLength);
nsresult rv = id3Buffer.Parse();
if (rv == NS_OK) {
// Skip the ID3 data. (mBufferLength-copyLength) is the number of bytes
// that were buffered before this call and are already part of mOffset.
mOffset += id3Buffer.GetMP3Offset()-(mBufferLength-copyLength);
mBufferLength = 0;
}
}
if (mBufferLength >= MP3Buffer::MP3_HEADER_LENGTH) {
// Or there could be a regular frame header somewhere
// in the stream.
MP3Buffer mp3Buffer(mBuffer, mBufferLength);
nsresult rv = mp3Buffer.Parse();
if (rv == NS_OK) {
mDurationUs += mp3Buffer.GetDuration();
mBitRateSum += mp3Buffer.GetBitRateSum();
mNumFrames += mp3Buffer.GetNumberOfFrames();
// Advance by the parsed frames, minus the bytes that were buffered
// before this call and are already part of mOffset.
mOffset += mp3Buffer.GetFrameSizeSum()-(mBufferLength-copyLength);
mBufferLength = 0;
}
}
if (mBufferLength) {
// We have not been able to successfully parse the
// content of the temporary buffer. If the buffer is
// full already, the stream does not contain MP3.
mOffset += copyLength;
mIsMP3 = (mBufferLength < NS_ARRAY_LENGTH(mBuffer));
} else {
// We parsed the temporary buffer. The parser code
// will update the input data.
copyLength = 0;
}
// The stream is at least as long as the position we have reached.
if (mOffset > mLength) {
mLength = mOffset;
}
return copyLength;
}
// Parses a fragment of the stream and adds the durations, bit rates and
// number of the contained frames to the parser's totals.
//
// aBuffer/aLength describe the fragment and aOffset its position in the
// stream. Data before the current position mOffset is skipped as already
// processed. If the fragment starts after mOffset, the gap is recorded in
// mUnhandled and the parser searches for the next frame header, retrying a
// limited number of times before it declares the stream not to be MP3.
// Up to MP3_HEADER_LENGTH-1 bytes that remain after the last frame are
// saved in the temporary buffer for the next call.
void MP3FrameParser::Parse(const uint8_t* aBuffer, uint32_t aLength, int64_t aOffset)
{
  MutexAutoLock mon(mLock);

  // We first try to parse the remaining data from the last call that
  // is stored in an internal buffer.
  size_t bufferIncr = ParseInternalBuffer(aBuffer, aLength, aOffset);

  aBuffer += bufferIncr;
  aLength -= bufferIncr;
  aOffset += bufferIncr;

  // The number of attempts to parse the data. This should be 1 if we
  // append to the end of the existing data.
  int retries = 1;

  if (aOffset+aLength <= mOffset) {
    // We already processed this fragment.
    return;
  } else if (aOffset < mOffset) {
    // mOffset is within the new fragment, shorten range.
    aLength -= mOffset-aOffset;
    aBuffer += mOffset-aOffset;
    aOffset  = mOffset;
  } else if (aOffset > mOffset) {
    // Fragment comes after current position, store difference.
    mUnhandled += aOffset-mOffset;

    // Continue at the start of the fragment. Without this, mOffset would
    // lag behind by the size of the gap and every following contiguous
    // fragment would add the same gap to mUnhandled again.
    mOffset = aOffset;

    // We might start in the middle of a frame and have to find the next
    // frame header. As our detection heuristics might return false
    // positives, we simply try multiple times. The current value comes
    // from experimentation with MP3 files. If you encounter false positives
    // and incorrectly parsed MP3 files, try incrementing this value.
    retries = 5;
  }

  // From here on, aBuffer points to the data at stream position aOffset,
  // and mOffset == aOffset.

  uint32_t trailing = 0;

  while (retries) {

    MP3Buffer mp3Buffer(aBuffer, aLength);
    nsresult rv = mp3Buffer.Parse();

    if (rv != NS_OK) {
      --retries;

      if (!retries) {
        mIsMP3 = false;
        return;
      }

      // We might be in the middle of a frame, find next frame header.
      // MP3Buffer::Parse only fails if at least MP3_HEADER_LENGTH bytes
      // were available, so aLength-1 cannot underflow here.
      const uint8_t *buffer = MP3Buffer::FindNextHeader(aBuffer+1, aLength-1);

      // Number of bytes skipped by the search. Computing the pointer
      // difference first avoids adding the stream offset to a pointer.
      const int64_t skipped = buffer-aBuffer;

      mUnhandled += skipped;
      // Keep aOffset in step with aBuffer so that the position stays
      // correct if another retry is necessary.
      aOffset    += skipped;
      mOffset     = aOffset;
      aLength    -= skipped;
      aBuffer     = buffer;
    } else {
      mDurationUs += mp3Buffer.GetDuration();
      mBitRateSum += mp3Buffer.GetBitRateSum();
      mNumFrames  += mp3Buffer.GetNumberOfFrames();
      mOffset     += mp3Buffer.GetFrameSizeSum();

      trailing = mp3Buffer.GetTrailing();
      retries = 0;
    }
  }

  if (trailing) {
    // Store trailing bytes in temporary buffer.
    MOZ_ASSERT(trailing < (NS_ARRAY_LENGTH(mBuffer)*sizeof(*mBuffer)));
    memcpy(mBuffer, aBuffer+(aLength-trailing), trailing);
    mBufferLength = trailing;
  }

  // The stream is at least as long as the position we have reached.
  if (mOffset > mLength) {
    mLength = mOffset;
  }
}
// Entry point for newly arrived stream data. Reinterprets the char buffer
// as bytes and hands it to Parse together with its length and its offset
// in the stream.
void MP3FrameParser::NotifyDataArrived(const char* aBuffer, uint32_t aLength, int64_t aOffset)
{
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(aBuffer);
  Parse(bytes, aLength, aOffset);
}
int64_t MP3FrameParser::GetDuration()
{
MutexAutoLock mon(mLock);
if (!mNumFrames) {
return -1; // Not a single frame decoded yet
}
// Compute the duration of the unhandled fragments from
// the average bitrate.
int64_t avgBitRate = mBitRateSum / mNumFrames;
NS_ENSURE_TRUE(avgBitRate > 0, mDurationUs);
MOZ_ASSERT(mLength >= mOffset);
int64_t unhandled = mUnhandled + (mLength-mOffset);
return mDurationUs + (uint64_t(MP3Buffer::MP3_DURATION_CONST) * unhandled) / avgBitRate;
}
}