gecko/content/media/wave/nsWaveReader.cpp
Ralph Giles a81a17e866 Bug 763010 - Expose media element metadata. r=cpearce
Implements a media.mozGetMetadata() method returning a new JavaScript object whose properties are key-value pairs representing metadata tags from the media resource. This data is available after readyState enters METADATA_LOADED.

Currently this is only implemented for Ogg Vorbis streams.

Media format metadata is parsed out by the media decoders. In the nsCodecStateMachine::ReadMetadata subclasses we fill in an nsDataHashtable pointer using the format-specific API.

The hash pointer is passed up to the media element as part of the MetadataLoaded event.

The hash is deleted if the load is aborted. The audio metadata is also reset to zero (as in the constructor), resolving a todo comment.
2012-07-30 20:14:29 -04:00

526 lines
15 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsError.h"
#include "nsBuiltinDecoder.h"
#include "MediaResource.h"
#include "nsWaveReader.h"
#include "nsTimeRanges.h"
#include "VideoUtils.h"
#include "mozilla/StandardInteger.h"
using namespace mozilla;
// Un-comment to enable logging of seek bisections.
//#define SEEK_LOGGING
// Logging macros. When PR_LOGGING is defined, LOG forwards to PR_LOG on the
// shared gBuiltinDecoderLog module, and SEEK_LOG does the same only if
// SEEK_LOGGING is also defined. In every other configuration the macros
// expand to nothing, so their arguments are never evaluated.
#ifdef PR_LOGGING
extern PRLogModuleInfo* gBuiltinDecoderLog;
#define LOG(type, msg) PR_LOG(gBuiltinDecoderLog, type, msg)
#ifdef SEEK_LOGGING
#define SEEK_LOG(type, msg) PR_LOG(gBuiltinDecoderLog, type, msg)
#else
#define SEEK_LOG(type, msg)
#endif
#else
#define LOG(type, msg)
#define SEEK_LOG(type, msg)
#endif
// Magic values that identify RIFF chunks we're interested in. Each value is
// the chunk's four-character ASCII tag packed most-significant byte first,
// which is why the tags are decoded with ReadUint32BE.
static const PRUint32 RIFF_CHUNK_MAGIC = 0x52494646; // "RIFF"
static const PRUint32 WAVE_CHUNK_MAGIC = 0x57415645; // "WAVE"
static const PRUint32 FRMT_CHUNK_MAGIC = 0x666d7420; // "fmt " (trailing space)
static const PRUint32 DATA_CHUNK_MAGIC = 0x64617461; // "data"
// Size of RIFF chunk header. 4 byte chunk header type and 4 byte size field.
static const PRUint16 RIFF_CHUNK_HEADER_SIZE = 8;
// Size of RIFF header. RIFF chunk and 4 byte RIFF type.
static const PRUint16 RIFF_INITIAL_SIZE = RIFF_CHUNK_HEADER_SIZE + 4;
// Size of required part of format chunk. Actual format chunks may be
// extended (for non-PCM encodings), but we skip any extended data.
static const PRUint16 WAVE_FORMAT_CHUNK_SIZE = 16;
// PCM encoding type from format chunk. Linear PCM is the only encoding
// supported by nsAudioStream.
static const PRUint16 WAVE_FORMAT_ENCODING_PCM = 1;
// Maximum number of channels supported. LoadFormatChunk rejects files that
// declare more channels than this.
static const PRUint8 MAX_CHANNELS = 2;
namespace {

// Helpers that decode fixed-width integers from a raw byte buffer.
//
// Each helper reads from *aBuffer and then advances *aBuffer past the bytes
// it consumed. None of them performs bounds checking: the caller must
// guarantee that enough bytes are available at *aBuffer.

// Reads a 32-bit unsigned integer stored most-significant byte first.
PRUint32
ReadUint32BE(const char** aBuffer)
{
  // Each byte is widened to PRUint32 *before* shifting. A bare PRUint8
  // operand would be promoted to (signed) int, and shifting a byte >= 0x80
  // left by 24 would overflow that int.
  PRUint32 result =
    PRUint32(PRUint8((*aBuffer)[0])) << 24 |
    PRUint32(PRUint8((*aBuffer)[1])) << 16 |
    PRUint32(PRUint8((*aBuffer)[2])) << 8 |
    PRUint32(PRUint8((*aBuffer)[3]));
  *aBuffer += sizeof(PRUint32);
  return result;
}

// Reads a 32-bit unsigned integer stored least-significant byte first.
PRUint32
ReadUint32LE(const char** aBuffer)
{
  // See ReadUint32BE for why the bytes are widened before shifting.
  PRUint32 result =
    PRUint32(PRUint8((*aBuffer)[3])) << 24 |
    PRUint32(PRUint8((*aBuffer)[2])) << 16 |
    PRUint32(PRUint8((*aBuffer)[1])) << 8 |
    PRUint32(PRUint8((*aBuffer)[0]));
  *aBuffer += sizeof(PRUint32);
  return result;
}

// Reads a 16-bit unsigned integer stored least-significant byte first.
PRUint16
ReadUint16LE(const char** aBuffer)
{
  // The shifted value fits comfortably in int, so no widening is needed here.
  PRUint16 result =
    PRUint8((*aBuffer)[1]) << 8 |
    PRUint8((*aBuffer)[0]) << 0;
  *aBuffer += sizeof(PRUint16);
  return result;
}

// Reads a 16-bit little-endian value and reinterprets it as signed.
PRInt16
ReadInt16LE(const char** aBuffer)
{
  return static_cast<PRInt16>(ReadUint16LE(aBuffer));
}

// Reads a single unsigned byte.
PRUint8
ReadUint8(const char** aBuffer)
{
  PRUint8 result = PRUint8((*aBuffer)[0]);
  *aBuffer += sizeof(PRUint8);
  return result;
}

}
// Constructs a reader for aDecoder. All state handling is delegated to the
// nsBuiltinDecoderReader base; MOZ_COUNT_CTOR pairs with the MOZ_COUNT_DTOR
// in the destructor.
nsWaveReader::nsWaveReader(nsBuiltinDecoder* aDecoder)
  : nsBuiltinDecoderReader(aDecoder)
{
  MOZ_COUNT_CTOR(nsWaveReader);
}
// Destructor. Nothing is released explicitly here; MOZ_COUNT_DTOR balances
// the MOZ_COUNT_CTOR issued by the constructor.
nsWaveReader::~nsWaveReader()
{
  MOZ_COUNT_DTOR(nsWaveReader);
}
// Initialization hook. This reader needs no set-up, so aCloneDonor is
// ignored and the call always succeeds.
nsresult nsWaveReader::Init(nsBuiltinDecoderReader* aCloneDonor)
{
  return NS_OK;
}
// Parses the RIFF header, the format chunk and the start of the data chunk,
// then publishes the resulting audio parameters.
//
// aInfo receives a copy of mInfo (audio only, no video).
// aTags is always set to nullptr: no metadata tags are produced here.
//
// Returns NS_ERROR_FAILURE if any of the three header-parsing steps fails,
// NS_OK otherwise. On success the duration derived from the PCM data length
// is also reported to the decoder's state machine.
nsresult nsWaveReader::ReadMetadata(nsVideoInfo* aInfo,
                                    nsHTMLMediaElement::MetadataTags** aTags)
{
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");

  // The steps must run in this order and stop at the first failure, since
  // each one continues reading where the previous one left the resource.
  if (!LoadRIFFChunk() || !LoadFormatChunk() || !FindDataOffset()) {
    return NS_ERROR_FAILURE;
  }

  mInfo.mHasAudio = true;
  mInfo.mHasVideo = false;
  mInfo.mAudioRate = mSampleRate;
  mInfo.mAudioChannels = mChannels;

  *aInfo = mInfo;
  *aTags = nullptr;

  // The duration is handed over while holding the decoder monitor.
  ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
  const double durationSecs = BytesToTime(GetDataLength());
  mDecoder->GetStateMachine()->SetDuration(
    static_cast<PRInt64>(durationSecs * USECS_PER_S));

  return NS_OK;
}
bool nsWaveReader::DecodeAudioData()
{
NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
PRInt64 pos = GetPosition() - mWavePCMOffset;
PRInt64 len = GetDataLength();
PRInt64 remaining = len - pos;
NS_ASSERTION(remaining >= 0, "Current wave position is greater than wave file length");
static const PRInt64 BLOCK_SIZE = 4096;
PRInt64 readSize = NS_MIN(BLOCK_SIZE, remaining);
PRInt64 frames = readSize / mFrameSize;
PR_STATIC_ASSERT(PRUint64(BLOCK_SIZE) < UINT_MAX / sizeof(AudioDataValue) / MAX_CHANNELS);
const size_t bufferSize = static_cast<size_t>(frames * mChannels);
nsAutoArrayPtr<AudioDataValue> sampleBuffer(new AudioDataValue[bufferSize]);
PR_STATIC_ASSERT(PRUint64(BLOCK_SIZE) < UINT_MAX / sizeof(char));
nsAutoArrayPtr<char> dataBuffer(new char[static_cast<size_t>(readSize)]);
if (!ReadAll(dataBuffer, readSize)) {
mAudioQueue.Finish();
return false;
}
// convert data to samples
const char* d = dataBuffer.get();
AudioDataValue* s = sampleBuffer.get();
for (int i = 0; i < frames; ++i) {
for (unsigned int j = 0; j < mChannels; ++j) {
if (mSampleFormat == nsAudioStream::FORMAT_U8) {
PRUint8 v = ReadUint8(&d);
#if defined(MOZ_SAMPLE_TYPE_S16LE)
*s++ = (v * (1.F/PR_UINT8_MAX)) * PR_UINT16_MAX + PR_INT16_MIN;
#elif defined(MOZ_SAMPLE_TYPE_FLOAT32)
*s++ = (v * (1.F/PR_UINT8_MAX)) * 2.F - 1.F;
#endif
}
else if (mSampleFormat == nsAudioStream::FORMAT_S16_LE) {
PRInt16 v = ReadInt16LE(&d);
#if defined(MOZ_SAMPLE_TYPE_S16LE)
*s++ = v;
#elif defined(MOZ_SAMPLE_TYPE_FLOAT32)
*s++ = (PRInt32(v) - PR_INT16_MIN) / float(PR_UINT16_MAX) * 2.F - 1.F;
#endif
}
}
}
double posTime = BytesToTime(pos);
double readSizeTime = BytesToTime(readSize);
NS_ASSERTION(posTime <= INT64_MAX / USECS_PER_S, "posTime overflow");
NS_ASSERTION(readSizeTime <= INT64_MAX / USECS_PER_S, "readSizeTime overflow");
NS_ASSERTION(frames < PR_INT32_MAX, "frames overflow");
mAudioQueue.Push(new AudioData(pos,
static_cast<PRInt64>(posTime * USECS_PER_S),
static_cast<PRInt64>(readSizeTime * USECS_PER_S),
static_cast<PRInt32>(frames),
sampleBuffer.forget(),
mChannels));
return true;
}
// This reader reports mHasVideo = false, so there is never a video frame to
// decode: both arguments are ignored and the call always returns false.
bool nsWaveReader::DecodeVideoFrame(bool &aKeyframeSkip,
                                    PRInt64 aTimeThreshold)
{
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");

  return false;
}
// Seeks the underlying resource to the frame-aligned byte position that
// corresponds to aTarget (in microseconds), clamped to the stream duration.
// aStartTime, aEndTime and aCurrentTime are not used.
//
// Returns NS_ERROR_FAILURE if the decode state cannot be reset; otherwise
// returns whatever the resource's Seek reports.
nsresult nsWaveReader::Seek(PRInt64 aTarget, PRInt64 aStartTime, PRInt64 aEndTime, PRInt64 aCurrentTime)
{
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
  LOG(PR_LOG_DEBUG, ("%p About to seek to %lld", mDecoder, aTarget));

  if (NS_FAILED(ResetDecode())) {
    return NS_ERROR_FAILURE;
  }

  // Total duration of the PCM data, first in seconds and then in usecs.
  const double durationSecs = BytesToTime(GetDataLength());
  NS_ASSERTION(durationSecs < INT64_MAX / USECS_PER_S, "Duration overflow");
  const PRInt64 durationUsecs = static_cast<PRInt64>(durationSecs * USECS_PER_S);

  // Never seek past the end of the data.
  const PRInt64 clampedTarget = NS_MIN(aTarget, durationUsecs);
  const double seekTime = clampedTarget / static_cast<double>(USECS_PER_S);

  // Convert to a byte offset within the PCM data, then make it absolute.
  PRInt64 byteOffset = RoundDownToFrame(static_cast<PRInt64>(TimeToBytes(seekTime)));
  NS_ASSERTION(INT64_MAX - mWavePCMOffset > byteOffset, "Integer overflow during wave seek");
  byteOffset += mWavePCMOffset;

  return mDecoder->GetResource()->Seek(nsISeekableStream::NS_SEEK_SET, byteOffset);
}
// Truncates a time in seconds down to a whole number of microseconds and
// returns the result, still expressed in seconds.
static double RoundToUsecs(double aSeconds) {
  const double wholeUsecs = floor(aSeconds * USECS_PER_S);
  return wholeUsecs / USECS_PER_S;
}
// Adds to aBuffered the time ranges (in seconds) that correspond to the
// cached byte ranges of the resource at or after the start of the PCM data.
// aStartTime is not used. Always returns NS_OK; nothing is added when the
// reader has no audio.
nsresult nsWaveReader::GetBuffered(nsTimeRanges* aBuffered, PRInt64 aStartTime)
{
  if (!mInfo.mHasAudio) {
    return NS_OK;
  }

  // Walk the cached ranges one after another; a negative start offset ends
  // the walk.
  PRInt64 rangeEnd = 0;
  for (PRInt64 rangeStart = mDecoder->GetResource()->GetNextCachedData(mWavePCMOffset);
       rangeStart >= 0;
       rangeStart = mDecoder->GetResource()->GetNextCachedData(rangeEnd)) {
    rangeEnd = mDecoder->GetResource()->GetCachedDataEnd(rangeStart);

    // Bytes [rangeStart..rangeEnd] are cached.
    NS_ASSERTION(rangeStart >= mWavePCMOffset, "Integer underflow in GetBuffered");
    NS_ASSERTION(rangeEnd >= mWavePCMOffset, "Integer underflow in GetBuffered");

    // Times are rounded to microseconds so that they carry the same
    // precision as the media element's currentTime and duration attributes.
    const double startSecs = RoundToUsecs(BytesToTime(rangeStart - mWavePCMOffset));
    const double endSecs = RoundToUsecs(BytesToTime(rangeEnd - mWavePCMOffset));
    aBuffered->Add(startSecs, endSecs);
  }

  return NS_OK;
}
bool
nsWaveReader::ReadAll(char* aBuf, PRInt64 aSize, PRInt64* aBytesRead)
{
PRUint32 got = 0;
if (aBytesRead) {
*aBytesRead = 0;
}
do {
PRUint32 read = 0;
if (NS_FAILED(mDecoder->GetResource()->Read(aBuf + got, PRUint32(aSize - got), &read))) {
NS_WARNING("Resource read failed");
return false;
}
if (read == 0) {
return false;
}
mDecoder->NotifyBytesConsumed(read);
got += read;
if (aBytesRead) {
*aBytesRead = got;
}
} while (got != aSize);
return true;
}
bool
nsWaveReader::LoadRIFFChunk()
{
char riffHeader[RIFF_INITIAL_SIZE];
const char* p = riffHeader;
NS_ABORT_IF_FALSE(mDecoder->GetResource()->Tell() == 0,
"LoadRIFFChunk called when resource in invalid state");
if (!ReadAll(riffHeader, sizeof(riffHeader))) {
return false;
}
PR_STATIC_ASSERT(sizeof(PRUint32) * 2 <= RIFF_INITIAL_SIZE);
if (ReadUint32BE(&p) != RIFF_CHUNK_MAGIC) {
NS_WARNING("resource data not in RIFF format");
return false;
}
// Skip over RIFF size field.
p += 4;
if (ReadUint32BE(&p) != WAVE_CHUNK_MAGIC) {
NS_WARNING("Expected WAVE chunk");
return false;
}
return true;
}
// Reads chunk headers from the current resource position, skipping over the
// body of every chunk whose tag is not aWantedChunk.
//
// aWantedChunk is the four-character tag to look for, packed big-endian.
// aChunkSize must be non-null. It is reset to 0 on entry and receives the
// size field of the wanted chunk on success (unpadded, exactly as stored).
//
// Returns true with the resource positioned at the first byte of the wanted
// chunk's body. Returns false if the resource runs out of data or a read
// fails before the chunk is found.
bool
nsWaveReader::ScanForwardUntil(PRUint32 aWantedChunk, PRUint32* aChunkSize)
{
  NS_ABORT_IF_FALSE(aChunkSize, "Require aChunkSize argument");
  *aChunkSize = 0;

  // Scratch space used to read and discard unwanted chunk bodies. It is
  // allocated on first use and then shared by all skipped chunks, so the
  // common case (wanted chunk comes first) allocates nothing.
  static const unsigned int MAX_CHUNK_SIZE = 1 << 16;
  PR_STATIC_ASSERT(MAX_CHUNK_SIZE < UINT_MAX / sizeof(char));
  nsAutoArrayPtr<char> scratch;

  for (;;) {
    static const unsigned int CHUNK_HEADER_SIZE = 8;
    char chunkHeader[CHUNK_HEADER_SIZE];
    const char* p = chunkHeader;

    if (!ReadAll(chunkHeader, sizeof(chunkHeader))) {
      return false;
    }

    PR_STATIC_ASSERT(sizeof(PRUint32) * 2 <= CHUNK_HEADER_SIZE);
    PRUint32 magic = ReadUint32BE(&p);
    PRUint32 chunkSize = ReadUint32LE(&p);

    if (magic == aWantedChunk) {
      *aChunkSize = chunkSize;
      return true;
    }

    // RIFF chunks are two-byte aligned, so round up if necessary. The count
    // is kept in 64 bits: padding a size of 0xFFFFFFFF in 32 bits would wrap
    // to 0 and the chunk body would then be parsed as chunk headers.
    PRUint64 toSkip = PRUint64(chunkSize) + (chunkSize % 2);

    if (toSkip > 0 && !scratch.get()) {
      scratch = new char[MAX_CHUNK_SIZE];
    }

    while (toSkip > 0) {
      PRUint32 size = PRUint32(NS_MIN<PRUint64>(toSkip, MAX_CHUNK_SIZE));
      if (!ReadAll(scratch.get(), size)) {
        return false;
      }
      toSkip -= size;
    }
  }
}
bool
nsWaveReader::LoadFormatChunk()
{
PRUint32 fmtSize, rate, channels, frameSize, sampleFormat;
char waveFormat[WAVE_FORMAT_CHUNK_SIZE];
const char* p = waveFormat;
// RIFF chunks are always word (two byte) aligned.
NS_ABORT_IF_FALSE(mDecoder->GetResource()->Tell() % 2 == 0,
"LoadFormatChunk called with unaligned resource");
// The "format" chunk may not directly follow the "riff" chunk, so skip
// over any intermediate chunks.
if (!ScanForwardUntil(FRMT_CHUNK_MAGIC, &fmtSize)) {
return false;
}
if (!ReadAll(waveFormat, sizeof(waveFormat))) {
return false;
}
PR_STATIC_ASSERT(sizeof(PRUint16) +
sizeof(PRUint16) +
sizeof(PRUint32) +
4 +
sizeof(PRUint16) +
sizeof(PRUint16) <= sizeof(waveFormat));
if (ReadUint16LE(&p) != WAVE_FORMAT_ENCODING_PCM) {
NS_WARNING("WAVE is not uncompressed PCM, compressed encodings are not supported");
return false;
}
channels = ReadUint16LE(&p);
rate = ReadUint32LE(&p);
// Skip over average bytes per second field.
p += 4;
frameSize = ReadUint16LE(&p);
sampleFormat = ReadUint16LE(&p);
// PCM encoded WAVEs are not expected to have an extended "format" chunk,
// but I have found WAVEs that have a extended "format" chunk with an
// extension size of 0 bytes. Be polite and handle this rather than
// considering the file invalid. This code skips any extension of the
// "format" chunk.
if (fmtSize > WAVE_FORMAT_CHUNK_SIZE) {
char extLength[2];
const char* p = extLength;
if (!ReadAll(extLength, sizeof(extLength))) {
return false;
}
PR_STATIC_ASSERT(sizeof(PRUint16) <= sizeof(extLength));
PRUint16 extra = ReadUint16LE(&p);
if (fmtSize - (WAVE_FORMAT_CHUNK_SIZE + 2) != extra) {
NS_WARNING("Invalid extended format chunk size");
return false;
}
extra += extra % 2;
if (extra > 0) {
PR_STATIC_ASSERT(PR_UINT16_MAX + (PR_UINT16_MAX % 2) < UINT_MAX / sizeof(char));
nsAutoArrayPtr<char> chunkExtension(new char[extra]);
if (!ReadAll(chunkExtension.get(), extra)) {
return false;
}
}
}
// RIFF chunks are always word (two byte) aligned.
NS_ABORT_IF_FALSE(mDecoder->GetResource()->Tell() % 2 == 0,
"LoadFormatChunk left resource unaligned");
// Make sure metadata is fairly sane. The rate check is fairly arbitrary,
// but the channels check is intentionally limited to mono or stereo
// because that's what the audio backend currently supports.
if (rate < 100 || rate > 96000 ||
channels < 1 || channels > MAX_CHANNELS ||
(frameSize != 1 && frameSize != 2 && frameSize != 4) ||
(sampleFormat != 8 && sampleFormat != 16)) {
NS_WARNING("Invalid WAVE metadata");
return false;
}
ReentrantMonitorAutoEnter monitor(mDecoder->GetReentrantMonitor());
mSampleRate = rate;
mChannels = channels;
mFrameSize = frameSize;
if (sampleFormat == 8) {
mSampleFormat = nsAudioStream::FORMAT_U8;
} else {
mSampleFormat = nsAudioStream::FORMAT_S16_LE;
}
return true;
}
bool
nsWaveReader::FindDataOffset()
{
// RIFF chunks are always word (two byte) aligned.
NS_ABORT_IF_FALSE(mDecoder->GetResource()->Tell() % 2 == 0,
"FindDataOffset called with unaligned resource");
// The "data" chunk may not directly follow the "format" chunk, so skip
// over any intermediate chunks.
PRUint32 length;
if (!ScanForwardUntil(DATA_CHUNK_MAGIC, &length)) {
return false;
}
PRInt64 offset = mDecoder->GetResource()->Tell();
if (offset <= 0 || offset > PR_UINT32_MAX) {
NS_WARNING("PCM data offset out of range");
return false;
}
ReentrantMonitorAutoEnter monitor(mDecoder->GetReentrantMonitor());
mWaveLength = length;
mWavePCMOffset = PRUint32(offset);
return true;
}
// Converts a byte count of PCM data into a duration in seconds, using the
// sample rate and frame size parsed from the format chunk. aBytes must be
// non-negative.
double
nsWaveReader::BytesToTime(PRInt64 aBytes) const
{
  NS_ABORT_IF_FALSE(aBytes >= 0, "Must be >= 0");
  // Compute in double precision. A float has a 24-bit mantissa, so it cannot
  // represent byte counts above 16 MiB exactly and the resulting timestamps
  // would drift for anything longer than a short clip.
  return static_cast<double>(aBytes) / mSampleRate / mFrameSize;
}
// Converts a time in seconds into a byte offset within the PCM data,
// rounded down to the start of a frame. aTime must be non-negative.
PRInt64
nsWaveReader::TimeToBytes(double aTime) const
{
  NS_ABORT_IF_FALSE(aTime >= 0.0f, "Must be >= 0");
  const PRInt64 unalignedBytes = PRInt64(aTime * mSampleRate * mFrameSize);
  return RoundDownToFrame(unalignedBytes);
}
// Returns the largest multiple of the frame size that does not exceed
// aBytes. aBytes must be non-negative.
PRInt64
nsWaveReader::RoundDownToFrame(PRInt64 aBytes) const
{
  NS_ABORT_IF_FALSE(aBytes >= 0, "Must be >= 0");
  // Bytes belonging to a trailing, incomplete frame.
  const PRInt64 partialFrameBytes = aBytes % mFrameSize;
  return aBytes - partialFrameBytes;
}
// Returns the number of bytes of PCM data that can be played: the length
// announced by the data chunk, capped by what the resource actually holds
// after the PCM offset whenever the resource length is known.
PRInt64
nsWaveReader::GetDataLength()
{
  const PRInt64 declaredLength = mWaveLength;

  // A negative stream length means the length is unknown; in that case the
  // declared length is all there is to go on.
  const PRInt64 streamLength = mDecoder->GetResource()->GetLength();
  if (streamLength < 0) {
    return declaredLength;
  }

  // If the resource is shorter than the data chunk claims, base the
  // playback duration on the bytes that are really there.
  const PRInt64 availableLength = NS_MAX<PRInt64>(0, streamLength - mWavePCMOffset);
  return NS_MIN(availableLength, declaredLength);
}
// Returns the current read position of the underlying resource, as reported
// by its Tell() method.
PRInt64
nsWaveReader::GetPosition()
{
  const PRInt64 currentOffset = mDecoder->GetResource()->Tell();
  return currentOffset;
}