gecko/content/media/ogg/nsOggCodecState.h
Ralph Giles daed6b4964 Bug 674225 - Add Opus support to nsOggReader. r=cpearce
Parse and decode Opus streams embedded in the Ogg
container. Based on the draft specification from
https://wiki.xiph.org/OggOpus
Support is conditional on the runtime preference
setting media.opus.enabled, which is false by
default until we're confident the spec is stable
and useful.

This patch doesn't support the gain header or
multichannel files.

The LEUint*() functions from the skeleton parser
are used to read the multi-byte header fields.
This requires moving them to earlier in the file.

Mappings for the .opus filename extension are also
added to facilitate testing with local files.
2012-05-01 17:29:34 -07:00

497 lines
17 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* ***** BEGIN LICENSE BLOCK *****
* Version: ML 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla code.
*
* The Initial Developer of the Original Code is the Mozilla Corporation.
* Portions created by the Initial Developer are Copyright (C) 2010
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Chris Double <chris.double@double.co.nz>
* Chris Pearce <chris@pearce.org.nz>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#if !defined(nsOggCodecState_h_)
#define nsOggCodecState_h_
#include <ogg/ogg.h>
#include <theora/theoradec.h>
#ifdef MOZ_TREMOR
#include <tremor/ivorbiscodec.h>
#else
#include <vorbis/codec.h>
#endif
#ifdef MOZ_OPUS
#include <opus/opus.h>
#endif
#include <nsDeque.h>
#include <nsTArray.h>
#include <nsClassHashtable.h>
#include "VideoUtils.h"
#include "mozilla/StandardInteger.h"
// Uncomment the following to validate that we're predicting the number
// of Vorbis samples in each packet correctly.
#define VALIDATE_VORBIS_SAMPLE_CALCULATION
#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION
#include <map>
#endif
// Deallocates a packet, used in nsPacketQueue below.
class OggPacketDeallocator : public nsDequeFunctor {
virtual void* operator() (void* aPacket) {
ogg_packet* p = static_cast<ogg_packet*>(aPacket);
delete [] p->packet;
delete p;
return nsnull;
}
};
// A queue of ogg_packets. When we read a page, we extract the page's packets
// and buffer them in the owning stream's nsOggCodecState. This is because
// if we're skipping up to the next keyframe in very large frame sized videos,
// there may be several megabytes of data between keyframes, and the
// ogg_stream_state would end up resizing its buffer every time we added a
// new 4KB page to the bitstream, which kills performance on Windows. This
// also gives us the option to timestamp packets rather than decoded
// frames/samples, reducing the amount of frames/samples we must decode to
// determine start-time at a particular offset, and gives us finer control
// over memory usage.
class nsPacketQueue : private nsDeque {
public:
nsPacketQueue() : nsDeque(new OggPacketDeallocator()) {}
~nsPacketQueue() { Erase(); }
bool IsEmpty() { return nsDeque::GetSize() == 0; }
void Append(ogg_packet* aPacket);
ogg_packet* PopFront() { return static_cast<ogg_packet*>(nsDeque::PopFront()); }
ogg_packet* PeekFront() { return static_cast<ogg_packet*>(nsDeque::PeekFront()); }
void PushFront(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); }
void PushBack(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); }
void Erase() { nsDeque::Erase(); }
};
// Encapsulates the data required for decoding an ogg bitstream and for
// converting granulepos to timestamps.
class nsOggCodecState {
public:
// Ogg types we know about
enum CodecType {
TYPE_VORBIS=0,
TYPE_THEORA=1,
TYPE_OPUS=2,
TYPE_SKELETON=3,
TYPE_UNKNOWN=4
};
virtual ~nsOggCodecState();
// Factory for creating nsCodecStates. Use instead of constructor.
// aPage should be a beginning-of-stream page.
static nsOggCodecState* Create(ogg_page* aPage);
virtual CodecType GetType() { return TYPE_UNKNOWN; }
// Reads a header packet. Returns true when last header has been read.
virtual bool DecodeHeader(ogg_packet* aPacket) {
return (mDoneReadingHeaders = true);
}
// Returns the end time that a granulepos represents.
virtual PRInt64 Time(PRInt64 granulepos) { return -1; }
// Returns the start time that a granulepos represents.
virtual PRInt64 StartTime(PRInt64 granulepos) { return -1; }
// Initializes the codec state.
virtual bool Init();
// Returns true when this bitstream has finished reading all its
// header packets.
bool DoneReadingHeaders() { return mDoneReadingHeaders; }
// Deactivates the bitstream. Only the primary video and audio bitstreams
// should be active.
void Deactivate() {
mActive = false;
mDoneReadingHeaders = true;
Reset();
}
// Resets decoding state.
virtual nsresult Reset();
// Returns true if the nsOggCodecState thinks this packet is a header
// packet. Note this does not verify the validity of the header packet,
// it just guarantees that the packet is marked as a header packet (i.e.
// it is definintely not a data packet). Do not use this to identify
// streams, use it to filter header packets from data packets while
// decoding.
virtual bool IsHeader(ogg_packet* aPacket) { return false; }
// Returns the next packet in the stream, or nsnull if there are no more
// packets buffered in the packet queue. More packets can be buffered by
// inserting one or more pages into the stream by calling PageIn(). The
// caller is responsible for deleting returned packet's using
// nsOggCodecState::ReleasePacket(). The packet will have a valid granulepos.
ogg_packet* PacketOut();
// Releases the memory used by a cloned packet. Every packet returned by
// PacketOut() must be free'd using this function.
static void ReleasePacket(ogg_packet* aPacket);
// Extracts all packets from the page, and inserts them into the packet
// queue. They can be extracted by calling PacketOut(). Packets from an
// inactive stream are not buffered, i.e. this call has no effect for
// inactive streams. Multiple pages may need to be inserted before
// PacketOut() starts to return packets, as granulepos may need to be
// captured.
virtual nsresult PageIn(ogg_page* aPage);
// Number of packets read.
PRUint64 mPacketCount;
// Serial number of the bitstream.
PRUint32 mSerial;
// Ogg specific state.
ogg_stream_state mState;
// Queue of as yet undecoded packets. Packets are guaranteed to have
// a valid granulepos.
nsPacketQueue mPackets;
// Is the bitstream active; whether we're decoding and playing this bitstream.
bool mActive;
// True when all headers packets have been read.
bool mDoneReadingHeaders;
protected:
// Constructs a new nsOggCodecState. aActive denotes whether the stream is
// active. For streams of unsupported or unknown types, aActive should be
// false.
nsOggCodecState(ogg_page* aBosPage, bool aActive);
// Deallocates all packets stored in mUnstamped, and clears the array.
void ClearUnstamped();
// Extracts packets out of mState until a data packet with a non -1
// granulepos is encountered, or no more packets are readable. Header
// packets are pushed into the packet queue immediately, and data packets
// are buffered in mUnstamped. Once a non -1 granulepos packet is read
// the granulepos of the packets in mUnstamped can be inferred, and they
// can be pushed over to mPackets. Used by PageIn() implementations in
// subclasses.
nsresult PacketOutUntilGranulepos(bool& aFoundGranulepos);
// Temporary buffer in which to store packets while we're reading packets
// in order to capture granulepos.
nsTArray<ogg_packet*> mUnstamped;
};
class nsVorbisState : public nsOggCodecState {
public:
nsVorbisState(ogg_page* aBosPage);
virtual ~nsVorbisState();
CodecType GetType() { return TYPE_VORBIS; }
bool DecodeHeader(ogg_packet* aPacket);
PRInt64 Time(PRInt64 granulepos);
bool Init();
nsresult Reset();
bool IsHeader(ogg_packet* aPacket);
nsresult PageIn(ogg_page* aPage);
// Returns the end time that a granulepos represents.
static PRInt64 Time(vorbis_info* aInfo, PRInt64 aGranulePos);
vorbis_info mInfo;
vorbis_comment mComment;
vorbis_dsp_state mDsp;
vorbis_block mBlock;
private:
// Reconstructs the granulepos of Vorbis packets stored in the mUnstamped
// array.
nsresult ReconstructVorbisGranulepos();
// The "block size" of the previously decoded Vorbis packet, or 0 if we've
// not yet decoded anything. This is used to calculate the number of samples
// in a Vorbis packet, since each Vorbis packet depends on the previous
// packet while being decoded.
long mPrevVorbisBlockSize;
// Granulepos (end sample) of the last decoded Vorbis packet. This is used
// to calculate the Vorbis granulepos when we don't find a granulepos to
// back-propagate from.
PRInt64 mGranulepos;
#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION
// When validating that we've correctly predicted Vorbis packets' number
// of samples, we store each packet's predicted number of samples in this
// map, and verify we decode the predicted number of samples.
std::map<ogg_packet*, long> mVorbisPacketSamples;
#endif
// Records that aPacket is predicted to have aSamples samples.
// This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
// is not defined.
void RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples);
// Verifies that aPacket has had its number of samples predicted.
// This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
// is not defined.
void AssertHasRecordedPacketSamples(ogg_packet* aPacket);
public:
// Asserts that the number of samples predicted for aPacket is aSamples.
// This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
// is not defined.
void ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples);
};
// Returns 1 if the Theora info struct is decoding a media of Theora
// version (maj,min,sub) or later, otherwise returns 0.
int TheoraVersion(th_info* info,
unsigned char maj,
unsigned char min,
unsigned char sub);
class nsTheoraState : public nsOggCodecState {
public:
nsTheoraState(ogg_page* aBosPage);
virtual ~nsTheoraState();
CodecType GetType() { return TYPE_THEORA; }
bool DecodeHeader(ogg_packet* aPacket);
PRInt64 Time(PRInt64 granulepos);
PRInt64 StartTime(PRInt64 granulepos);
bool Init();
bool IsHeader(ogg_packet* aPacket);
nsresult PageIn(ogg_page* aPage);
// Returns the maximum number of microseconds which a keyframe can be offset
// from any given interframe.
PRInt64 MaxKeyframeOffset();
// Returns the end time that a granulepos represents.
static PRInt64 Time(th_info* aInfo, PRInt64 aGranulePos);
th_info mInfo;
th_comment mComment;
th_setup_info *mSetup;
th_dec_ctx* mCtx;
float mPixelAspectRatio;
private:
// Reconstructs the granulepos of Theora packets stored in the
// mUnstamped array. mUnstamped must be filled with consecutive packets from
// the stream, with the last packet having a known granulepos. Using this
// known granulepos, and the known frame numbers, we recover the granulepos
// of all frames in the array. This enables us to determine their timestamps.
void ReconstructTheoraGranulepos();
};
class nsOpusState : public nsOggCodecState {
#ifdef MOZ_OPUS
public:
nsOpusState(ogg_page* aBosPage);
virtual ~nsOpusState();
CodecType GetType() { return TYPE_OPUS; }
bool DecodeHeader(ogg_packet* aPacket);
PRInt64 Time(PRInt64 granulepos);
bool Init();
nsresult Reset();
bool IsHeader(ogg_packet* aPacket);
nsresult PageIn(ogg_page* aPage);
// Various fields from the Ogg Opus header.
int mRate; // Sample rate the decoder uses (always 48 kHz).
int mNominalRate; // Original sample rate of the data (informational).
int mChannels; // Number of channels the stream encodes.
int mPreSkip; // Number of samples to strip after decoder reset.
float mGain; // Gain (dB) to apply to decoder output.
int mChannelMapping; // Channel mapping family.
int mStreams; // Number of packed streams in each packet.
OpusDecoder *mDecoder;
private:
// Reconstructs the granulepos of Opus packets stored in the
// mUnstamped array. mUnstamped must be filled with consecutive packets from
// the stream, with the last packet having a known granulepos. Using this
// known granulepos, and the known frame numbers, we recover the granulepos
// of all frames in the array. This enables us to determine their timestamps.
void ReconstructGranulepos();
#endif /* MOZ_OPUS */
};
// Constructs a 32bit version number out of two 16 bit major,minor
// version numbers.
#define SKELETON_VERSION(major, minor) (((major)<<16)|(minor))
class nsSkeletonState : public nsOggCodecState {
public:
nsSkeletonState(ogg_page* aBosPage);
~nsSkeletonState();
CodecType GetType() { return TYPE_SKELETON; }
bool DecodeHeader(ogg_packet* aPacket);
PRInt64 Time(PRInt64 granulepos) { return -1; }
bool Init() { return true; }
bool IsHeader(ogg_packet* aPacket) { return true; }
// Return true if the given time (in milliseconds) is within
// the presentation time defined in the skeleton track.
bool IsPresentable(PRInt64 aTime) { return aTime >= mPresentationTime; }
// Stores the offset of the page on which a keyframe starts,
// and its presentation time.
class nsKeyPoint {
public:
nsKeyPoint()
: mOffset(INT64_MAX),
mTime(INT64_MAX) {}
nsKeyPoint(PRInt64 aOffset, PRInt64 aTime)
: mOffset(aOffset),
mTime(aTime) {}
// Offset from start of segment/link-in-the-chain in bytes.
PRInt64 mOffset;
// Presentation time in usecs.
PRInt64 mTime;
bool IsNull() {
return mOffset == INT64_MAX &&
mTime == INT64_MAX;
}
};
// Stores a keyframe's byte-offset, presentation time and the serialno
// of the stream it belongs to.
class nsSeekTarget {
public:
nsSeekTarget() : mSerial(0) {}
nsKeyPoint mKeyPoint;
PRUint32 mSerial;
bool IsNull() {
return mKeyPoint.IsNull() &&
mSerial == 0;
}
};
// Determines from the seek index the keyframe which you must seek back to
// in order to get all keyframes required to render all streams with
// serialnos in aTracks, at time aTarget.
nsresult IndexedSeekTarget(PRInt64 aTarget,
nsTArray<PRUint32>& aTracks,
nsSeekTarget& aResult);
bool HasIndex() const {
return mIndex.IsInitialized() && mIndex.Count() > 0;
}
// Returns the duration of the active tracks in the media, if we have
// an index. aTracks must be filled with the serialnos of the active tracks.
// The duration is calculated as the greatest end time of all active tracks,
// minus the smalled start time of all the active tracks.
nsresult GetDuration(const nsTArray<PRUint32>& aTracks, PRInt64& aDuration);
private:
// Decodes an index packet. Returns false on failure.
bool DecodeIndex(ogg_packet* aPacket);
// Gets the keypoint you must seek to in order to get the keyframe required
// to render the stream at time aTarget on stream with serial aSerialno.
nsresult IndexedSeekTargetForTrack(PRUint32 aSerialno,
PRInt64 aTarget,
nsKeyPoint& aResult);
// Version of the decoded skeleton track, as per the SKELETON_VERSION macro.
PRUint32 mVersion;
// Presentation time of the resource in milliseconds
PRInt64 mPresentationTime;
// Length of the resource in bytes.
PRInt64 mLength;
// Stores the keyframe index and duration information for a particular
// stream.
class nsKeyFrameIndex {
public:
nsKeyFrameIndex(PRInt64 aStartTime, PRInt64 aEndTime)
: mStartTime(aStartTime),
mEndTime(aEndTime)
{
MOZ_COUNT_CTOR(nsKeyFrameIndex);
}
~nsKeyFrameIndex() {
MOZ_COUNT_DTOR(nsKeyFrameIndex);
}
void Add(PRInt64 aOffset, PRInt64 aTimeMs) {
mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTimeMs));
}
const nsKeyPoint& Get(PRUint32 aIndex) const {
return mKeyPoints[aIndex];
}
PRUint32 Length() const {
return mKeyPoints.Length();
}
// Presentation time of the first sample in this stream in usecs.
const PRInt64 mStartTime;
// End time of the last sample in this stream in usecs.
const PRInt64 mEndTime;
private:
nsTArray<nsKeyPoint> mKeyPoints;
};
// Maps Ogg serialnos to the index-keypoint list.
nsClassHashtable<nsUint32HashKey, nsKeyFrameIndex> mIndex;
};
#endif