Bug 1102666 - Parse the edts block in MoofParser and offset composition time accordingly. r=ajones

--HG--
extra : rebase_source : b5be2a267415c0ee675d24110ab2feb1fc10f822
This commit is contained in:
Matt Woodrow 2014-11-26 18:00:31 +13:00
parent b1bc92c14e
commit 2d4dd39d33
9 changed files with 118 additions and 38 deletions

View File

@ -131,6 +131,7 @@ TrackConfig::Update(sp<MetaData>& aMetaData, const char* aMimeType)
// aMimeType points to a string from MediaDefs.cpp so we don't need to copy it // aMimeType points to a string from MediaDefs.cpp so we don't need to copy it
mime_type = aMimeType; mime_type = aMimeType;
duration = FindInt64(aMetaData, kKeyDuration); duration = FindInt64(aMetaData, kKeyDuration);
media_time = FindInt64(aMetaData, kKeyMediaTime);
mTrackId = FindInt32(aMetaData, kKeyTrackID); mTrackId = FindInt32(aMetaData, kKeyTrackID);
crypto.Update(aMetaData); crypto.Update(aMetaData);
} }
@ -215,11 +216,11 @@ MP4Sample::~MP4Sample()
} }
void void
MP4Sample::Update() MP4Sample::Update(int64_t& aMediaTime)
{ {
sp<MetaData> m = mMediaBuffer->meta_data(); sp<MetaData> m = mMediaBuffer->meta_data();
decode_timestamp = FindInt64(m, kKeyDecodingTime); decode_timestamp = FindInt64(m, kKeyDecodingTime);
composition_timestamp = FindInt64(m, kKeyTime); composition_timestamp = FindInt64(m, kKeyTime) - aMediaTime;
duration = FindInt64(m, kKeyDuration); duration = FindInt64(m, kKeyDuration);
byte_offset = FindInt64(m, kKey64BitFileOffset); byte_offset = FindInt64(m, kKey64BitFileOffset);
is_sync_point = FindInt32(m, kKeyIsSyncFrame); is_sync_point = FindInt32(m, kKeyIsSyncFrame);

View File

@ -22,7 +22,7 @@ MoofParser::RebuildFragmentedIndex(const nsTArray<MediaByteRange>& aByteRanges)
mInitRange = MediaByteRange(0, box.Range().mEnd); mInitRange = MediaByteRange(0, box.Range().mEnd);
ParseMoov(box); ParseMoov(box);
} else if (box.IsType("moof")) { } else if (box.IsType("moof")) {
Moof moof(box, mTrex, mMdhd); Moof moof(box, mTrex, mMdhd, mEdts);
if (!mMoofs.IsEmpty()) { if (!mMoofs.IsEmpty()) {
// Stitch time ranges together in the case of a (hopefully small) time // Stitch time ranges together in the case of a (hopefully small) time
@ -81,6 +81,8 @@ MoofParser::ParseTrak(Box& aBox)
if (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId) { if (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId) {
ParseMdia(box, tkhd); ParseMdia(box, tkhd);
} }
} else if (box.IsType("edts")) {
mEdts = Edts(box);
} }
} }
} }
@ -108,18 +110,18 @@ MoofParser::ParseMvex(Box& aBox)
} }
} }
Moof::Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd) : Moof::Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts) :
mRange(aBox.Range()), mMaxRoundingError(0) mRange(aBox.Range()), mMaxRoundingError(0)
{ {
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
if (box.IsType("traf")) { if (box.IsType("traf")) {
ParseTraf(box, aTrex, aMdhd); ParseTraf(box, aTrex, aMdhd, aEdts);
} }
} }
} }
void void
Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd) Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts)
{ {
Tfhd tfhd(aTrex); Tfhd tfhd(aTrex);
Tfdt tfdt; Tfdt tfdt;
@ -132,7 +134,7 @@ Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd)
} }
} else if (box.IsType("trun")) { } else if (box.IsType("trun")) {
if (!aTrex.mTrackId || tfhd.mTrackId == aTrex.mTrackId) { if (!aTrex.mTrackId || tfhd.mTrackId == aTrex.mTrackId) {
ParseTrun(box, tfhd, tfdt, aMdhd); ParseTrun(box, tfhd, tfdt, aMdhd, aEdts);
} }
} }
} }
@ -161,7 +163,7 @@ public:
}; };
void void
Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Tfdt& aTfdt, Mdhd& aMdhd) Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Tfdt& aTfdt, Mdhd& aMdhd, Edts& aEdts)
{ {
if (!aMdhd.mTimescale) { if (!aMdhd.mTimescale) {
return; return;
@ -209,8 +211,8 @@ Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Tfdt& aTfdt, Mdhd& aMdhd)
offset += sampleSize; offset += sampleSize;
sample.mCompositionRange = Interval<Microseconds>( sample.mCompositionRange = Interval<Microseconds>(
aMdhd.ToMicroseconds(decodeTime + ctsOffset), aMdhd.ToMicroseconds(decodeTime + ctsOffset - aEdts.mMediaStart),
aMdhd.ToMicroseconds(decodeTime + ctsOffset + sampleDuration)); aMdhd.ToMicroseconds(decodeTime + ctsOffset + sampleDuration - aEdts.mMediaStart));
decodeTime += sampleDuration; decodeTime += sampleDuration;
sample.mSync = !(sampleFlags & 0x1010000); sample.mSync = !(sampleFlags & 0x1010000);
@ -328,4 +330,35 @@ Tfdt::Tfdt(Box& aBox)
} }
reader->DiscardRemaining(); reader->DiscardRemaining();
} }
// Builds an Edts from an 'edts' box by reading the edit list ('elst') held in
// its first child.
//
// Only a single-entry edit list is understood. Any other entry count leaves
// mMediaStart at 0. For a single entry, mMediaStart receives the entry's
// media_time field, which Moof::ParseTrun subtracts from each sample's
// composition time.
Edts::Edts(Box& aBox)
  : mMediaStart(0)
{
  Box child = aBox.FirstChild();
  if (!child.IsType("elst")) {
    return;
  }

  BoxReader reader(child);
  // The first 32-bit word carries the box version in its top byte.
  uint32_t flags = reader->ReadU32();
  uint8_t version = flags >> 24;
  uint32_t entryCount = reader->ReadU32();
  NS_ASSERTION(entryCount == 1, "Can't handle videos with multiple edits");
  if (entryCount != 1) {
    reader->DiscardRemaining();
    return;
  }

  // Version 1 stores 64-bit fields, every other version is read as 32-bit.
  uint64_t segment_duration;
  if (version == 1) {
    segment_duration = reader->ReadU64();
    mMediaStart = reader->Read64();
  } else {
    segment_duration = reader->ReadU32();
    mMediaStart = reader->Read32();
  }
  NS_ASSERTION(segment_duration == 0, "Can't handle edits with fixed durations");

  if (mMediaStart < 0) {
    // A media_time of -1 denotes an empty edit rather than a position in the
    // media. Subtracting a negative start would push every composition time
    // forward, so treat it as "no offset".
    mMediaStart = 0;
  }
  reader->DiscardRemaining();
}
} }

View File

@ -96,6 +96,16 @@ public:
return mozilla::BigEndian::readUint64(ptr); return mozilla::BigEndian::readUint64(ptr);
} }
// Consumes the next 8 bytes and decodes them as a big-endian signed 64-bit
// integer. When Read() cannot supply 8 bytes this trips MOZ_ASSERT and
// yields 0 instead.
int64_t Read64()
{
  const uint8_t* bytes = Read(8);
  if (bytes) {
    return mozilla::BigEndian::readInt64(bytes);
  }
  MOZ_ASSERT(false);
  return 0;
}
const uint8_t* Read(size_t aCount) const uint8_t* Read(size_t aCount)
{ {
if (aCount > mRemaining) { if (aCount > mRemaining) {

View File

@ -78,10 +78,11 @@ public:
class TrackConfig class TrackConfig
{ {
public: public:
TrackConfig() : mime_type(nullptr), mTrackId(0), duration(0) {} TrackConfig() : mime_type(nullptr), mTrackId(0), duration(0), media_time(0) {}
const char* mime_type; const char* mime_type;
uint32_t mTrackId; uint32_t mTrackId;
int64_t duration; int64_t duration;
int64_t media_time;
CryptoTrack crypto; CryptoTrack crypto;
void Update(stagefright::sp<stagefright::MetaData>& aMetaData, void Update(stagefright::sp<stagefright::MetaData>& aMetaData,
@ -144,7 +145,7 @@ class MP4Sample
public: public:
MP4Sample(); MP4Sample();
~MP4Sample(); ~MP4Sample();
void Update(); void Update(int64_t& aMediaTime);
void Pad(size_t aPaddingBytes); void Pad(size_t aPaddingBytes);
stagefright::MediaBuffer* mMediaBuffer; stagefright::MediaBuffer* mMediaBuffer;

View File

@ -96,6 +96,15 @@ public:
uint64_t mBaseMediaDecodeTime; uint64_t mBaseMediaDecodeTime;
}; };
// Parsed form of an 'edts' box.
class Edts
{
public:
  // Default state: no edit applied, media starts at 0.
  Edts()
    : mMediaStart(0)
  {
  }

  // Parses the given 'edts' box; defined out of line.
  explicit Edts(Box& aBox);

  // Offset subtracted from sample composition times by Moof::ParseTrun
  // before they are converted to microseconds.
  int64_t mMediaStart;
};
struct Sample struct Sample
{ {
mozilla::MediaByteRange mByteRange; mozilla::MediaByteRange mByteRange;
@ -106,7 +115,7 @@ struct Sample
class Moof class Moof
{ {
public: public:
Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd); Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts);
void FixRounding(const Moof& aMoof); void FixRounding(const Moof& aMoof);
mozilla::MediaByteRange mRange; mozilla::MediaByteRange mRange;
@ -115,8 +124,8 @@ public:
nsTArray<Sample> mIndex; nsTArray<Sample> mIndex;
private: private:
void ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd); void ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts);
void ParseTrun(Box& aBox, Tfhd& aTfhd, Tfdt& aTfdt, Mdhd& aMdhd); void ParseTrun(Box& aBox, Tfhd& aTfhd, Tfdt& aTfdt, Mdhd& aMdhd, Edts& aEdts);
uint64_t mMaxRoundingError; uint64_t mMaxRoundingError;
}; };
@ -146,6 +155,7 @@ public:
Mdhd mMdhd; Mdhd mMdhd;
Trex mTrex; Trex mTrex;
Tfdt mTfdt; Tfdt mTfdt;
Edts mEdts;
nsTArray<Moof> mMoofs; nsTArray<Moof> mMoofs;
}; };
} }

View File

@ -173,7 +173,7 @@ MP4Demuxer::DemuxAudioSample()
return nullptr; return nullptr;
} }
sample->Update(); sample->Update(mAudioConfig.media_time);
return sample.forget(); return sample.forget();
} }
@ -190,7 +190,7 @@ MP4Demuxer::DemuxVideoSample()
return nullptr; return nullptr;
} }
sample->Update(); sample->Update(mVideoConfig.media_time);
sample->prefix_data = mVideoConfig.annex_b; sample->prefix_data = mVideoConfig.annex_b;
return sample.forget(); return sample.forget();

View File

@ -77,6 +77,7 @@ enum {
kKeyIsDRM = 'idrm', // int32_t (bool) kKeyIsDRM = 'idrm', // int32_t (bool)
kKeyEncoderDelay = 'encd', // int32_t (frames) kKeyEncoderDelay = 'encd', // int32_t (frames)
kKeyEncoderPadding = 'encp', // int32_t (frames) kKeyEncoderPadding = 'encp', // int32_t (frames)
kKeyMediaTime = 'mtme', // int64_t (usecs)
kKeyAlbum = 'albu', // cstring kKeyAlbum = 'albu', // cstring
kKeyArtist = 'arti', // cstring kKeyArtist = 'arti', // cstring

View File

@ -838,6 +838,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
track->includes_expensive_metadata = false; track->includes_expensive_metadata = false;
track->skipTrack = false; track->skipTrack = false;
track->timescale = 0; track->timescale = 0;
track->segment_duration = 0;
track->media_time = 0;
track->meta->setCString(kKeyMIMEType, "application/octet-stream"); track->meta->setCString(kKeyMIMEType, "application/octet-stream");
} }
@ -910,12 +912,10 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
ALOGW("ignoring edit list because timescale is 0"); ALOGW("ignoring edit list because timescale is 0");
} else { } else {
off64_t entriesoffset = data_offset + 8; off64_t entriesoffset = data_offset + 8;
uint64_t segment_duration;
int64_t media_time;
if (version == 1) { if (version == 1) {
if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || if (!mDataSource->getUInt64(entriesoffset, &mLastTrack->segment_duration) ||
!mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&mLastTrack->media_time)) {
return ERROR_IO; return ERROR_IO;
} }
} else if (version == 0) { } else if (version == 0) {
@ -925,28 +925,14 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
!mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
return ERROR_IO; return ERROR_IO;
} }
segment_duration = sd; mLastTrack->segment_duration = sd;
media_time = mt; mLastTrack->media_time = mt;
} else { } else {
return ERROR_IO; return ERROR_IO;
} }
uint64_t halfscale = mHeaderTimescale / 2; storeEditList();
segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
int64_t duration;
int32_t samplerate;
if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
int64_t delay = (media_time * samplerate + 500000) / 1000000;
mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
int64_t paddingus = duration - (segment_duration + media_time);
int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
}
} }
*offset += chunk_size; *offset += chunk_size;
break; break;
@ -1115,6 +1101,10 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
mLastTrack->timescale = ntohl(timescale); mLastTrack->timescale = ntohl(timescale);
// Now that we've parsed the media timescale, we can interpret
// the edit list data.
storeEditList();
int64_t duration = 0; int64_t duration = 0;
if (version == 1) { if (version == 1) {
if (mDataSource->readAt( if (mDataSource->readAt(
@ -1812,6 +1802,33 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
return OK; return OK;
} }
// Converts the raw edit-list values stashed on mLastTrack (segment_duration
// and media_time) to microseconds and publishes them in the track's metadata.
//
// This is a no-op until both the movie header timescale and the track's own
// timescale are known, so it is safe to call it from either the elst or the
// mdhd parsing path; whichever runs second does the actual work.
void MPEG4Extractor::storeEditList()
{
    if (!mLastTrack ||
        mHeaderTimescale == 0 ||
        mLastTrack->timescale == 0) {
        return;
    }

    // Both values come straight from the file. Reject anything that would
    // overflow when scaled to microseconds: the signed multiplication below
    // would otherwise be undefined behaviour and the unsigned one would wrap.
    static const uint64_t kMaxSegmentUnits = 0xffffffffffffffffULL / 1000000;
    static const int64_t kMaxMediaUnits = 0x7fffffffffffffffLL / 1000000;
    if (mLastTrack->segment_duration > kMaxSegmentUnits ||
        mLastTrack->media_time > kMaxMediaUnits ||
        mLastTrack->media_time < -kMaxMediaUnits) {
        return;
    }

    // segment_duration is scaled by the movie header timescale.
    uint64_t segment_duration = (mLastTrack->segment_duration * 1000000)/ mHeaderTimescale;
    // media_time is measured in media time scale units.
    int64_t media_time = (mLastTrack->media_time * 1000000) / mLastTrack->timescale;

    mLastTrack->meta->setInt64(kKeyMediaTime, media_time);

    // Encoder delay/padding can only be derived once the track duration and
    // sample rate have been recorded in the metadata.
    int64_t duration;
    int32_t samplerate;
    if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
        mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {

        // Microseconds -> sample frames, rounded to nearest.
        int64_t delay = (media_time * samplerate + 500000) / 1000000;
        mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);

        int64_t paddingus = duration - (segment_duration + media_time);
        int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
        mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
    }
}
status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
ALOGV("MPEG4Extractor::parseSegmentIndex"); ALOGV("MPEG4Extractor::parseSegmentIndex");

View File

@ -86,6 +86,11 @@ private:
Track *next; Track *next;
sp<MetaData> meta; sp<MetaData> meta;
uint32_t timescale; uint32_t timescale;
// Temporary storage for elst until we've
// parsed mdhd and can interpret them.
uint64_t segment_duration;
int64_t media_time;
sp<SampleTable> sampleTable; sp<SampleTable> sampleTable;
bool includes_expensive_metadata; bool includes_expensive_metadata;
bool skipTrack; bool skipTrack;
@ -141,6 +146,8 @@ private:
status_t parseSegmentIndex(off64_t data_offset, size_t data_size); status_t parseSegmentIndex(off64_t data_offset, size_t data_size);
void storeEditList();
Track *findTrackByMimePrefix(const char *mimePrefix); Track *findTrackByMimePrefix(const char *mimePrefix);
MPEG4Extractor(const MPEG4Extractor &); MPEG4Extractor(const MPEG4Extractor &);