/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsError.h"
#include "MediaDecoderStateMachine.h"
#include "AbstractMediaDecoder.h"
#include "MediaResource.h"
#include "WebMReader.h"
#include "WebMBufferedParser.h"
#include "VideoUtils.h"
#include "mozilla/dom/TimeRanges.h"
#include "VorbisUtils.h"

#define VPX_DONT_DEFINE_STDINT_TYPES
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"

using mozilla::NesteggPacketHolder;

// Lets nsAutoRef/nsReturnRef own a heap-allocated NesteggPacketHolder:
// releasing the reference deletes the holder.
template <>
class nsAutoRefTraits<NesteggPacketHolder> :
  public nsPointerRefTraits<NesteggPacketHolder>
{
public:
  static void Release(NesteggPacketHolder* aHolder) { delete aHolder; }
};

namespace mozilla {

using namespace layers;

// Un-comment to enable logging of seek bisections.
//#define SEEK_LOGGING

#ifdef PR_LOGGING
extern PRLogModuleInfo* gMediaDecoderLog;
PRLogModuleInfo* gNesteggLog;
#define LOG(type, msg) PR_LOG(gMediaDecoderLog, type, msg)
#ifdef SEEK_LOGGING
#define SEEK_LOG(type, msg) PR_LOG(gMediaDecoderLog, type, msg)
#else
#define SEEK_LOG(type, msg)
#endif
#else
#define LOG(type, msg)
#define SEEK_LOG(type, msg)
#endif

static const unsigned NS_PER_USEC = 1000;
static const double NS_PER_S = 1e9;
static const double USEC_PER_S = 1e6;

// If a seek request is within SEEK_DECODE_MARGIN microseconds of the
// current time, decode ahead from the current frame rather than performing
// a full seek.
static const int SEEK_DECODE_MARGIN = 250000;

// Functions for reading and seeking using MediaResource required for
// nestegg_io. The 'user data' passed to these functions is the
// decoder from which the media resource is obtained.

// nestegg read callback: fills aBuffer with aLength bytes from the decoder's
// resource. Returns -1 if a read failed, 0 if a read produced no bytes
// (treated as end of stream), and 1 otherwise.
static int webm_read(void *aBuffer, size_t aLength, void *aUserData)
{
  NS_ASSERTION(aUserData, "aUserData must point to a valid AbstractMediaDecoder");
  AbstractMediaDecoder* decoder =
    reinterpret_cast<AbstractMediaDecoder*>(aUserData);
  MediaResource* resource = decoder->GetResource();
  NS_ASSERTION(resource, "Decoder has no media resource");

  nsresult rv = NS_OK;
  bool eof = false;

  char *p = static_cast<char *>(aBuffer);
  while (NS_SUCCEEDED(rv) && aLength > 0) {
    uint32_t bytes = 0;
    rv = resource->Read(p, aLength, &bytes);
    if (bytes == 0) {
      eof = true;
      break;
    }
    decoder->NotifyBytesConsumed(bytes);
    aLength -= bytes;
    p += bytes;
  }

  // A failed read takes precedence over the end-of-stream indication.
  return NS_FAILED(rv) ? -1 : eof ? 0 : 1;
}

// nestegg seek callback: forwards to MediaResource::Seek. Returns 0 on
// success and -1 on failure.
static int webm_seek(int64_t aOffset, int aWhence, void *aUserData)
{
  NS_ASSERTION(aUserData, "aUserData must point to a valid AbstractMediaDecoder");
  AbstractMediaDecoder* decoder =
    reinterpret_cast<AbstractMediaDecoder*>(aUserData);
  MediaResource* resource = decoder->GetResource();
  NS_ASSERTION(resource, "Decoder has no media resource");
  nsresult rv = resource->Seek(aWhence, aOffset);
  return NS_SUCCEEDED(rv) ? 0 : -1;
}

// nestegg tell callback: returns the resource's current read position.
static int64_t webm_tell(void *aUserData)
{
  NS_ASSERTION(aUserData, "aUserData must point to a valid AbstractMediaDecoder");
  AbstractMediaDecoder* decoder =
    reinterpret_cast<AbstractMediaDecoder*>(aUserData);
  MediaResource* resource = decoder->GetResource();
  NS_ASSERTION(resource, "Decoder has no media resource");
  return resource->Tell();
}

// nestegg log callback: prefixes the message with the context pointer and a
// three-letter severity tag and writes it to the "Nestegg" log module.
// Compiles to a no-op when PR_LOGGING is not defined.
static void webm_log(nestegg * context,
                     unsigned int severity,
                     char const * format, ...)
{
#ifdef PR_LOGGING
  va_list args;
  char msg[256];
  const char * sevStr;

  switch(severity) {
    case NESTEGG_LOG_DEBUG:
      sevStr = "DBG";
      break;
    case NESTEGG_LOG_INFO:
      sevStr = "INF";
      break;
    case NESTEGG_LOG_WARNING:
      sevStr = "WRN";
      break;
    case NESTEGG_LOG_ERROR:
      sevStr = "ERR";
      break;
    case NESTEGG_LOG_CRITICAL:
      sevStr = "CRT";
      break;
    default:
      sevStr = "UNK";
      break;
  }

  va_start(args, format);

  PR_snprintf(msg, sizeof(msg), "%p [Nestegg-%s] ", context, sevStr);
  PR_vsnprintf(msg+strlen(msg), sizeof(msg)-strlen(msg), format, args);
  // msg is already fully formatted; log it through "%s" so that any '%'
  // in the expanded text is not interpreted as a conversion specifier.
  PR_LOG(gNesteggLog, PR_LOG_DEBUG, ("%s", msg));

  va_end(args);
#endif
}

WebMReader::WebMReader(AbstractMediaDecoder* aDecoder)
#ifdef MOZ_DASH
  : DASHRepReader(aDecoder),
#else
  : MediaDecoderReader(aDecoder),
#endif
  mContext(nullptr),
  mPacketCount(0),
  mChannels(0),
  mVideoTrack(0),
  mAudioTrack(0),
  mAudioStartUsec(-1),
  mAudioFrames(0),
  mHasVideo(false),
  mHasAudio(false)
#ifdef MOZ_DASH
  , mMainReader(nullptr),
  mSwitchingCluster(-1),
  mNextReader(nullptr),
  mSeekToCluster(-1),
  mCurrentOffset(-1),
  mPushVideoPacketToNextReader(false),
  mReachedSwitchAccessPoint(false)
#endif
{
  MOZ_COUNT_CTOR(WebMReader);
#ifdef PR_LOGGING
  if (!gNesteggLog) {
    gNesteggLog = PR_NewLogModule("Nestegg");
  }
#endif
  // Zero these member vars to avoid crashes in VP8 destroy and Vorbis clear
  // functions when destructor is called before |Init|.
  memset(&mVP8, 0, sizeof(vpx_codec_ctx_t));
  memset(&mVorbisBlock, 0, sizeof(vorbis_block));
  memset(&mVorbisDsp, 0, sizeof(vorbis_dsp_state));
  memset(&mVorbisInfo, 0, sizeof(vorbis_info));
  memset(&mVorbisComment, 0, sizeof(vorbis_comment));
}

WebMReader::~WebMReader()
{
  Cleanup();

  mVideoPackets.Reset();
  mAudioPackets.Reset();

  vpx_codec_destroy(&mVP8);

  vorbis_block_clear(&mVorbisBlock);
  vorbis_dsp_clear(&mVorbisDsp);
  vorbis_info_clear(&mVorbisInfo);
  vorbis_comment_clear(&mVorbisComment);

  MOZ_COUNT_DTOR(WebMReader);
}

// Initializes the VP8 decoder and the Vorbis info/comment structures, and
// either shares the clone donor's buffered state or creates a new one.
nsresult WebMReader::Init(MediaDecoderReader* aCloneDonor)
{
  if (vpx_codec_dec_init(&mVP8, vpx_codec_vp8_dx(), NULL, 0)) {
    return NS_ERROR_FAILURE;
  }

  vorbis_info_init(&mVorbisInfo);
  vorbis_comment_init(&mVorbisComment);
  memset(&mVorbisDsp, 0, sizeof(vorbis_dsp_state));
  memset(&mVorbisBlock, 0, sizeof(vorbis_block));

  if (aCloneDonor) {
    mBufferedState = static_cast<WebMReader*>(aCloneDonor)->mBufferedState;
  } else {
    mBufferedState = new WebMBufferedState;
  }

  return NS_OK;
}

// Resets audio bookkeeping, restarts Vorbis synthesis and drops all queued
// packets. Returns NS_ERROR_FAILURE only if the base class reset fails.
nsresult WebMReader::ResetDecode()
{
  mAudioFrames = 0;
  mAudioStartUsec = -1;
  nsresult res = NS_OK;
  if (NS_FAILED(MediaDecoderReader::ResetDecode())) {
    res = NS_ERROR_FAILURE;
  }

  // Ignore failed results from vorbis_synthesis_restart. They
  // aren't fatal and it fails when ResetDecode is called at a
  // time when no vorbis data has been read.
  vorbis_synthesis_restart(&mVorbisDsp);

  mVideoPackets.Reset();
  mAudioPackets.Reset();

#ifdef MOZ_DASH
  LOG(PR_LOG_DEBUG, ("Resetting DASH seek vars"));
  mSwitchingCluster = -1;
  mNextReader = nullptr;
  mSeekToCluster = -1;
  mCurrentOffset = -1;
  mPushVideoPacketToNextReader = false;
  mReachedSwitchAccessPoint = false;
#endif

  return res;
}

// Destroys the nestegg context, if any.
void WebMReader::Cleanup()
{
  if (mContext) {
    nestegg_destroy(mContext);
    mContext = nullptr;
  }
}

// Opens the WebM container through nestegg, selects the first usable video
// and audio tracks, initializes the Vorbis decoder from the audio track's
// three codec headers and reports the result in *aInfo. *aTags is always set
// to nullptr. Any failure after the context is created destroys the context
// and returns NS_ERROR_FAILURE.
nsresult WebMReader::ReadMetadata(VideoInfo* aInfo,
                                  MetadataTags** aTags)
{
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");

#ifdef MOZ_DASH
  // The byte-range offsets are cast to long long and printed with %lld so
  // that the conversion specifiers match the argument width.
  LOG(PR_LOG_DEBUG, ("Reader [%p] for Decoder [%p]: Reading WebM Metadata: "
                     "init bytes [%lld - %lld] cues bytes [%lld - %lld]",
                     this, mDecoder,
                     static_cast<long long>(mInitByteRange.mStart),
                     static_cast<long long>(mInitByteRange.mEnd),
                     static_cast<long long>(mCuesByteRange.mStart),
                     static_cast<long long>(mCuesByteRange.mEnd)));
#endif
  nestegg_io io;
  io.read = webm_read;
  io.seek = webm_seek;
  io.tell = webm_tell;
  io.userdata = mDecoder;
#ifdef MOZ_DASH
  int64_t maxOffset = mInitByteRange.IsNull() ? -1 : mInitByteRange.mEnd;
#else
  int64_t maxOffset = -1;
#endif
  int r = nestegg_init(&mContext, io, &webm_log, maxOffset);
  if (r == -1) {
    return NS_ERROR_FAILURE;
  }

  // nestegg reports the duration in nanoseconds; the decoder wants usecs.
  uint64_t duration = 0;
  r = nestegg_duration(mContext, &duration);
  if (r == 0) {
    ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
    mDecoder->SetMediaDuration(duration / NS_PER_USEC);
  }

  unsigned int ntracks = 0;
  r = nestegg_track_count(mContext, &ntracks);
  if (r == -1) {
    Cleanup();
    return NS_ERROR_FAILURE;
  }

  mInfo.mHasAudio = false;
  mInfo.mHasVideo = false;
  for (uint32_t track = 0; track < ntracks; ++track) {
    int id = nestegg_track_codec_id(mContext, track);
    if (id == -1) {
      Cleanup();
      return NS_ERROR_FAILURE;
    }
    int type = nestegg_track_type(mContext, track);
    if (!mHasVideo && type == NESTEGG_TRACK_VIDEO) {
      nestegg_video_params params;
      r = nestegg_track_video_params(mContext, track, &params);
      if (r == -1) {
        Cleanup();
        return NS_ERROR_FAILURE;
      }

      // Picture region, taking into account cropping, before scaling
      // to the display size.
      nsIntRect pictureRect(params.crop_left,
                            params.crop_top,
                            params.width - (params.crop_right + params.crop_left),
                            params.height - (params.crop_bottom + params.crop_top));

      // If the cropping data appears invalid then use the frame data
      if (pictureRect.width <= 0 ||
          pictureRect.height <= 0 ||
          pictureRect.x < 0 ||
          pictureRect.y < 0)
      {
        pictureRect.x = 0;
        pictureRect.y = 0;
        pictureRect.width = params.width;
        pictureRect.height = params.height;
      }

      // Validate the container-reported frame and pictureRect sizes. This ensures
      // that our video frame creation code doesn't overflow.
      nsIntSize displaySize(params.display_width, params.display_height);
      nsIntSize frameSize(params.width, params.height);
      if (!VideoInfo::ValidateVideoRegion(frameSize, pictureRect, displaySize)) {
        // Video track's frame sizes will overflow. Ignore the video track.
        continue;
      }

      mVideoTrack = track;
      mHasVideo = true;
      mInfo.mHasVideo = true;

      mInfo.mDisplay = displaySize;
      mPicture = pictureRect;
      mInitialFrame = frameSize;

      switch (params.stereo_mode) {
      case NESTEGG_VIDEO_MONO:
        mInfo.mStereoMode = STEREO_MODE_MONO;
        break;
      case NESTEGG_VIDEO_STEREO_LEFT_RIGHT:
        mInfo.mStereoMode = STEREO_MODE_LEFT_RIGHT;
        break;
      case NESTEGG_VIDEO_STEREO_BOTTOM_TOP:
        mInfo.mStereoMode = STEREO_MODE_BOTTOM_TOP;
        break;
      case NESTEGG_VIDEO_STEREO_TOP_BOTTOM:
        mInfo.mStereoMode = STEREO_MODE_TOP_BOTTOM;
        break;
      case NESTEGG_VIDEO_STEREO_RIGHT_LEFT:
        mInfo.mStereoMode = STEREO_MODE_RIGHT_LEFT;
        break;
      }
    }
    else if (!mHasAudio && type == NESTEGG_TRACK_AUDIO) {
      nestegg_audio_params params;
      r = nestegg_track_audio_params(mContext, track, &params);
      if (r == -1) {
        Cleanup();
        return NS_ERROR_FAILURE;
      }

      mAudioTrack = track;
      mHasAudio = true;
      mInfo.mHasAudio = true;

      // Get the Vorbis header data
      unsigned int nheaders = 0;
      r = nestegg_track_codec_data_count(mContext, track, &nheaders);
      if (r == -1 || nheaders != 3) {
        Cleanup();
        return NS_ERROR_FAILURE;
      }

      for (uint32_t header = 0; header < nheaders; ++header) {
        unsigned char* data = 0;
        size_t length = 0;

        r = nestegg_track_codec_data(mContext, track, header, &data, &length);
        if (r == -1) {
          Cleanup();
          return NS_ERROR_FAILURE;
        }

        // Only the first header packet is flagged as beginning-of-stream.
        ogg_packet opacket = InitOggPacket(data, length, header == 0, false, 0);

        r = vorbis_synthesis_headerin(&mVorbisInfo,
                                      &mVorbisComment,
                                      &opacket);
        if (r != 0) {
          Cleanup();
          return NS_ERROR_FAILURE;
        }
      }

      r = vorbis_synthesis_init(&mVorbisDsp, &mVorbisInfo);
      if (r != 0) {
        Cleanup();
        return NS_ERROR_FAILURE;
      }

      r = vorbis_block_init(&mVorbisDsp, &mVorbisBlock);
      if (r != 0) {
        Cleanup();
        return NS_ERROR_FAILURE;
      }

      mInfo.mAudioRate = mVorbisDsp.vi->rate;
      mInfo.mAudioChannels = mVorbisDsp.vi->channels;
      mChannels = mInfo.mAudioChannels;
    }
  }

#ifdef MOZ_DASH
  // Byte range for cues has been specified; load them.
  if (!mCuesByteRange.IsNull()) {
    maxOffset = mCuesByteRange.mEnd;

    // Iterate through cluster ranges until nestegg returns the last one
    NS_ENSURE_TRUE(mClusterByteRanges.IsEmpty(),
                   NS_ERROR_ALREADY_INITIALIZED);
    int clusterNum = 0;
    bool done = false;
    uint64_t timestamp;
    do {
      mClusterByteRanges.AppendElement();
      r = nestegg_get_cue_point(mContext, clusterNum, maxOffset,
                                &(mClusterByteRanges[clusterNum].mStart),
                                &(mClusterByteRanges[clusterNum].mEnd),
                                &timestamp);
      if (r != 0) {
        Cleanup();
        return NS_ERROR_FAILURE;
      }
      LOG(PR_LOG_DEBUG, ("Reader [%p] for Decoder [%p]: Cluster [%d]: "
                         "start [%lld] end [%lld], timestamp [%.2llfs]",
                         this, mDecoder, clusterNum,
                         mClusterByteRanges[clusterNum].mStart,
                         mClusterByteRanges[clusterNum].mEnd,
                         timestamp/NS_PER_S));
      mClusterByteRanges[clusterNum].mStartTime = timestamp/NS_PER_USEC;
      // Last cluster will have '-1' as end value
      if (mClusterByteRanges[clusterNum].mEnd == -1) {
        mClusterByteRanges[clusterNum].mEnd = (mCuesByteRange.mStart-1);
        done = true;
      } else {
        clusterNum++;
      }
    } while (!done);
  }
#endif

  // We can't seek in buffered regions if we have no cues.
  mDecoder->SetMediaSeekable(nestegg_has_cues(mContext) == 1);

  *aInfo = mInfo;

  *aTags = nullptr;

#ifdef MOZ_DASH
  mDecoder->OnReadMetadataCompleted();
#endif

  return NS_OK;
}

// Wraps a raw buffer in an ogg_packet for libvorbis. Each call consumes one
// packet number from mPacketCount.
ogg_packet WebMReader::InitOggPacket(unsigned char* aData,
                                     size_t aLength,
                                     bool aBOS,
                                     bool aEOS,
                                     int64_t aGranulepos)
{
  ogg_packet packet;
  packet.packet = aData;
  packet.bytes = aLength;
  packet.b_o_s = aBOS;
  packet.e_o_s = aEOS;
  packet.granulepos = aGranulepos;
  packet.packetno = mPacketCount++;
  return packet;
}

// Decodes every Vorbis chunk in aPacket and pushes the resulting PCM onto the
// audio queue. Returns false on any nestegg/vorbis error or arithmetic
// overflow. aPacket is NOT freed here on any path: it stays owned by the
// caller's packet holder.
bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset)
{
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");

  int r = 0;
  unsigned int count = 0;
  r = nestegg_packet_count(aPacket, &count);
  if (r == -1) {
    return false;
  }

  uint64_t tstamp = 0;
  r = nestegg_packet_tstamp(aPacket, &tstamp);
  if (r == -1) {
    return false;
  }

  const uint32_t rate = mVorbisDsp.vi->rate;
  uint64_t tstamp_usecs = tstamp / NS_PER_USEC;
  if (mAudioStartUsec == -1) {
    // This is the first audio chunk. Assume the start time of our decode
    // is the start of this chunk.
    mAudioStartUsec = tstamp_usecs;
  }
  // If there's a gap between the start of this audio chunk and the end of
  // the previous audio chunk, we need to increment the packet count so that
  // the vorbis decode doesn't use data from before the gap to help decode
  // from after the gap.
  CheckedInt64 tstamp_frames = UsecsToFrames(tstamp_usecs, rate);
  CheckedInt64 decoded_frames = UsecsToFrames(mAudioStartUsec, rate);
  if (!tstamp_frames.isValid() || !decoded_frames.isValid()) {
    NS_WARNING("Int overflow converting WebM times to frames");
    return false;
  }
  decoded_frames += mAudioFrames;
  if (!decoded_frames.isValid()) {
    NS_WARNING("Int overflow adding decoded_frames");
    return false;
  }
  if (tstamp_frames.value() > decoded_frames.value()) {
#ifdef DEBUG
    CheckedInt64 usecs = FramesToUsecs(tstamp_frames.value() - decoded_frames.value(), rate);
    LOG(PR_LOG_DEBUG, ("WebMReader detected gap of %lld, %lld frames, in audio stream\n",
      usecs.isValid() ? usecs.value() : -1,
      tstamp_frames.value() - decoded_frames.value()));
#endif
    mPacketCount++;
    mAudioStartUsec = tstamp_usecs;
    mAudioFrames = 0;
  }

  // Frames emitted so far from this packet; used to offset the start time of
  // each successive AudioData created from the same packet.
  int32_t total_frames = 0;
  for (uint32_t i = 0; i < count; ++i) {
    unsigned char* data;
    size_t length;
    r = nestegg_packet_data(aPacket, i, &data, &length);
    if (r == -1) {
      return false;
    }

    ogg_packet opacket = InitOggPacket(data, length, false, false, -1);

    if (vorbis_synthesis(&mVorbisBlock, &opacket) != 0) {
      return false;
    }

    if (vorbis_synthesis_blockin(&mVorbisDsp,
                                 &mVorbisBlock) != 0) {
      return false;
    }

    VorbisPCMValue** pcm = 0;
    int32_t frames = 0;
    while ((frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm)) > 0) {
      // Interleave the planar per-channel output into a single buffer.
      nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * mChannels]);
      for (uint32_t j = 0; j < mChannels; ++j) {
        VorbisPCMValue* channel = pcm[j];
        for (uint32_t f = 0; f < uint32_t(frames); ++f) {
          buffer[f*mChannels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[f]);
        }
      }

      CheckedInt64 duration = FramesToUsecs(frames, rate);
      if (!duration.isValid()) {
        NS_WARNING("Int overflow converting WebM audio duration");
        return false;
      }
      CheckedInt64 total_duration = FramesToUsecs(total_frames, rate);
      if (!total_duration.isValid()) {
        NS_WARNING("Int overflow converting WebM audio total_duration");
        return false;
      }

      CheckedInt64 time = total_duration + tstamp_usecs;
      if (!time.isValid()) {
        NS_WARNING("Int overflow adding total_duration and tstamp_usecs");
        // Do not free aPacket here: no other path in this function does, and
        // the caller's holder still references it, so freeing it here would
        // release the packet twice.
        return false;
      }

      total_frames += frames;
      AudioQueue().Push(new AudioData(aOffset,
                                     time.value(),
                                     duration.value(),
                                     frames,
                                     buffer.forget(),
                                     mChannels));
      mAudioFrames += frames;
      if (vorbis_synthesis_read(&mVorbisDsp, frames) != 0) {
        return false;
      }
    }
  }

  return true;
}

// Returns the next packet for aTrackType, or a null reference when no packet
// is available. With MOZ_DASH this wrapper additionally hands over to
// mNextReader at a pending stream-switch point; without MOZ_DASH the body
// below (NextPacketInternal in DASH builds) is NextPacket itself.
nsReturnRef<NesteggPacketHolder> WebMReader::NextPacket(TrackType aTrackType)
#ifdef MOZ_DASH
{
  nsAutoRef<NesteggPacketHolder> holder;
  // Get packet from next reader if we're at a switching point; most likely we
  // did not download the next packet for this reader's stream, so we have to
  // get it from the next one. Note: Switch to next reader only for video;
  // audio switching is not supported in the DASH-WebM On Demand profile.
  if (aTrackType == VIDEO &&
      (uint32_t)mSwitchingCluster < mClusterByteRanges.Length() &&
      mCurrentOffset == mClusterByteRanges[mSwitchingCluster].mStart) {

    if (mVideoPackets.GetSize() > 0) {
      holder = NextPacketInternal(VIDEO);
      LOG(PR_LOG_DEBUG,
          ("WebMReader[%p] got packet from mVideoPackets @[%lld]",
           this, holder->mOffset));
    } else {
      mReachedSwitchAccessPoint = true;
      NS_ASSERTION(mNextReader,
                   "Stream switch has been requested but mNextReader is null");
      holder = mNextReader->NextPacket(aTrackType);
      mPushVideoPacketToNextReader = true;
      // Reset for possible future switches.
      mSwitchingCluster = -1;
      LOG(PR_LOG_DEBUG,
          ("WebMReader[%p] got packet from mNextReader[%p] @[%lld]",
           this, mNextReader.get(), (holder ? holder->mOffset : 0)));
    }
  } else {
    holder = NextPacketInternal(aTrackType);
    if (holder) {
      mCurrentOffset = holder->mOffset;
    }
  }
  return holder.out();
}

nsReturnRef<NesteggPacketHolder>
WebMReader::NextPacketInternal(TrackType aTrackType)
#endif
{
  // The packet queue that packets will be pushed on if they
  // are not the type we are interested in.
  WebMPacketQueue& otherPackets =
    aTrackType == VIDEO ? mAudioPackets : mVideoPackets;

  // The packet queue for the type that we are interested in.
  WebMPacketQueue &packets =
    aTrackType == VIDEO ? mVideoPackets : mAudioPackets;

  // Flag to indicate that we do need to playback these types of
  // packets.
  bool hasType = aTrackType == VIDEO ? mHasVideo : mHasAudio;

  // Flag to indicate that we do need to playback the other type
  // of track.
  bool hasOtherType = aTrackType == VIDEO ? mHasAudio : mHasVideo;

  // Track we are interested in
  uint32_t ourTrack = aTrackType == VIDEO ? mVideoTrack : mAudioTrack;

  // Value of other track
  uint32_t otherTrack = aTrackType == VIDEO ? mAudioTrack : mVideoTrack;

  nsAutoRef<NesteggPacketHolder> holder;

  if (packets.GetSize() > 0) {
    holder.own(packets.PopFront());
  } else {
    // Keep reading packets until we find a packet
    // for the track we want.
    do {
      nestegg_packet* packet;
      int r = nestegg_read_packet(mContext, &packet);
      if (r <= 0) {
        return nsReturnRef<NesteggPacketHolder>();
      }
      int64_t offset = mDecoder->GetResource()->Tell();
      holder.own(new NesteggPacketHolder(packet, offset));

      unsigned int track = 0;
      r = nestegg_packet_track(packet, &track);
      if (r == -1) {
        return nsReturnRef<NesteggPacketHolder>();
      }

      if (hasOtherType && otherTrack == track) {
        // Save the packet for when we want these packets
        otherPackets.Push(holder.disown());
        continue;
      }

      // The packet is for the track we want to play
      if (hasType && ourTrack == track) {
        break;
      }
    } while (true);
  }

  return holder.out();
}

// Decodes one audio packet. Marks the audio queue finished and returns false
// when no more packets are available.
bool WebMReader::DecodeAudioData()
{
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");

  nsAutoRef<NesteggPacketHolder> holder(NextPacket(AUDIO));
  if (!holder) {
    AudioQueue().Finish();
    return false;
  }

  return DecodeAudioPacket(holder->mPacket, holder->mOffset);
}

// Decodes one video packet and pushes its frames onto the video queue. While
// aKeyframeSkip is set, chunks are skipped until a keyframe at or after
// aTimeThreshold is found; frames earlier than aTimeThreshold are decoded but
// not queued. Returns false on error or when no more packets are available.
bool WebMReader::DecodeVideoFrame(bool &aKeyframeSkip,
                                  int64_t aTimeThreshold)
{
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");

  // Record number of frames decoded and parsed. Automatically update the
  // stats counters using the AutoNotifyDecoded stack-based class.
  uint32_t parsed = 0, decoded = 0;
  AbstractMediaDecoder::AutoNotifyDecoded autoNotify(mDecoder, parsed, decoded);

  nsAutoRef<NesteggPacketHolder> holder(NextPacket(VIDEO));
  if (!holder) {
    VideoQueue().Finish();
    return false;
  }

  nestegg_packet* packet = holder->mPacket;
  unsigned int track = 0;
  int r = nestegg_packet_track(packet, &track);
  if (r == -1) {
    return false;
  }

  unsigned int count = 0;
  r = nestegg_packet_count(packet, &count);
  if (r == -1) {
    return false;
  }

  uint64_t tstamp = 0;
  r = nestegg_packet_tstamp(packet, &tstamp);
  if (r == -1) {
    return false;
  }

  // The end time of this frame is the start time of the next frame.  Fetch
  // the timestamp of the next packet for this track.  If we've reached the
  // end of the resource, use the file's duration as the end time of this
  // video frame.
  uint64_t next_tstamp = 0;
  {
    nsAutoRef<NesteggPacketHolder> next_holder(NextPacket(VIDEO));
    if (next_holder) {
      r = nestegg_packet_tstamp(next_holder->mPacket, &next_tstamp);
      if (r == -1) {
        return false;
      }
      // Put the peeked packet back so the next call decodes it.
      PushVideoPacket(next_holder.disown());
    } else {
      ReentrantMonitorAutoEnter decoderMon(mDecoder->GetReentrantMonitor());
      int64_t endTime = mDecoder->GetEndMediaTime();
      if (endTime == -1) {
        return false;
      }
      next_tstamp = endTime * NS_PER_USEC;
    }
  }

  int64_t tstamp_usecs = tstamp / NS_PER_USEC;
  for (uint32_t i = 0; i < count; ++i) {
    unsigned char* data;
    size_t length;
    r = nestegg_packet_data(packet, i, &data, &length);
    if (r == -1) {
      return false;
    }

    vpx_codec_stream_info_t si;
    memset(&si, 0, sizeof(si));
    si.sz = sizeof(si);
    vpx_codec_peek_stream_info(vpx_codec_vp8_dx(), data, length, &si);
    if (aKeyframeSkip && (!si.is_kf || tstamp_usecs < aTimeThreshold)) {
      // Skipping to next keyframe...
      parsed++; // Assume 1 frame per chunk.
      continue;
    }

    if (aKeyframeSkip && si.is_kf) {
      aKeyframeSkip = false;
    }

    if (vpx_codec_decode(&mVP8, data, length, NULL, 0)) {
      return false;
    }

    // If the timestamp of the video frame is less than
    // the time threshold required then it is not added
    // to the video queue and won't be displayed.
    if (tstamp_usecs < aTimeThreshold) {
      parsed++; // Assume 1 frame per chunk.
      continue;
    }

    vpx_codec_iter_t  iter = NULL;
    vpx_image_t      *img;

    while ((img = vpx_codec_get_frame(&mVP8, &iter))) {
      NS_ASSERTION(img->fmt == IMG_FMT_I420, "WebM image format is not I420");

      // Chroma shifts are rounded down as per the decoding examples in the VP8 SDK
      VideoData::YCbCrBuffer b;
      b.mPlanes[0].mData = img->planes[0];
      b.mPlanes[0].mStride = img->stride[0];
      b.mPlanes[0].mHeight = img->d_h;
      b.mPlanes[0].mWidth = img->d_w;
      b.mPlanes[0].mOffset = b.mPlanes[0].mSkip = 0;

      b.mPlanes[1].mData = img->planes[1];
      b.mPlanes[1].mStride = img->stride[1];
      b.mPlanes[1].mHeight = (img->d_h + 1) >> img->y_chroma_shift;
      b.mPlanes[1].mWidth = (img->d_w + 1) >> img->x_chroma_shift;
      b.mPlanes[1].mOffset = b.mPlanes[1].mSkip = 0;

      b.mPlanes[2].mData = img->planes[2];
      b.mPlanes[2].mStride = img->stride[2];
      b.mPlanes[2].mHeight = (img->d_h + 1) >> img->y_chroma_shift;
      b.mPlanes[2].mWidth = (img->d_w + 1) >> img->x_chroma_shift;
      b.mPlanes[2].mOffset = b.mPlanes[2].mSkip = 0;

      nsIntRect picture = mPicture;
      if (img->d_w != static_cast<uint32_t>(mInitialFrame.width) ||
          img->d_h != static_cast<uint32_t>(mInitialFrame.height)) {
        // Frame size is different from what the container reports. This is legal
        // in WebM, and we will preserve the ratio of the crop rectangle as it
        // was reported relative to the picture size reported by the container.
        picture.x = (mPicture.x * img->d_w) / mInitialFrame.width;
        picture.y = (mPicture.y * img->d_h) / mInitialFrame.height;
        picture.width = (img->d_w * mPicture.width) / mInitialFrame.width;
        picture.height = (img->d_h * mPicture.height) / mInitialFrame.height;
      }

      VideoData *v = VideoData::Create(mInfo,
                                       mDecoder->GetImageContainer(),
                                       holder->mOffset,
                                       tstamp_usecs,
                                       next_tstamp / NS_PER_USEC,
                                       b,
                                       si.is_kf,
                                       -1,
                                       picture);
      if (!v) {
        return false;
      }
      parsed++;
      decoded++;
      NS_ASSERTION(decoded <= parsed,
        "Expect only 1 frame per chunk per packet in WebM...");
      VideoQueue().Push(v);
    }
  }

  return true;
}

// Puts a video packet back at the front of the video packet queue. With
// MOZ_DASH, the packet goes to mNextReader's queue instead when the previous
// packet was fetched from that reader.
void
WebMReader::PushVideoPacket(NesteggPacketHolder* aItem)
{
#ifdef MOZ_DASH
  if (mPushVideoPacketToNextReader) {
    NS_ASSERTION(mNextReader,
                 "Stream switch has been requested but mNextReader is null");
    mNextReader->mVideoPackets.PushFront(aItem);
    mPushVideoPacketToNextReader = false;
  } else {
#endif
    mVideoPackets.PushFront(aItem);
#ifdef MOZ_DASH
  }
#endif
}

// Resets decode state, asks nestegg to seek the video track (or the audio
// track if there is no video) to aTarget, then decodes forward to aTarget.
nsresult WebMReader::Seek(int64_t aTarget, int64_t aStartTime, int64_t aEndTime,
                            int64_t aCurrentTime)
{
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");

  LOG(PR_LOG_DEBUG, ("Reader [%p] for Decoder [%p]: About to seek to %fs",
                     this, mDecoder, aTarget/1000000.0));
  if (NS_FAILED(ResetDecode())) {
    return NS_ERROR_FAILURE;
  }
  uint32_t trackToSeek = mHasVideo ? mVideoTrack : mAudioTrack;
  // aTarget is in microseconds; nestegg_track_seek takes nanoseconds.
  int r = nestegg_track_seek(mContext, trackToSeek, aTarget * NS_PER_USEC);
  if (r != 0) {
    return NS_ERROR_FAILURE;
  }
  return DecodeToTarget(aTarget);
}

#ifdef MOZ_DASH
// Returns true if the whole resource is cached, or if the last cached range
// contains the end offset of the final cluster byte range.
bool WebMReader::IsDataCachedAtEndOfSubsegments()
{
  MediaResource* resource = mDecoder->GetResource();
  NS_ENSURE_TRUE(resource, false);
  if (resource->IsDataCachedToEndOfResource(0)) {
     return true;
  }

  if (mClusterByteRanges.IsEmpty()) {
    return false;
  }

  nsTArray<MediaByteRange> ranges;
  nsresult rv = resource->GetCachedRanges(ranges);
  NS_ENSURE_SUCCESS(rv, false);
  if (ranges.IsEmpty()) {
    return false;
  }

  // Return true if data at the end of the final subsegment is cached.
  uint32_t finalSubsegmentIndex = mClusterByteRanges.Length()-1;
  uint64_t finalSubEndOffset = mClusterByteRanges[finalSubsegmentIndex].mEnd;
  uint32_t finalRangeIndex = ranges.Length()-1;
  uint64_t finalRangeStartOffset = ranges[finalRangeIndex].mStart;
  uint64_t finalRangeEndOffset = ranges[finalRangeIndex].mEnd;

  return (finalRangeStartOffset < finalSubEndOffset &&
          finalSubEndOffset <= finalRangeEndOffset);
}
#endif

// Adds the buffered time ranges of the resource to aBuffered. Returns NS_OK
// without adding anything if there is no nestegg context or the timestamp
// scale cannot be read.
nsresult WebMReader::GetBuffered(dom::TimeRanges* aBuffered, int64_t aStartTime)
{
  MediaResource* resource = mDecoder->GetResource();

  uint64_t timecodeScale;
  if (!mContext || nestegg_tstamp_scale(mContext, &timecodeScale) == -1) {
    return NS_OK;
  }

  // Special case completely cached files.  This also handles local files.
  bool isFullyCached = resource->IsDataCachedToEndOfResource(0);
  if (isFullyCached) {
    uint64_t duration = 0;
    if (nestegg_duration(mContext, &duration) == 0) {
      aBuffered->Add(0, duration / NS_PER_S);
    }
  }

  uint32_t bufferedLength = 0;
  aBuffered->GetLength(&bufferedLength);

  // Either the file is not fully cached, or we couldn't find a duration in
  // the WebM bitstream.
  if (!isFullyCached || !bufferedLength) {
    nsTArray<MediaByteRange> ranges;
    nsresult res = resource->GetCachedRanges(ranges);
    NS_ENSURE_SUCCESS(res, res);

    for (uint32_t index = 0; index < ranges.Length(); index++) {
      uint64_t start, end;
      bool rv = mBufferedState->CalculateBufferedForRange(ranges[index].mStart,
                                                          ranges[index].mEnd,
                                                          &start, &end);
      if (rv) {
        // NOTE(review): aStartTime is subtracted from a value expressed in
        // seconds. If aStartTime is in microseconds, like the other int64_t
        // times in this file, the units do not match -- confirm with callers.
        double startTime = start * timecodeScale / NS_PER_S - aStartTime;
        double endTime = end * timecodeScale / NS_PER_S - aStartTime;
#ifdef MOZ_DASH
        // If this range extends to the end of a cluster, the true end time is
        // the cluster's end timestamp. Since WebM frames do not have an end
        // timestamp, a fully cached cluster must be reported with the correct
        // end time of its final frame. Otherwise, buffered ranges could be
        // reported with missing frames at cluster boundaries, specifically
        // boundaries where stream switching has occurred.
        if (!mClusterByteRanges.IsEmpty()) {
          for (uint32_t clusterIndex = 0;
               clusterIndex < (mClusterByteRanges.Length()-1);
               clusterIndex++) {
            if (ranges[index].mEnd >= mClusterByteRanges[clusterIndex].mEnd) {
              double clusterEndTime =
                  mClusterByteRanges[clusterIndex+1].mStartTime / USEC_PER_S;
              if (endTime < clusterEndTime) {
                LOG(PR_LOG_DEBUG, ("End of cluster: endTime becoming %0.3fs",
                                   clusterEndTime));
                endTime = clusterEndTime;
              }
            }
          }
        }
#endif
        // If this range extends to the end of the file, the true end time
        // is the file's duration.
        if (resource->IsDataCachedToEndOfResource(ranges[index].mStart)) {
          uint64_t duration = 0;
          if (nestegg_duration(mContext, &duration) == 0) {
            endTime = duration / NS_PER_S;
          }
        }

        aBuffered->Add(startTime, endTime);
      }
    }
  }

  return NS_OK;
}

// Forwards newly arrived bytes to the buffered-state tracker.
void WebMReader::NotifyDataArrived(const char* aBuffer, uint32_t aLength, int64_t aOffset)
{
  mBufferedState->NotifyDataArrived(aBuffer, aLength, aOffset);
}

#ifdef MOZ_DASH
// Returns the index of the cluster byte range that contains aSeekToTime, or
// -1 if the time is negative, lies past the media duration, or no cluster
// ranges are known.
int64_t
WebMReader::GetSubsegmentForSeekTime(int64_t aSeekToTime)
{
  NS_ENSURE_TRUE(0 <= aSeekToTime, -1);
  // Without any cluster ranges there is no subsegment to report; returning
  // early also prevents Length()-1 below from wrapping around.
  if (mClusterByteRanges.IsEmpty()) {
    return -1;
  }
  // Check the first n-1 subsegments. End time is the start time of the next
  // subsegment.
  for (uint32_t i = 1; i < (mClusterByteRanges.Length()); i++) {
    if (aSeekToTime < mClusterByteRanges[i].mStartTime) {
      return i-1;
    }
  }
  // Check the last subsegment. End time is the end time of the file.
  NS_ASSERTION(mDecoder, "Decoder should not be null!");
  if (aSeekToTime <= mDecoder->GetMediaDuration()) {
    return mClusterByteRanges.Length()-1;
  }

  return (-1);
}

// Copies the cluster byte ranges into aByteRanges, which must be empty.
// Fails if metadata (context, cues, cluster ranges) has not been loaded.
nsresult
WebMReader::GetSubsegmentByteRanges(nsTArray<MediaByteRange>& aByteRanges)
{
  NS_ENSURE_TRUE(mContext, NS_ERROR_NULL_POINTER);
  NS_ENSURE_TRUE(aByteRanges.IsEmpty(), NS_ERROR_ALREADY_INITIALIZED);
  NS_ENSURE_FALSE(mClusterByteRanges.IsEmpty(), NS_ERROR_NOT_INITIALIZED);
  NS_ENSURE_FALSE(mCuesByteRange.IsNull(), NS_ERROR_NOT_INITIALIZED);

  for (uint32_t i = 0; i < mClusterByteRanges.Length(); i++) {
    aByteRanges.AppendElement();
    aByteRanges[i] = mClusterByteRanges[i];
  }

  return NS_OK;
}

// Requests that video packets be taken from aNextReader once this reader
// reaches the start of subsegment aSubsegmentIdx. The request is ignored if a
// switch is already pending or if any argument is invalid.
void
WebMReader::RequestSwitchAtSubsegment(int32_t aSubsegmentIdx,
                                      MediaDecoderReader* aNextReader)
{
  NS_ASSERTION(NS_IsMainThread() || mDecoder->OnDecodeThread(),
               "Should be on main thread or decode thread.");
  mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();

  // Only allow one switch at a time; ignore if one is already requested.
  if (mSwitchingCluster != -1) {
    return;
  }
  // Validate every argument before recording anything, so that a rejected
  // request never leaves a switch pending without a reader to switch to.
  NS_ENSURE_TRUE_VOID((uint32_t)aSubsegmentIdx < mClusterByteRanges.Length());
  NS_ENSURE_TRUE_VOID(aNextReader);
  NS_ENSURE_TRUE_VOID(aNextReader != this);

  mSwitchingCluster = aSubsegmentIdx;
  mNextReader = static_cast<WebMReader*>(aNextReader);
}

// Requests a seek to the start of subsegment aIdx. The request is ignored if
// a reader switch or another seek is already pending.
void
WebMReader::RequestSeekToSubsegment(uint32_t aIdx)
{
  // Check the decoder before the first dereference below.
  NS_ASSERTION(mDecoder, "decoder should not be null!");
  NS_ASSERTION(NS_IsMainThread() || mDecoder->OnDecodeThread(),
               "Should be on main thread or decode thread.");
  mDecoder->GetReentrantMonitor().AssertCurrentThreadIn();

  // Don't seek if we're about to switch to another reader.
  if (mSwitchingCluster != -1) {
    return;
  }
  // Only allow seeking if a request was not already made.
  if (mSeekToCluster != -1) {
    return;
  }
  NS_ENSURE_TRUE_VOID(aIdx < mClusterByteRanges.Length());
  mSeekToCluster = aIdx;

  // XXX Hack to get the resource to seek to the correct offset if the decode
  // thread is in shutdown, e.g. if the video is not autoplay.
  if (mDecoder->IsShutdown()) {
    ReentrantMonitorAutoExit exitMon(mDecoder->GetReentrantMonitor());
    mDecoder->GetResource()->Seek(PR_SEEK_SET,
                                  mClusterByteRanges[mSeekToCluster].mStart);
  }
}

// Performs a pending subsegment seek, if one was requested, with the decoder
// monitor released for the duration of the seek.
void
WebMReader::PrepareToDecode()
{
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
  if (mSeekToCluster != -1) {
    ReentrantMonitorAutoExit exitMon(mDecoder->GetReentrantMonitor());
    SeekToCluster(mSeekToCluster);
  }
}

// Seeks the nestegg context to the start offset of subsegment aIdx and, on
// success, clears the pending seek request.
void
WebMReader::SeekToCluster(uint32_t aIdx)
{
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
  NS_ASSERTION(0 <= mSeekToCluster, "mSeekToCluster should be set.");
  NS_ENSURE_TRUE_VOID(aIdx < mClusterByteRanges.Length());
  // aIdx is a uint32_t, so it is printed with %u.
  LOG(PR_LOG_DEBUG, ("Reader [%p] for Decoder [%p]: seeking to "
                     "subsegment [%u] at offset [%lld]",
                     this, mDecoder, aIdx, mClusterByteRanges[aIdx].mStart));
  int r = nestegg_offset_seek(mContext, mClusterByteRanges[aIdx].mStart);
  NS_ENSURE_TRUE_VOID(r == 0);
  mSeekToCluster = -1;
}

// Returns true exactly once after NextPacket has reached the switch access
// point, clearing the flag as it does so; otherwise returns false.
bool
WebMReader::HasReachedSubsegment(uint32_t aSubsegmentIndex)
{
  NS_ASSERTION(mDecoder, "Decoder is null.");
  NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
  NS_ENSURE_TRUE(aSubsegmentIndex < mClusterByteRanges.Length(), false);

  NS_ASSERTION(mDecoder->GetResource(), "Decoder has no media resource.");
  if (mReachedSwitchAccessPoint) {
    LOG(PR_LOG_DEBUG,
        ("Reader [%p] for Decoder [%p]: reached switching offset [%lld] = "
         "mClusterByteRanges[%d].mStart[%lld]",
         this, mDecoder, mCurrentOffset, aSubsegmentIndex,
         mClusterByteRanges[aSubsegmentIndex].mStart));
    mReachedSwitchAccessPoint = false;
    return true;
  }
  return false;
}
#endif /* MOZ_DASH */

} // namespace mozilla