gecko/dom/media/encoder/VP8TrackEncoder.cpp

557 lines
19 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "VP8TrackEncoder.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#include "VideoSegment.h"
#include "VideoUtils.h"
#include "prsystem.h"
#include "WebMWriter.h"
#include "libyuv.h"
#include "GeckoProfiler.h"
namespace mozilla {
// Logging support. When PR_LOGGING is defined, VP8LOG(fmt, ...) forwards its
// printf-style arguments to PR_LOG on gVP8TrackEncoderLog at PR_LOG_DEBUG
// level. The log module itself is created by the VP8TrackEncoder constructor.
#ifdef PR_LOGGING
PRLogModuleInfo* gVP8TrackEncoderLog;
#define VP8LOG(msg, ...) PR_LOG(gVP8TrackEncoderLog, PR_LOG_DEBUG, \
(msg, ##__VA_ARGS__))
#else
// Logging is compiled out: VP8LOG expands to nothing.
#define VP8LOG(msg, ...)
#endif
// Target bitrate handed to libvpx (rc_target_bitrate) in Init().
#define DEFAULT_BITRATE 2500 // in kbit/s
// Output frame rate; Init() derives mEncodedFrameDuration from it and also
// uses it as the maximum key-frame distance.
#define DEFAULT_ENCODE_FRAMERATE 30
using namespace mozilla::layers;
/**
 * Constructs an encoder that is not yet initialized. The libvpx codec context
 * and the image wrapper are allocated here (value-initialized) and are filled
 * in later by Init().
 */
VP8TrackEncoder::VP8TrackEncoder()
  : VideoTrackEncoder()
  , mEncodedFrameDuration(0)
  , mEncodedTimestamp(0)
  , mRemainingTicks(0)
  , mVPXContext(new vpx_codec_ctx_t())
  , mVPXImageWrapper(new vpx_image_t())
{
  MOZ_COUNT_CTOR(VP8TrackEncoder);

#ifdef PR_LOGGING
  // Create the log module the first time any encoder instance is built.
  if (gVP8TrackEncoderLog == nullptr) {
    gVP8TrackEncoderLog = PR_NewLogModule("VP8TrackEncoder");
  }
#endif
}
/**
 * Releases the libvpx resources held by this encoder.
 */
VP8TrackEncoder::~VP8TrackEncoder()
{
  // The codec context is only set up by a successful Init(), so it is only
  // destroyed when mInitialized is set.
  if (mInitialized) {
    vpx_codec_destroy(mVPXContext);
  }

  if (vpx_image_t* wrapper = mVPXImageWrapper) {
    vpx_img_free(wrapper);
  }

  MOZ_COUNT_DTOR(VP8TrackEncoder);
}
/**
 * Configures and starts the libvpx VP8 encoder.
 *
 * @param aWidth          Encoded frame width in pixels (must be >= 1).
 * @param aHeight         Encoded frame height in pixels (must be >= 1).
 * @param aDisplayWidth   Display width reported in the metadata (>= 1).
 * @param aDisplayHeight  Display height reported in the metadata (>= 1).
 * @param aTrackRate      Tick rate of the source track; also used as the
 *                        denominator of the codec time base. Must be at least
 *                        DEFAULT_ENCODE_FRAMERATE.
 * @return NS_OK on success, NS_ERROR_FAILURE if an argument is out of range
 *         or libvpx rejects the configuration.
 *
 * On success mInitialized is set and every thread waiting on
 * mReentrantMonitor is woken up.
 */
nsresult
VP8TrackEncoder::Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth,
                      int32_t aDisplayHeight,TrackRate aTrackRate)
{
  // mEncodedFrameDuration is computed below with an integer division of the
  // track rate by DEFAULT_ENCODE_FRAMERATE. A smaller track rate would yield
  // a frame duration of zero, which makes the duration bookkeeping in
  // CalculateEncodedDuration() unable to make progress, so reject it here.
  // This check also covers the zero and negative rates.
  if (aWidth < 1 || aHeight < 1 || aDisplayWidth < 1 || aDisplayHeight < 1
      || aTrackRate < DEFAULT_ENCODE_FRAMERATE) {
    return NS_ERROR_FAILURE;
  }

  ReentrantMonitorAutoEnter mon(mReentrantMonitor);

  mTrackRate = aTrackRate;
  mEncodedFrameRate = DEFAULT_ENCODE_FRAMERATE;
  mEncodedFrameDuration = mTrackRate / mEncodedFrameRate;
  mFrameWidth = aWidth;
  mFrameHeight = aHeight;
  mDisplayWidth = aDisplayWidth;
  mDisplayHeight = aDisplayHeight;

  // Encoder configuration structure, seeded with the libvpx defaults.
  vpx_codec_enc_cfg_t config;
  memset(&config, 0, sizeof(vpx_codec_enc_cfg_t));
  if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &config, 0)) {
    return NS_ERROR_FAILURE;
  }

  // Creating a wrapper to the image - setting image data to NULL. Actual
  // pointer will be set in encode. Setting align to 1, as it is meaningless
  // (actual memory is not allocated).
  vpx_img_wrap(mVPXImageWrapper, VPX_IMG_FMT_I420,
               mFrameWidth, mFrameHeight, 1, nullptr);

  config.g_w = mFrameWidth;
  config.g_h = mFrameHeight;
  // TODO: Maybe we should have various aFrameRate bitrate pair for each devices?
  // or for different platform
  config.rc_target_bitrate = DEFAULT_BITRATE; // in kbit/s

  // Setting the time base of the codec: one tick is 1 / mTrackRate seconds.
  config.g_timebase.num = 1;
  config.g_timebase.den = mTrackRate;

  config.g_error_resilient = 0;

  config.g_lag_in_frames = 0; // 0- no frame lagging

  // Pick the thread count from the frame area and the number of cores.
  int32_t number_of_cores = PR_GetNumberOfProcessors();
  if (mFrameWidth * mFrameHeight > 1280 * 960 && number_of_cores >= 6) {
    config.g_threads = 3; // 3 threads for 1080p.
  } else if (mFrameWidth * mFrameHeight > 640 * 480 && number_of_cores >= 3) {
    config.g_threads = 2; // 2 threads for qHD/HD.
  } else {
    config.g_threads = 1; // 1 thread for VGA or less
  }

  // rate control settings
  config.rc_dropframe_thresh = 0;
  config.rc_end_usage = VPX_CBR;
  config.g_pass = VPX_RC_ONE_PASS;
  config.rc_resize_allowed = 1;
  config.rc_undershoot_pct = 100;
  config.rc_overshoot_pct = 15;
  config.rc_buf_initial_sz = 500;
  config.rc_buf_optimal_sz = 600;
  config.rc_buf_sz = 1000;

  config.kf_mode = VPX_KF_AUTO;
  // Ensure that we can output one I-frame per second.
  config.kf_max_dist = mEncodedFrameRate;

  vpx_codec_flags_t flags = 0;
  flags |= VPX_CODEC_USE_OUTPUT_PARTITION;
  if (vpx_codec_enc_init(mVPXContext, vpx_codec_vp8_cx(), &config, flags)) {
    return NS_ERROR_FAILURE;
  }

  // Tuning controls. Their results are intentionally not turned into a
  // failure: the context is already live at this point and is only destroyed
  // by the destructor when mInitialized is set.
  vpx_codec_control(mVPXContext, VP8E_SET_STATIC_THRESHOLD, 1);
  vpx_codec_control(mVPXContext, VP8E_SET_CPUUSED, -6);
  vpx_codec_control(mVPXContext, VP8E_SET_TOKEN_PARTITIONS,
                    VP8_ONE_TOKENPARTITION);

  mInitialized = true;
  // Wake up GetMetadata()/GetEncodedTrack() callers waiting for Init().
  mon.NotifyAll();

  return NS_OK;
}
/**
 * Blocks until the encoder has been initialized (or canceled) and then
 * returns a VP8Metadata describing the frame size, display size and encoded
 * frame rate. Returns nullptr if the encoder was canceled or encoding has
 * already completed.
 */
already_AddRefed<TrackMetadataBase>
VP8TrackEncoder::GetMetadata()
{
  PROFILER_LABEL("VP8TrackEncoder", "GetMetadata",
    js::ProfileEntry::Category::OTHER);

  {
    // Hold the monitor only while waiting for Init() or cancellation.
    ReentrantMonitorAutoEnter mon(mReentrantMonitor);
    while (!(mCanceled || mInitialized)) {
      mon.Wait();
    }
  }

  const bool noMetadata = mCanceled || mEncodingComplete;
  if (noMetadata) {
    return nullptr;
  }

  nsRefPtr<VP8Metadata> metadata = new VP8Metadata();
  metadata->mEncodedFrameRate = mEncodedFrameRate;
  metadata->mDisplayHeight = mDisplayHeight;
  metadata->mDisplayWidth = mDisplayWidth;
  metadata->mHeight = mFrameHeight;
  metadata->mWidth = mFrameWidth;

  return metadata.forget();
}
/**
 * Drains the packets libvpx produced for the most recent vpx_codec_encode()
 * call, concatenates the partitions of one frame and appends the result to
 * aData as a single EncodedFrame.
 *
 * Nothing is appended when the encoder produced no frame data or when the
 * frame's presentation timestamp does not match mEncodedTimestamp.
 *
 * @param aData  Container receiving the encoded frame.
 * @return Always NS_OK.
 */
nsresult
VP8TrackEncoder::GetEncodedPartitions(EncodedFrameContainer& aData)
{
  vpx_codec_iter_t iter = nullptr;
  EncodedFrame::FrameType frameType = EncodedFrame::VP8_P_FRAME;
  nsTArray<uint8_t> frameData;
  // Timing of the last frame packet seen. Kept by value so that nothing below
  // depends on the packet pointer still being non-null after the loop.
  vpx_codec_pts_t framePts = 0;
  unsigned long frameDuration = 0;
  const vpx_codec_cx_pkt_t *pkt = nullptr;

  while ((pkt = vpx_codec_get_cx_data(mVPXContext, &iter)) != nullptr) {
    // Only frame packets carry valid data in the data.frame union member;
    // reading its flags for any other packet kind would be meaningless.
    if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) {
      continue;
    }

    // Copy the encoded data from libvpx to frameData
    frameData.AppendElements(static_cast<uint8_t*>(pkt->data.frame.buf),
                             pkt->data.frame.sz);
    framePts = pkt->data.frame.pts;
    frameDuration = pkt->data.frame.duration;

    // End of frame: the last partition is the one not flagged as a fragment.
    if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
      if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
        frameType = EncodedFrame::VP8_I_FRAME;
      }
      break;
    }
  }

  if (frameData.IsEmpty() || framePts != mEncodedTimestamp) {
    return NS_OK;
  }

  // Copy the encoded data to aData.
  nsRefPtr<EncodedFrame> videoData = new EncodedFrame();
  videoData->SetFrameType(frameType);

  // Convert the timestamp and duration to Usecs. A value is only stored when
  // the conversion did not overflow.
  CheckedInt64 timestamp = FramesToUsecs(mEncodedTimestamp, mTrackRate);
  if (timestamp.isValid()) {
    videoData->SetTimeStamp(static_cast<uint64_t>(timestamp.value()));
  }
  CheckedInt64 duration = FramesToUsecs(frameDuration, mTrackRate);
  if (duration.isValid()) {
    videoData->SetDuration(static_cast<uint64_t>(duration.value()));
  }

  videoData->SwapInFrameData(frameData);
  VP8LOG("GetEncodedPartitions TimeStamp %lld Duration %lld\n",
         videoData->GetTimeStamp(), videoData->GetDuration());
  VP8LOG("frameType %d\n", videoData->GetFrameType());
  aData.AppendEncodedFrame(videoData);

  return NS_OK;
}
// True when the luma plane is exactly twice the size of the chroma planes,
// i.e. the chroma is subsampled by two in both dimensions.
static bool isYUV420(const PlanarYCbCrImage::Data *aData)
{
  return aData->mYSize == aData->mCbCrSize * 2;
}
// True when the chroma planes have half the luma width but the full luma
// height, i.e. the chroma is subsampled horizontally only.
static bool isYUV422(const PlanarYCbCrImage::Data *aData)
{
  const bool halfWidth = aData->mYSize.width == aData->mCbCrSize.width * 2;
  const bool fullHeight = aData->mYSize.height == aData->mCbCrSize.height;
  return halfWidth && fullHeight;
}
// True when the chroma planes have the same size as the luma plane, i.e. the
// chroma is not subsampled.
static bool isYUV444(const PlanarYCbCrImage::Data *aData)
{
  return aData->mYSize == aData->mCbCrSize;
}
/**
 * Points mVPXImageWrapper at I420 pixel data for aChunk so that the frame can
 * be handed to vpx_codec_encode().
 *
 * Planar 4:2:0 input is referenced in place. NV12/NV21, 4:4:4 and 4:2:2 input
 * is first converted with libyuv into the mI420Frame scratch buffer. Null or
 * force-black chunks are replaced by a lazily created black frame.
 *
 * @param aChunk  The video chunk to encode.
 * @return NS_OK on success; NS_ERROR_FAILURE if there is no usable image, the
 *         image is not a valid planar YCbCr image, or the libyuv conversion
 *         fails; NS_ERROR_NOT_IMPLEMENTED for an unsupported planar layout.
 */
nsresult VP8TrackEncoder::PrepareRawFrame(VideoChunk &aChunk)
{
  nsRefPtr<Image> img;
  if (aChunk.mFrame.GetForceBlack() || aChunk.IsNull()) {
    if (!mMuteFrame) {
      mMuteFrame = VideoFrame::CreateBlackImage(gfxIntSize(mFrameWidth, mFrameHeight));
      MOZ_ASSERT(mMuteFrame);
    }
    img = mMuteFrame;
  } else {
    img = aChunk.mFrame.GetImage();
  }

  // MOZ_ASSERT is compiled out of release builds, so a failed black-frame
  // allocation must be handled explicitly before the image is dereferenced.
  if (!img) {
    VP8LOG("No image to encode\n");
    return NS_ERROR_FAILURE;
  }

  ImageFormat format = img->GetFormat();
  if (format != ImageFormat::PLANAR_YCBCR) {
    VP8LOG("Unsupported video format\n");
    return NS_ERROR_FAILURE;
  }

  // The format check above tells us this image is a PlanarYCbCrImage.
  PlanarYCbCrImage* yuv = static_cast<PlanarYCbCrImage*>(img.get());
  // Big-time assumption here that this is all contiguous data coming
  // from getUserMedia or other sources.
  if (!yuv->IsValid()) {
    NS_WARNING("PlanarYCbCrImage is not valid");
    return NS_ERROR_FAILURE;
  }

  const PlanarYCbCrImage::Data *data = yuv->GetData();

  if (isYUV420(data) && !data->mCbSkip) { // 420 planar
    // Already I420: reference the source planes directly, no copy needed.
    mVPXImageWrapper->planes[VPX_PLANE_Y] = data->mYChannel;
    mVPXImageWrapper->planes[VPX_PLANE_U] = data->mCbChannel;
    mVPXImageWrapper->planes[VPX_PLANE_V] = data->mCrChannel;
    mVPXImageWrapper->stride[VPX_PLANE_Y] = data->mYStride;
    mVPXImageWrapper->stride[VPX_PLANE_U] = data->mCbCrStride;
    mVPXImageWrapper->stride[VPX_PLANE_V] = data->mCbCrStride;
    return NS_OK;
  }

  // Every other layout is converted into a tightly packed I420 scratch
  // buffer: a full-size Y plane followed by two half-size chroma planes
  // (dimensions rounded up for odd sizes).
  uint32_t yPlaneSize = mFrameWidth * mFrameHeight;
  uint32_t halfWidth = (mFrameWidth + 1) / 2;
  uint32_t halfHeight = (mFrameHeight + 1) / 2;
  uint32_t uvPlaneSize = halfWidth * halfHeight;
  if (mI420Frame.IsEmpty()) {
    mI420Frame.SetLength(yPlaneSize + uvPlaneSize * 2);
  }
  MOZ_ASSERT(mI420Frame.Length() >= (yPlaneSize + uvPlaneSize * 2));

  uint8_t *y = mI420Frame.Elements();
  uint8_t *cb = mI420Frame.Elements() + yPlaneSize;
  uint8_t *cr = mI420Frame.Elements() + yPlaneSize + uvPlaneSize;

  // libyuv converters return 0 on success and a non-zero value on failure.
  int convertResult = 0;
  if (isYUV420(data) && data->mCbSkip) {
    // If mCbSkip is set, we assume it's nv12 or nv21.
    if (data->mCbChannel < data->mCrChannel) { // nv12
      convertResult = libyuv::NV12ToI420(data->mYChannel, data->mYStride,
                                         data->mCbChannel, data->mCbCrStride,
                                         y, mFrameWidth,
                                         cb, halfWidth,
                                         cr, halfWidth,
                                         mFrameWidth, mFrameHeight);
    } else { // nv21
      convertResult = libyuv::NV21ToI420(data->mYChannel, data->mYStride,
                                         data->mCrChannel, data->mCbCrStride,
                                         y, mFrameWidth,
                                         cb, halfWidth,
                                         cr, halfWidth,
                                         mFrameWidth, mFrameHeight);
    }
  } else if (isYUV444(data) && !data->mCbSkip) {
    convertResult = libyuv::I444ToI420(data->mYChannel, data->mYStride,
                                       data->mCbChannel, data->mCbCrStride,
                                       data->mCrChannel, data->mCbCrStride,
                                       y, mFrameWidth,
                                       cb, halfWidth,
                                       cr, halfWidth,
                                       mFrameWidth, mFrameHeight);
  } else if (isYUV422(data) && !data->mCbSkip) {
    convertResult = libyuv::I422ToI420(data->mYChannel, data->mYStride,
                                       data->mCbChannel, data->mCbCrStride,
                                       data->mCrChannel, data->mCbCrStride,
                                       y, mFrameWidth,
                                       cb, halfWidth,
                                       cr, halfWidth,
                                       mFrameWidth, mFrameHeight);
  } else {
    VP8LOG("Unsupported planar format\n");
    return NS_ERROR_NOT_IMPLEMENTED;
  }

  // Do not hand a partially written or stale buffer to the encoder.
  if (convertResult != 0) {
    VP8LOG("Converting to I420 failed\n");
    return NS_ERROR_FAILURE;
  }

  mVPXImageWrapper->planes[VPX_PLANE_Y] = y;
  mVPXImageWrapper->planes[VPX_PLANE_U] = cb;
  mVPXImageWrapper->planes[VPX_PLANE_V] = cr;
  mVPXImageWrapper->stride[VPX_PLANE_Y] = mFrameWidth;
  mVPXImageWrapper->stride[VPX_PLANE_U] = halfWidth;
  mVPXImageWrapper->stride[VPX_PLANE_V] = halfWidth;

  return NS_OK;
}
// Thresholds used by GetNextEncodeOperation, expressed as a fraction of the
// processed duration: above I_FRAME_RATIO an I-frame is forced, above
// SKIP_FRAME_RATIO the next frame is skipped.
#define I_FRAME_RATIO (0.5)
#define SKIP_FRAME_RATIO (0.75)
/**
 * Chooses the EncodeOperation for the next target frame by comparing the
 * wall-clock time spent so far (aTimeElapsed) with the duration of the media
 * processed so far plus one more encoded frame.
 */
VP8TrackEncoder::EncodeOperation
VP8TrackEncoder::GetNextEncodeOperation(TimeDuration aTimeElapsed,
                                        StreamTime aProcessedDuration)
{
  const int64_t budgetInUsec =
    FramesToUsecs(aProcessedDuration + mEncodedFrameDuration,
                  mTrackRate).value();
  const double elapsedInUsec = aTimeElapsed.ToMicroseconds();

  // The encoder is too slow.
  // We should skip next frame to consume the mSourceSegment.
  if (elapsedInUsec > (budgetInUsec * SKIP_FRAME_RATIO)) {
    return SKIP_FRAME;
  }

  // The encoder is a little slow.
  // We force the encoder to encode an I-frame to accelerate.
  if (elapsedInUsec > (budgetInUsec * I_FRAME_RATIO)) {
    return ENCODE_I_FRAME;
  }

  return ENCODE_NORMAL_FRAME;
}
// Returns the number of ticks by which the encoded output runs ahead of the
// source consumed so far: the previous remainder plus the duration just
// encoded, minus the source duration just copied.
StreamTime
VP8TrackEncoder::CalculateRemainingTicks(StreamTime aDurationCopied,
                                         StreamTime aEncodedDuration)
{
  const StreamTime encodedSoFar = mRemainingTicks + aEncodedDuration;
  return encodedSoFar - aDurationCopied;
}
// Try to extend the encodedDuration as long as possible if the target frame
// has a long duration.
//
// The result is the smallest multiple of mEncodedFrameDuration (at least one
// frame duration) that covers aDurationCopied - mRemainingTicks.
StreamTime
VP8TrackEncoder::CalculateEncodedDuration(StreamTime aDurationCopied)
{
  const StreamTime uncovered = aDurationCopied - mRemainingTicks;

  // A non-positive frame duration can never cover a positive amount of
  // source, so stepping by it would not terminate. Likewise, anything that
  // fits in a single frame duration needs exactly one.
  if (mEncodedFrameDuration <= 0 || uncovered <= mEncodedFrameDuration) {
    return mEncodedFrameDuration;
  }

  // uncovered > mEncodedFrameDuration > 0 here. The number of additional
  // frame durations needed is ceil((uncovered - D) / D) == (uncovered - 1) / D
  // in integer arithmetic, where D is mEncodedFrameDuration.
  const StreamTime extraFrames = (uncovered - 1) / mEncodedFrameDuration;
  return mEncodedFrameDuration * (extraFrames + 1);
}
/**
 * Encoding flow in GetEncodedTrack():
 * 1: Check the mInitialized state and the packet duration.
 * 2: Move the data from mRawSegment to mSourceSegment.
 * 3: Encode the video chunks in mSourceSegment in a for-loop.
 * 3.1: Pick the video chunk by mRemainingTicks.
 * 3.2: Calculate the encoding duration for the parameter of vpx_codec_encode().
 *      The encoding duration is a multiple of mEncodedFrameDuration.
 * 3.3: Setup the video chunk to mVPXImageWrapper by PrepareRawFrame().
 * 3.4: Send frame into vp8 encoder by vpx_codec_encode().
 * 3.5: Get the output frame from encoder by calling GetEncodedPartitions().
 * 3.6: Calculate the mRemainingTicks for next target frame.
 * 3.7: Set the nextEncodeOperation for the next target frame.
 *      There is a heuristic: If the frame duration we have processed in
 *      mSourceSegment is 100ms, it means that we can't spend more than 100ms
 *      to encode it.
 * 4. Remove the encoded chunks in mSourceSegment after the for-loop.
 *
 * Ex1: Input frame rate is 100 => input frame duration is 10ms for each.
 *      mEncodedFrameRate is 30 => output frame duration is 33ms.
 *      In this case, the frame duration in mSourceSegment will be:
 *      1st : 0~10ms
 *      2nd : 10~20ms
 *      3rd : 20~30ms
 *      4th : 30~40ms
 *      ...
 *      The VP8 encoder will take the 1st and 4th frames to encode. At the
 *      beginning mRemainingTicks is 0 for the 1st frame, then mRemainingTicks
 *      is set to 23 to pick the 4th frame.
 *      (mEncodedFrameDuration - 1st frame duration)
 *
 * Ex2: Input frame rate is 25 => frame duration is 40ms for each.
 *      mEncodedFrameRate is 30 => output frame duration is 33ms.
 *      In this case, the frame duration in mSourceSegment will be:
 *      1st : 0~40ms
 *      2nd : 40~80ms
 *      3rd : 80~120ms
 *      4th : 120~160ms
 *      ...
 *      Because the input frame duration of 40ms is larger than 33ms, the
 *      first encoded frame duration will be 66ms by calling
 *      CalculateEncodedDuration. And mRemainingTicks will be set to 26
 *      (CalculateRemainingTicks 0+66-40) in order to pick the next frame (2nd)
 *      in mSourceSegment.
 *
 * Returns NS_OK on success and NS_ERROR_FAILURE if the encoder was canceled,
 * encoding has already completed, a frame could not be prepared, or libvpx
 * reports an encoding error.
 */
nsresult
VP8TrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
{
  PROFILER_LABEL("VP8TrackEncoder", "GetEncodedTrack",
    js::ProfileEntry::Category::OTHER);
  {
    // Move all the samples from mRawSegment to mSourceSegment. We only hold
    // the monitor in this block.
    ReentrantMonitorAutoEnter mon(mReentrantMonitor);
    // Wait if mEncoder is not initialized, or when not enough raw data, but is
    // not the end of stream nor is being canceled.
    while (!mCanceled && (!mInitialized ||
           (mRawSegment.GetDuration() + mSourceSegment.GetDuration() <
            mEncodedFrameDuration && !mEndOfStream))) {
      mon.Wait();
    }
    if (mCanceled || mEncodingComplete) {
      return NS_ERROR_FAILURE;
    }
    mSourceSegment.AppendFrom(&mRawSegment);
  }

  VideoSegment::ChunkIterator iter(mSourceSegment);
  StreamTime durationCopied = 0;
  StreamTime totalProcessedDuration = 0;
  TimeStamp timebase = TimeStamp::Now();
  EncodeOperation nextEncodeOperation = ENCODE_NORMAL_FRAME;

  for (; !iter.IsEnded(); iter.Next()) {
    VideoChunk &chunk = *iter;
    // Accumulate chunk's duration to durationCopied until it reaches
    // mRemainingTicks.
    durationCopied += chunk.GetDuration();
    MOZ_ASSERT(mRemainingTicks <= mEncodedFrameDuration);
    VP8LOG("durationCopied %lld mRemainingTicks %lld\n",
           durationCopied, mRemainingTicks);
    if (durationCopied >= mRemainingTicks) {
      VP8LOG("nextEncodeOperation is %d\n",nextEncodeOperation);
      // Calculate encodedDuration for this target frame.
      StreamTime encodedDuration = CalculateEncodedDuration(durationCopied);

      // Encode frame.
      if (nextEncodeOperation != SKIP_FRAME) {
        nsresult rv = PrepareRawFrame(chunk);
        NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE);

        // Encode the data with VP8 encoder
        int flags = (nextEncodeOperation == ENCODE_NORMAL_FRAME) ?
                    0 : VPX_EFLAG_FORCE_KF;
        if (vpx_codec_encode(mVPXContext, mVPXImageWrapper, mEncodedTimestamp,
                             (unsigned long)encodedDuration, flags,
                             VPX_DL_REALTIME)) {
          return NS_ERROR_FAILURE;
        }
        // Get the encoded data from VP8 encoder.
        GetEncodedPartitions(aData);
      } else {
        // SKIP_FRAME
        // Extend the duration of the last encoded data in aData
        // because this frame will be skipped.
        // There may be no encoded frame yet, so check before taking the last
        // element.
        if (!aData.GetEncodedFrames().IsEmpty()) {
          nsRefPtr<EncodedFrame> last =
            aData.GetEncodedFrames().LastElement();
          // EncodedFrame durations are stored in microseconds (see
          // GetEncodedPartitions), while encodedDuration is in track ticks,
          // so convert before adding.
          CheckedInt64 skippedUsecs =
            FramesToUsecs(encodedDuration, mTrackRate);
          if (last && skippedUsecs.isValid()) {
            last->SetDuration(last->GetDuration() +
                              static_cast<uint64_t>(skippedUsecs.value()));
          }
        }
      }
      // Move forward the mEncodedTimestamp.
      mEncodedTimestamp += encodedDuration;
      totalProcessedDuration += durationCopied;
      // Calculate mRemainingTicks for next target frame.
      mRemainingTicks = CalculateRemainingTicks(durationCopied,
                                                encodedDuration);

      // Check the remain data is enough for next target frame.
      if (mSourceSegment.GetDuration() - totalProcessedDuration
          >= mEncodedFrameDuration) {
        TimeDuration elapsedTime = TimeStamp::Now() - timebase;
        nextEncodeOperation = GetNextEncodeOperation(elapsedTime,
                                                     totalProcessedDuration);
        // Reset durationCopied for next iteration.
        durationCopied = 0;
      } else {
        // Process done, there is no enough data left for next iteration,
        // break the for-loop.
        break;
      }
    }
  }
  // Remove the chunks we have processed.
  mSourceSegment.RemoveLeading(totalProcessedDuration);
  VP8LOG("RemoveLeading %lld\n",totalProcessedDuration);

  // End of stream, pull the rest frames in encoder.
  if (mEndOfStream) {
    VP8LOG("mEndOfStream is true\n");
    mEncodingComplete = true;
    // A null image asks libvpx to flush whatever it still holds.
    if (vpx_codec_encode(mVPXContext, nullptr, mEncodedTimestamp,
                         mEncodedFrameDuration, 0, VPX_DL_REALTIME)) {
      return NS_ERROR_FAILURE;
    }
    GetEncodedPartitions(aData);
  }

  return NS_OK;
}
} // namespace mozilla