Bug 1019291 - Construct Annex B samples in the decoder. r=cpeace

Move Annex B sample formatting from the demuxer to a
static utility function. Return NAL unit syntax
samples by default, since more platforms prefer to
unpack the AVCC data themselves.

Pass the raw AVCC payload to ffmpeg through its extradata
field. It can handle either sample format, expecting one or
the other depending on whether extradata is present.

Pass input samples through a new WMFOutputSource::Input
method. Save a reference to the VideoDecoderConfig in the
WMFVideoOutputSource and use it to convert samples to
Annex B before forwarding them to the MFTDecoder.
This commit is contained in:
Ralph Giles 2014-06-19 15:59:00 -07:00
parent 1c401dad5e
commit 79b23c4048
18 changed files with 107 additions and 40 deletions

View File

@ -87,6 +87,9 @@ FFmpegDataDecoder::Init()
// FFmpeg will call back to this to negotiate a video pixel format.
mCodecContext.get_format = ChoosePixelFormat;
mCodecContext.extradata = mExtraData.begin();
mCodecContext.extradata_size = mExtraData.length();
AVDictionary* opts = nullptr;
if (avcodec_open2(&mCodecContext, codec, &opts) < 0) {
NS_WARNING("Couldn't initialise ffmpeg decoder");

View File

@ -10,6 +10,7 @@
#include "FFmpegDecoderModule.h"
#include "FFmpegRuntimeLinker.h"
#include "FFmpegCompat.h"
#include "mozilla/Vector.h"
namespace mozilla
{
@ -31,6 +32,7 @@ public:
protected:
MediaTaskQueue* mTaskQueue;
AVCodecContext mCodecContext;
Vector<uint8_t> mExtraData;
private:
static bool sFFmpegInitDone;

View File

@ -33,6 +33,7 @@ FFmpegH264Decoder::FFmpegH264Decoder(
, mImageContainer(aImageContainer)
{
MOZ_COUNT_CTOR(FFmpegH264Decoder);
mExtraData.append(aConfig.extra_data.begin(), aConfig.extra_data.length());
}
nsresult

View File

@ -5,6 +5,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "WMFAudioOutputSource.h"
#include "mp4_demuxer/DecoderData.h"
#include "VideoUtils.h"
#include "WMFUtils.h"
#include "nsTArray.h"
@ -127,6 +128,14 @@ WMFAudioOutputSource::Init()
return decoder.forget();
}
HRESULT
WMFAudioOutputSource::Input(mp4_demuxer::MP4Sample* aSample)
{
// Audio samples need no reformatting: hand the compressed payload
// directly to the wrapped MFTDecoder, stamped with the sample's
// composition (presentation) timestamp.
const uint8_t* data = reinterpret_cast<const uint8_t*>(aSample->data);
uint32_t length = aSample->size;
return mDecoder->Input(data, length, aSample->composition_timestamp);
}
HRESULT
WMFAudioOutputSource::Output(int64_t aStreamOffset,
nsAutoPtr<MediaData>& aOutData)

View File

@ -22,6 +22,8 @@ public:
virtual TemporaryRef<MFTDecoder> Init() MOZ_OVERRIDE;
virtual HRESULT Input(mp4_demuxer::MP4Sample* aSample) MOZ_OVERRIDE;
// Note WMF's AAC decoder sometimes outputs negatively timestamped samples;
// presumably these are preroll samples, and we strip them. We may return
// a null aOutput in this case.

View File

@ -70,7 +70,8 @@ WMFDecoderModule::CreateH264Decoder(const mp4_demuxer::VideoDecoderConfig& aConf
MediaTaskQueue* aVideoTaskQueue,
MediaDataDecoderCallback* aCallback)
{
return new WMFMediaDataDecoder(new WMFVideoOutputSource(aLayersBackend,
return new WMFMediaDataDecoder(new WMFVideoOutputSource(aConfig,
aLayersBackend,
aImageContainer,
sDXVAEnabled),
aVideoTaskQueue,

View File

@ -67,11 +67,9 @@ WMFMediaDataDecoder::Input(mp4_demuxer::MP4Sample* aSample)
void
WMFMediaDataDecoder::ProcessDecode(mp4_demuxer::MP4Sample* aSample)
{
const uint8_t* data = reinterpret_cast<const uint8_t*>(aSample->data);
uint32_t length = aSample->size;
HRESULT hr = mDecoder->Input(data, length, aSample->composition_timestamp);
HRESULT hr = mSource->Input(aSample);
if (FAILED(hr)) {
NS_WARNING("WMFAudioDecoder failed to input data");
NS_WARNING("WMFOutputSource rejected sample");
mCallback->Error();
return;
}

View File

@ -13,6 +13,8 @@
#include "MFTDecoder.h"
#include "mozilla/RefPtr.h"
class mp4_demuxer::MP4Sample;
namespace mozilla {
// Encapsulates the initialization of the MFTDecoder appropriate for decoding
@ -26,6 +28,11 @@ public:
// Returns nullptr on failure.
virtual TemporaryRef<MFTDecoder> Init() = 0;
// Submit a compressed sample for decoding.
// This should forward to the MFTDecoder after performing
// any required sample formatting.
virtual HRESULT Input(mp4_demuxer::MP4Sample* aSample) = 0;
// Produces decoded output, if possible. Blocks until output can be produced,
// or until no more is able to be produced.
// Returns S_OK on success, or MF_E_TRANSFORM_NEED_MORE_INPUT if there's not

View File

@ -13,6 +13,8 @@
#include "nsThreadUtils.h"
#include "Layers.h"
#include "mozilla/layers/LayersTypes.h"
#include "mp4_demuxer/AnnexB.h"
#include "mp4_demuxer/DecoderData.h"
#include "prlog.h"
#include "gfx2DGlue.h"
@ -30,12 +32,15 @@ using mozilla::layers::LayersBackend;
namespace mozilla {
WMFVideoOutputSource::WMFVideoOutputSource(mozilla::layers::LayersBackend aLayersBackend,
mozilla::layers::ImageContainer* aImageContainer,
bool aDXVAEnabled)
WMFVideoOutputSource::WMFVideoOutputSource(
const mp4_demuxer::VideoDecoderConfig& aConfig,
mozilla::layers::LayersBackend aLayersBackend,
mozilla::layers::ImageContainer* aImageContainer,
bool aDXVAEnabled)
: mVideoStride(0)
, mVideoWidth(0)
, mVideoHeight(0)
, mConfig(aConfig)
, mImageContainer(aImageContainer)
, mDXVAEnabled(aDXVAEnabled)
, mLayersBackend(aLayersBackend)
@ -138,6 +143,17 @@ WMFVideoOutputSource::Init()
return decoder.forget();
}
HRESULT
WMFVideoOutputSource::Input(mp4_demuxer::MP4Sample* aSample)
{
// We must prepare samples in AVC Annex B.
// ConvertSample rewrites aSample in place: it replaces the leading
// AVCC NAL length field with a start code and, for keyframes, prepends
// the SPS/PPS header parsed into mConfig.annex_b.
mp4_demuxer::AnnexB::ConvertSample(aSample, mConfig.annex_b);
// Forward sample data to the decoder.
const uint8_t* data = reinterpret_cast<const uint8_t*>(aSample->data);
uint32_t length = aSample->size;
return mDecoder->Input(data, length, aSample->composition_timestamp);
}
HRESULT
WMFVideoOutputSource::ConfigureVideoFrameGeometry()
{

View File

@ -20,13 +20,16 @@ class DXVA2Manager;
class WMFVideoOutputSource : public WMFOutputSource {
public:
WMFVideoOutputSource(mozilla::layers::LayersBackend aLayersBackend,
WMFVideoOutputSource(const mp4_demuxer::VideoDecoderConfig& aConfig,
mozilla::layers::LayersBackend aLayersBackend,
mozilla::layers::ImageContainer* aImageContainer,
bool aDXVAEnabled);
~WMFVideoOutputSource();
virtual TemporaryRef<MFTDecoder> Init() MOZ_OVERRIDE;
virtual HRESULT Input(mp4_demuxer::MP4Sample* aSample) MOZ_OVERRIDE;
virtual HRESULT Output(int64_t aStreamOffset,
nsAutoPtr<MediaData>& aOutput) MOZ_OVERRIDE;
@ -51,6 +54,8 @@ private:
uint32_t mVideoHeight;
nsIntRect mPictureRegion;
const mp4_demuxer::VideoDecoderConfig& mConfig;
RefPtr<MFTDecoder> mDecoder;
RefPtr<layers::ImageContainer> mImageContainer;
nsAutoPtr<DXVA2Manager> mDXVA2Manager;

View File

@ -2,6 +2,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/ArrayUtils.h"
#include "mp4_demuxer/AnnexB.h"
#include "mp4_demuxer/ByteReader.h"
#include "mp4_demuxer/DecoderData.h"
@ -13,6 +14,21 @@ namespace mp4_demuxer
static const uint8_t kAnnexBDelimiter[] = { 0, 0, 0, 1 };
void
AnnexB::ConvertSample(MP4Sample* aSample,
const mozilla::Vector<uint8_t>& annexB)
{
// Convert an AVCC-framed sample to Annex B in place. annexB is the
// pre-parsed SPS/PPS header (see ConvertExtraDataToAnnexB) to prepend
// to keyframes.
MOZ_ASSERT(aSample);
MOZ_ASSERT(aSample->data);
MOZ_ASSERT(aSample->size >= ArrayLength(kAnnexBDelimiter));
// Overwrite the NAL length with the Annex B separator.
// NOTE(review): only the FIRST length field is overwritten, and a 4-byte
// length size is assumed. A sample containing multiple NAL units would
// keep AVCC length prefixes after the first — confirm samples are
// single-NAL here, or that the demuxer guarantees it.
memcpy(aSample->data, kAnnexBDelimiter, ArrayLength(kAnnexBDelimiter));
// Prepend the Annex B header with SPS and PPS tables to keyframes.
if (aSample->is_sync_point) {
aSample->Prepend(annexB.begin(), annexB.length());
}
}
Vector<uint8_t>
AnnexB::ConvertExtraDataToAnnexB(mozilla::Vector<uint8_t>& aExtraData)
{
@ -36,9 +52,9 @@ AnnexB::ConvertExtraDataToAnnexB(mozilla::Vector<uint8_t>& aExtraData)
ByteReader reader(aExtraData);
const uint8_t* ptr = reader.Read(5);
if (ptr && ptr[0] == 1) {
// Append SPS then PSP
ConvertSpsOrPsp(reader, reader.ReadU8() & 31, &annexB);
ConvertSpsOrPsp(reader, reader.ReadU8(), &annexB);
// Append SPS then PPS
ConvertSPSOrPPS(reader, reader.ReadU8() & 31, &annexB);
ConvertSPSOrPPS(reader, reader.ReadU8(), &annexB);
MOZ_ASSERT(!reader.Remaining());
}
@ -47,7 +63,7 @@ AnnexB::ConvertExtraDataToAnnexB(mozilla::Vector<uint8_t>& aExtraData)
}
void
AnnexB::ConvertSpsOrPsp(ByteReader& aReader, uint8_t aCount,
AnnexB::ConvertSPSOrPPS(ByteReader& aReader, uint8_t aCount,
Vector<uint8_t>* aAnnexB)
{
for (int i = 0; i < aCount; i++) {
@ -62,4 +78,5 @@ AnnexB::ConvertSpsOrPsp(ByteReader& aReader, uint8_t aCount,
aAnnexB->append(ptr, length);
}
}
}
} // namespace mp4_demuxer

View File

@ -83,7 +83,7 @@ VideoDecoderConfig::Update(sp<MetaData>& aMetaData, const char* aMimeType)
uint32_t type;
if (aMetaData->findData(kKeyAVCC, &type, &data, &size)) {
mozilla::Vector<uint8_t> extra_data;
extra_data.clear();
extra_data.append(reinterpret_cast<const uint8_t*>(data), size);
annex_b = AnnexB::ConvertExtraDataToAnnexB(extra_data);
}

View File

@ -2,24 +2,33 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef ANNEX_B_H_
#define ANNEX_B_H_
#ifndef MP4_DEMUXER_ANNEX_B_H_
#define MP4_DEMUXER_ANNEX_B_H_
#include "mozilla/Vector.h"
namespace mp4_demuxer
{
class ByteReader;
class MP4Sample;
class AnnexB
{
public:
// Convert a sample from NAL unit syntax to Annex B.
static void ConvertSample(MP4Sample* aSample,
const mozilla::Vector<uint8_t>& annexB);
// Parse an AVCC box and construct the Annex B sample header.
static mozilla::Vector<uint8_t> ConvertExtraDataToAnnexB(
mozilla::Vector<uint8_t>& aExtraData);
private:
static void ConvertSpsOrPsp(ByteReader& aReader, uint8_t aCount,
// AVCC box parser helper.
static void ConvertSPSOrPPS(ByteReader& aReader, uint8_t aCount,
mozilla::Vector<uint8_t>* aAnnexB);
};
}
#endif
} // namespace mp4_demuxer
#endif // MP4_DEMUXER_ANNEX_B_H_

View File

@ -67,7 +67,8 @@ public:
int32_t display_width;
int32_t display_height;
mozilla::Vector<uint8_t> annex_b;
mozilla::Vector<uint8_t> extra_data; // Unparsed AVCDecoderConfig payload.
mozilla::Vector<uint8_t> annex_b; // Parsed version for sample prepend.
void Update(stagefright::sp<stagefright::MetaData>& aMetaData, const char* aMimeType);
bool IsValid();

View File

@ -44,8 +44,8 @@ public:
void SeekAudio(Microseconds aTime);
void SeekVideo(Microseconds aTime);
// DemuxAudioSample and DemuxVideoSample functions return nullptr on end of
// stream or error.
// DemuxAudioSample and DemuxVideoSample functions
// return nullptr on end of stream or error.
MP4Sample* DemuxAudioSample();
MP4Sample* DemuxVideoSample();

View File

@ -65,7 +65,8 @@ private:
nsAutoPtr<Stream> mSource;
};
MP4Demuxer::MP4Demuxer(Stream* source) : mPrivate(new StageFrightPrivate())
MP4Demuxer::MP4Demuxer(Stream* source)
: mPrivate(new StageFrightPrivate())
{
mPrivate->mExtractor = new MPEG4Extractor(new DataSourceAdapter(source));
}
@ -180,11 +181,7 @@ MP4Demuxer::DemuxVideoSample()
sample->Update();
if (sample->is_sync_point) {
sample->Prepend(mVideoConfig.annex_b.begin(),
mVideoConfig.annex_b.length());
}
return sample.forget();
}
}
} // namespace mp4_demuxer

View File

@ -3301,11 +3301,10 @@ status_t MPEG4Source::read(
}
CHECK(dstOffset + 4 <= mBuffer->size());
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 1;
dstData[dstOffset++] = (uint8_t) (nalLength >> 24);
dstData[dstOffset++] = (uint8_t) (nalLength >> 16);
dstData[dstOffset++] = (uint8_t) (nalLength >> 8);
dstData[dstOffset++] = (uint8_t) nalLength;
memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
srcOffset += nalLength;
dstOffset += nalLength;
@ -3589,11 +3588,10 @@ status_t MPEG4Source::fragmentedRead(
}
CHECK(dstOffset + 4 <= mBuffer->size());
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 1;
dstData[dstOffset++] = (uint8_t) (nalLength >> 24);
dstData[dstOffset++] = (uint8_t) (nalLength >> 16);
dstData[dstOffset++] = (uint8_t) (nalLength >> 8);
dstData[dstOffset++] = (uint8_t) nalLength;
memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
srcOffset += nalLength;
dstOffset += nalLength;

View File

@ -46,6 +46,7 @@ if CONFIG['OS_TARGET'] != 'Android':
]
EXPORTS.mp4_demuxer += [
'binding/include/mp4_demuxer/AnnexB.h',
'binding/include/mp4_demuxer/DecoderData.h',
'binding/include/mp4_demuxer/mp4_demuxer.h',
]