Bug 818822 - Resample all inputs of the MediaStreamGraph to the ideal audio rate. r=roc

2024-09-13 09:24:08 -07:00 · 2014-03-24 11:06:05 +01:00 · 2014-03-24 11:06:05 +01:00 · c258f1f2ba
commit c258f1f2ba
parent 7c497a9676
9 changed files with 173 additions and 33 deletions
--- a/content/media/AudioNodeExternalInputStream.cpp
+++ b/content/media/AudioNodeExternalInputStream.cpp
@ -107,15 +107,6 @@ ResampleChannelBuffer(SpeexResamplerState* aResampler, uint32_t aChannel,
  }
 }

-class SharedChannelArrayBuffer : public ThreadSharedObject {
-public:
-  SharedChannelArrayBuffer(nsTArray<nsTArray<float> >* aBuffers)
-  {
-    mBuffers.SwapElements(*aBuffers);
-  }
-  nsTArray<nsTArray<float> > mBuffers;
-};
-
 void
 AudioNodeExternalInputStream::TrackMapEntry::ResampleChannels(const nsTArray<const void*>& aBuffers,
                                                              uint32_t aInputDuration,
@ -178,7 +169,7 @@ AudioNodeExternalInputStream::TrackMapEntry::ResampleChannels(const nsTArray<con
  }

  uint32_t length = resampledBuffers[0].Length();
-  nsRefPtr<ThreadSharedObject> buf = new SharedChannelArrayBuffer(&resampledBuffers);
+  nsRefPtr<ThreadSharedObject> buf = new SharedChannelArrayBuffer<float>(&resampledBuffers);
  mResampledData.AppendFrames(buf.forget(), bufferPtrs, length);
 }

--- a/content/media/AudioSampleFormat.h
+++ b/content/media/AudioSampleFormat.h
@ -49,8 +49,7 @@ public:

 typedef AudioSampleTraits<AUDIO_OUTPUT_FORMAT>::Type AudioDataValue;

-// Single-sample conversion
-
+// Single-sample conversion
 /*
 * Use "2^N" conversion since it's simple, fast, "bit transparent", used by
 * many other libraries and apparently behaves reasonably.
--- a/content/media/AudioSegment.cpp
+++ b/content/media/AudioSegment.cpp
@ -8,6 +8,7 @@
 #include "AudioStream.h"
 #include "AudioChannelFormat.h"
 #include "Latency.h"
+#include "speex/speex_resampler.h"

 namespace mozilla {

@ -109,6 +110,29 @@ DownmixAndInterleave(const nsTArray<const void*>& aChannelData,
                             aDuration, aVolume, aOutputChannels, aOutput);
 }

+void AudioSegment::ResampleChunks(SpeexResamplerState* aResampler)
+{
+  uint32_t inRate, outRate;
+
+  if (mChunks.IsEmpty()) {
+    return;
+  }
+
+  speex_resampler_get_rate(aResampler, &inRate, &outRate);
+
+  switch (mChunks[0].mBufferFormat) {
+    case AUDIO_FORMAT_FLOAT32:
+      Resample<float>(aResampler, inRate, outRate);
+    break;
+    case AUDIO_FORMAT_S16:
+      Resample<int16_t>(aResampler, inRate, outRate);
+    break;
+    default:
+      MOZ_ASSERT(false);
+    break;
+  }
+}
+
 void
 AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput)
 {
--- a/content/media/AudioSegment.h
+++ b/content/media/AudioSegment.h
@ -9,12 +9,23 @@
 #include "MediaSegment.h"
 #include "AudioSampleFormat.h"
 #include "SharedBuffer.h"
+#include "WebAudioUtils.h"
 #ifdef MOZILLA_INTERNAL_API
 #include "mozilla/TimeStamp.h"
 #endif

 namespace mozilla {

+template<typename T>
+class SharedChannelArrayBuffer : public ThreadSharedObject {
+public:
+  SharedChannelArrayBuffer(nsTArray<nsTArray<T>>* aBuffers)
+  {
+    mBuffers.SwapElements(*aBuffers);
+  }
+  nsTArray<nsTArray<T>> mBuffers;
+};
+
 class AudioStream;

 /**
@ -111,6 +122,7 @@ struct AudioChunk {
 #endif
 };

+
 /**
 * A list of audio samples consisting of a sequence of slices of SharedBuffers.
 * The audio rate is determined by the track, not stored in this class.
@ -121,6 +133,43 @@ public:

  AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {}

+  // Resample every chunk of the segment in place, converting from aInRate to aOutRate.
+  template<typename T>
+  void Resample(SpeexResamplerState* aResampler, uint32_t aInRate, uint32_t aOutRate)
+  {
+    mDuration = 0;
+
+    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
+      nsAutoTArray<nsTArray<T>, GUESS_AUDIO_CHANNELS> output;
+      nsAutoTArray<const T*, GUESS_AUDIO_CHANNELS> bufferPtrs;
+      AudioChunk& c = *ci;
+      uint32_t channels = c.mChannelData.Length();
+      output.SetLength(channels);
+      bufferPtrs.SetLength(channels);
+      uint32_t inFrames = c.mDuration,
+      outFrames = c.mDuration * aOutRate / aInRate;
+      for (uint32_t i = 0; i < channels; i++) {
+        const T* in = static_cast<const T*>(c.mChannelData[i]);
+        T* out = output[i].AppendElements(outFrames);
+
+        dom::WebAudioUtils::SpeexResamplerProcess(aResampler, i,
+                                                  in, &inFrames,
+                                                  out, &outFrames);
+
+        bufferPtrs[i] = out;
+        output[i].SetLength(outFrames);
+      }
+      c.mBuffer = new mozilla::SharedChannelArrayBuffer<T>(&output);
+      for (uint32_t i = 0; i < channels; i++) {
+        c.mChannelData[i] = bufferPtrs[i];
+      }
+      c.mDuration = outFrames;
+      mDuration += c.mDuration;
+    }
+  }
+
+  void ResampleChunks(SpeexResamplerState* aResampler);
+
  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
                    const nsTArray<const float*>& aChannelData,
                    int32_t aDuration)
@ -168,6 +217,12 @@ public:
  void ApplyVolume(float aVolume);
  void WriteTo(uint64_t aID, AudioStream* aOutput);

+  int ChannelCount() {
+    NS_WARN_IF_FALSE(!mChunks.IsEmpty(),
+        "Cannot query channel count on a AudioSegment with no chunks.");
+    return mChunks.IsEmpty() ? 0 : mChunks[0].mChannelData.Length();
+  }
+
  static Type StaticType() { return AUDIO; }
 };

--- a/content/media/MediaStreamGraph.cpp
+++ b/content/media/MediaStreamGraph.cpp
@ -26,6 +26,7 @@
 #include "DOMMediaStream.h"
 #include "GeckoProfiler.h"
 #include "mozilla/unused.h"
+#include "speex/speex_resampler.h"

 using namespace mozilla::layers;
 using namespace mozilla::dom;
@ -172,15 +173,16 @@ MediaStreamGraphImpl::ExtractPendingInput(SourceMediaStream* aStream,
        MediaStreamListener* l = aStream->mListeners[j];
        TrackTicks offset = (data->mCommands & SourceMediaStream::TRACK_CREATE)
            ? data->mStart : aStream->mBuffer.FindTrack(data->mID)->GetSegment()->GetDuration();
-        l->NotifyQueuedTrackChanges(this, data->mID, data->mRate,
+        l->NotifyQueuedTrackChanges(this, data->mID, data->mOutputRate,
                                    offset, data->mCommands, *data->mData);
      }
      if (data->mCommands & SourceMediaStream::TRACK_CREATE) {
        MediaSegment* segment = data->mData.forget();
        STREAM_LOG(PR_LOG_DEBUG, ("SourceMediaStream %p creating track %d, rate %d, start %lld, initial end %lld",
-                                  aStream, data->mID, data->mRate, int64_t(data->mStart),
+                                  aStream, data->mID, data->mOutputRate, int64_t(data->mStart),
                                  int64_t(segment->GetDuration())));
-        aStream->mBuffer.AddTrack(data->mID, data->mRate, data->mStart, segment);
+
+        aStream->mBuffer.AddTrack(data->mID, data->mOutputRate, data->mStart, segment);
        // The track has taken ownership of data->mData, so let's replace
        // data->mData with an empty clone.
        data->mData = segment->CreateEmptyClone();
@ -332,7 +334,7 @@ MediaStreamGraphImpl::GetAudioPosition(MediaStream* aStream)
    return mCurrentTime;
  }
  return aStream->mAudioOutputStreams[0].mAudioPlaybackStartTime +
-      TicksToTimeRoundDown(aStream->mAudioOutputStreams[0].mStream->GetRate(),
+      TicksToTimeRoundDown(IdealAudioRate(),
                           positionInFrames);
 }

@ -811,7 +813,7 @@ MediaStreamGraphImpl::CreateOrDestroyAudioStreams(GraphTime aAudioOutputStartTim
        audioOutputStream->mStream = new AudioStream();
        // XXX for now, allocate stereo output. But we need to fix this to
        // match the system's ideal channel configuration.
-        audioOutputStream->mStream->Init(2, tracks->GetRate(), AUDIO_CHANNEL_NORMAL, AudioStream::LowLatency);
+        audioOutputStream->mStream->Init(2, IdealAudioRate(), AUDIO_CHANNEL_NORMAL, AudioStream::LowLatency);
        audioOutputStream->mTrackID = tracks->GetID();

        LogLatency(AsyncLatencyLogger::AudioStreamCreate,
@ -868,10 +870,10 @@ MediaStreamGraphImpl::PlayAudio(MediaStream* aStream,
        // the amount of silent samples we've inserted for blocking never gets
        // more than one sample away from the ideal amount.
        TrackTicks startTicks =
-            TimeToTicksRoundDown(track->GetRate(), audioOutput.mBlockedAudioTime);
+            TimeToTicksRoundDown(IdealAudioRate(), audioOutput.mBlockedAudioTime);
        audioOutput.mBlockedAudioTime += end - t;
        TrackTicks endTicks =
-            TimeToTicksRoundDown(track->GetRate(), audioOutput.mBlockedAudioTime);
+            TimeToTicksRoundDown(IdealAudioRate(), audioOutput.mBlockedAudioTime);

        output.InsertNullDataAtStart(endTicks - startTicks);
        STREAM_LOG(PR_LOG_DEBUG+1, ("MediaStream %p writing blocking-silence samples for %f to %f",
@ -1392,12 +1394,6 @@ MediaStreamGraphImpl::ForceShutDown()
  }
 }

-void
-MediaStreamGraphImpl::Init()
-{
-  AudioStream::InitPreferredSampleRate();
-}
-
 namespace {

 class MediaStreamGraphInitThreadRunnable : public nsRunnable {
@ -1410,7 +1406,6 @@ public:
  {
    char aLocal;
    profiler_register_thread("MediaStreamGraph", &aLocal);
-    mGraph->Init();
    mGraph->RunThread();
    return NS_OK;
  }
@ -1782,7 +1777,7 @@ MediaStream::EnsureTrack(TrackID aTrackId, TrackRate aSampleRate)
    nsAutoPtr<MediaSegment> segment(new AudioSegment());
    for (uint32_t j = 0; j < mListeners.Length(); ++j) {
      MediaStreamListener* l = mListeners[j];
-      l->NotifyQueuedTrackChanges(Graph(), aTrackId, aSampleRate, 0,
+      l->NotifyQueuedTrackChanges(Graph(), aTrackId, IdealAudioRate(), 0,
                                  MediaStreamListener::TRACK_EVENT_CREATED,
                                  *segment);
    }
@ -2129,7 +2124,10 @@ SourceMediaStream::AddTrack(TrackID aID, TrackRate aRate, TrackTicks aStart,
  MutexAutoLock lock(mMutex);
  TrackData* data = mUpdateTracks.AppendElement();
  data->mID = aID;
-  data->mRate = aRate;
+  data->mInputRate = aRate;
+  // We resample all audio input tracks to the sample rate of the audio mixer.
+  data->mOutputRate = aSegment->GetType() == MediaSegment::AUDIO ?
+                      IdealAudioRate() : aRate;
  data->mStart = aStart;
  data->mCommands = TRACK_CREATE;
  data->mData = aSegment;
@ -2139,6 +2137,28 @@ SourceMediaStream::AddTrack(TrackID aID, TrackRate aRate, TrackTicks aStart,
  }
 }

+void
+SourceMediaStream::ResampleAudioToGraphSampleRate(TrackData* aTrackData, MediaSegment* aSegment)
+{
+  if (aSegment->GetType() != MediaSegment::AUDIO ||
+      aTrackData->mInputRate == IdealAudioRate()) {
+    return;
+  }
+  AudioSegment* segment = static_cast<AudioSegment*>(aSegment);
+  if (!aTrackData->mResampler) {
+    int channels = segment->ChannelCount();
+    SpeexResamplerState* state = speex_resampler_init(channels,
+                                                      aTrackData->mInputRate,
+                                                      IdealAudioRate(),
+                                                      SPEEX_RESAMPLER_QUALITY_DEFAULT,
+                                                      nullptr);
+    if (state) {
+      aTrackData->mResampler.own(state);
+    }
+  }
+  segment->ResampleChunks(aTrackData->mResampler);
+}
+
 bool
 SourceMediaStream::AppendToTrack(TrackID aID, MediaSegment* aSegment, MediaSegment *aRawSegment)
 {
@ -2158,6 +2178,8 @@ SourceMediaStream::AppendToTrack(TrackID aID, MediaSegment* aSegment, MediaSegme
      // or inserting into the graph
      ApplyTrackDisabling(aID, aSegment, aRawSegment);

+      ResampleAudioToGraphSampleRate(track, aSegment);
+
      // Must notify first, since AppendFrom() will empty out aSegment
      NotifyDirectConsumers(track, aRawSegment ? aRawSegment : aSegment);
      track->mData->AppendFrom(aSegment); // note: aSegment is now dead
@ -2182,7 +2204,7 @@ SourceMediaStream::NotifyDirectConsumers(TrackData *aTrack,
  for (uint32_t j = 0; j < mDirectListeners.Length(); ++j) {
    MediaStreamDirectListener* l = mDirectListeners[j];
    TrackTicks offset = 0; // FIX! need a separate TrackTicks.... or the end of the internal buffer
-    l->NotifyRealtimeData(static_cast<MediaStreamGraph*>(GraphImpl()), aTrack->mID, aTrack->mRate,
+    l->NotifyRealtimeData(static_cast<MediaStreamGraph*>(GraphImpl()), aTrack->mID, aTrack->mOutputRate,
                          offset, aTrack->mCommands, *aSegment);
  }
 }
@ -2521,6 +2543,8 @@ MediaStreamGraph::GetInstance()

    gGraph = new MediaStreamGraphImpl(true);
    STREAM_LOG(PR_LOG_DEBUG, ("Starting up MediaStreamGraph %p", gGraph));
+
+    AudioStream::InitPreferredSampleRate();
  }

  return gGraph;
--- a/content/media/MediaStreamGraph.h
+++ b/content/media/MediaStreamGraph.h
@ -16,9 +16,18 @@
 #include "VideoFrameContainer.h"
 #include "VideoSegment.h"
 #include "MainThreadUtils.h"
+#include "nsAutoRef.h"
+#include "speex/speex_resampler.h"

 class nsIRunnable;

+template <>
+class nsAutoRefTraits<SpeexResamplerState> : public nsPointerRefTraits<SpeexResamplerState>
+{
+  public:
+  static void Release(SpeexResamplerState* aState) { speex_resampler_destroy(aState); }
+};
+
 namespace mozilla {

 class DOMMediaStream;
@ -662,6 +671,9 @@ public:
   */
  void AddTrack(TrackID aID, TrackRate aRate, TrackTicks aStart,
                MediaSegment* aSegment);
+
+  struct TrackData;
+  void ResampleAudioToGraphSampleRate(TrackData* aTrackData, MediaSegment* aSegment);
  /**
   * Append media data to a track. Ownership of aSegment remains with the caller,
   * but aSegment is emptied.
@ -752,7 +764,13 @@ public:
   */
  struct TrackData {
    TrackID mID;
-    TrackRate mRate;
+    // Sample rate of the input data.
+    TrackRate mInputRate;
+    // Sample rate of the output data: IdealAudioRate() for audio tracks, the input rate otherwise.
+    TrackRate mOutputRate;
+    // Resampler, created on first use when the rate of the input track does
+    // not match the MediaStreamGraph's.
+    nsAutoRef<SpeexResamplerState> mResampler;
    TrackTicks mStart;
    // Each time the track updates are flushed to the media graph thread,
    // this is cleared.
@ -1003,7 +1021,7 @@ protected:
  bool mInCycle;
 };

-// Returns ideal audio rate for processing
+// Returns ideal audio rate for processing.
 inline TrackRate IdealAudioRate() { return AudioStream::PreferredSampleRate(); }

 /**
--- a/content/media/MediaStreamGraphImpl.h
+++ b/content/media/MediaStreamGraphImpl.h
@ -52,6 +52,10 @@ static const int AUDIO_TARGET_MS = 2*MEDIA_GRAPH_TARGET_PERIOD_MS +
 static const int VIDEO_TARGET_MS = 2*MEDIA_GRAPH_TARGET_PERIOD_MS +
    SCHEDULE_SAFETY_MARGIN_MS;

+/**
+ * Rate at which we run the video tracks.
+ */
+
 /**
 * A per-stream update message passed from the media graph thread to the
 * main thread.
--- a/content/media/webaudio/WebAudioUtils.cpp
+++ b/content/media/webaudio/WebAudioUtils.cpp
@ -90,5 +90,25 @@ WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
 #endif
 }

+int
+WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
+                                     uint32_t aChannel,
+                                     const int16_t* aIn, uint32_t* aInLen,
+                                     int16_t* aOut, uint32_t* aOutLen)
+{
+#ifdef MOZ_SAMPLE_TYPE_S16
+  return speex_resampler_process_int(aResampler, aChannel, aIn, aInLen, aOut, aOutLen);
+#else
+  nsAutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE*4> tmp1;
+  nsAutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE*4> tmp2;
+  tmp1.SetLength(*aInLen);
+  tmp2.SetLength(*aOutLen);
+  ConvertAudioSamples(aIn, tmp1.Elements(), *aInLen);
+  int result = speex_resampler_process_float(aResampler, aChannel, tmp1.Elements(), aInLen, tmp2.Elements(), aOutLen);
+  ConvertAudioSamples(tmp2.Elements(), aOut, *aOutLen);
+  return result;
+#endif
+}
+
 }
 }
--- a/content/media/webaudio/WebAudioUtils.h
+++ b/content/media/webaudio/WebAudioUtils.h
@ -19,7 +19,6 @@ typedef struct SpeexResamplerState_ SpeexResamplerState;
 namespace mozilla {

 class AudioNodeStream;
-class MediaStream;

 namespace dom {

@ -210,7 +209,13 @@ struct WebAudioUtils {
                        uint32_t aChannel,
                        const int16_t* aIn, uint32_t* aInLen,
                        float* aOut, uint32_t* aOutLen);
-};
+
+  static int
+  SpeexResamplerProcess(SpeexResamplerState* aResampler,
+                        uint32_t aChannel,
+                        const int16_t* aIn, uint32_t* aInLen,
+                        int16_t* aOut, uint32_t* aOutLen);
+  };

 }
 }