Bug 818822 - Resample all inputs of the MediaStreamGraph to the ideal audio rate. r=roc

This commit is contained in:
Paul Adenot 2014-03-24 11:06:05 +01:00
parent d626d88ea6
commit 35375cac06
9 changed files with 173 additions and 33 deletions

View File

@ -107,15 +107,6 @@ ResampleChannelBuffer(SpeexResamplerState* aResampler, uint32_t aChannel,
}
}
// Thread-shared holder for a set of per-channel float sample buffers.
class SharedChannelArrayBuffer : public ThreadSharedObject {
public:
  // Takes over the contents of *aBuffers by swapping them into mBuffers;
  // *aBuffers is left holding whatever mBuffers held before the swap
  // (mBuffers is not otherwise initialized here, so presumably empty).
  // Marked explicit so that a raw array pointer is never silently converted
  // into a buffer holder.
  explicit SharedChannelArrayBuffer(nsTArray<nsTArray<float> >* aBuffers)
  {
    mBuffers.SwapElements(*aBuffers);
  }
  // One inner array of samples per channel.
  nsTArray<nsTArray<float> > mBuffers;
};
void
AudioNodeExternalInputStream::TrackMapEntry::ResampleChannels(const nsTArray<const void*>& aBuffers,
uint32_t aInputDuration,
@ -178,7 +169,7 @@ AudioNodeExternalInputStream::TrackMapEntry::ResampleChannels(const nsTArray<con
}
uint32_t length = resampledBuffers[0].Length();
nsRefPtr<ThreadSharedObject> buf = new SharedChannelArrayBuffer(&resampledBuffers);
nsRefPtr<ThreadSharedObject> buf = new SharedChannelArrayBuffer<float>(&resampledBuffers);
mResampledData.AppendFrames(buf.forget(), bufferPtrs, length);
}

View File

@ -49,8 +49,7 @@ public:
typedef AudioSampleTraits<AUDIO_OUTPUT_FORMAT>::Type AudioDataValue;
// Single-sample conversion
// Single-sample conversion
/*
* Use "2^N" conversion since it's simple, fast, "bit transparent", used by
* many other libraries and apparently behaves reasonably.

View File

@ -8,6 +8,7 @@
#include "AudioStream.h"
#include "AudioChannelFormat.h"
#include "Latency.h"
#include "speex/speex_resampler.h"
namespace mozilla {
@ -109,6 +110,29 @@ DownmixAndInterleave(const nsTArray<const void*>& aChannelData,
aDuration, aVolume, aOutputChannels, aOutput);
}
// Resamples every chunk of this segment with aResampler, using the input and
// output rates the resampler itself reports. The sample type is picked from
// the buffer format of the first chunk only.
void AudioSegment::ResampleChunks(SpeexResamplerState* aResampler)
{
  // An empty segment has nothing to resample, and mChunks[0] is read below.
  if (mChunks.IsEmpty()) {
    return;
  }

  uint32_t sourceRate;
  uint32_t targetRate;
  speex_resampler_get_rate(aResampler, &sourceRate, &targetRate);

  if (mChunks[0].mBufferFormat == AUDIO_FORMAT_FLOAT32) {
    Resample<float>(aResampler, sourceRate, targetRate);
  } else if (mChunks[0].mBufferFormat == AUDIO_FORMAT_S16) {
    Resample<int16_t>(aResampler, sourceRate, targetRate);
  } else {
    // Any other buffer format is not expected to reach this point.
    MOZ_ASSERT(false);
  }
}
void
AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput)
{

View File

@ -9,12 +9,23 @@
#include "MediaSegment.h"
#include "AudioSampleFormat.h"
#include "SharedBuffer.h"
#include "WebAudioUtils.h"
#ifdef MOZILLA_INTERNAL_API
#include "mozilla/TimeStamp.h"
#endif
namespace mozilla {
// Thread-shared holder for a set of per-channel sample buffers of type T.
template<typename T>
class SharedChannelArrayBuffer : public ThreadSharedObject {
public:
  // Takes over the contents of *aBuffers by swapping them into mBuffers;
  // *aBuffers is left holding whatever mBuffers held before the swap
  // (mBuffers is not otherwise initialized here, so presumably empty).
  // Marked explicit so that a raw array pointer is never silently converted
  // into a buffer holder.
  explicit SharedChannelArrayBuffer(nsTArray<nsTArray<T>>* aBuffers)
  {
    mBuffers.SwapElements(*aBuffers);
  }
  // One inner array of samples per channel.
  nsTArray<nsTArray<T>> mBuffers;
};
class AudioStream;
/**
@ -111,6 +122,7 @@ struct AudioChunk {
#endif
};
/**
* A list of audio samples consisting of a sequence of slices of SharedBuffers.
* The audio rate is determined by the track, not stored in this class.
@ -121,6 +133,43 @@ public:
AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {}
// Resample the whole segment in place.
//
// Each chunk's channels are run through aResampler (channel i of the chunk
// uses channel i of the resampler). The chunk's buffer is then replaced by a
// new SharedChannelArrayBuffer<T> owning the resampled data, its channel
// pointers are redirected to that data, and its duration and the segment
// duration are updated to the resampled frame count.
//
// aResampler: resampler state used for every channel of every chunk.
// aInRate:    sample rate of the data currently in the segment.
// aOutRate:   sample rate the segment should have afterwards.
template<typename T>
void Resample(SpeexResamplerState* aResampler, uint32_t aInRate, uint32_t aOutRate)
{
  mDuration = 0;
  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    AudioChunk& c = *ci;
    const uint32_t channels = c.mChannelData.Length();
    // Computed in 64 bits so that duration * rate cannot wrap.
    const uint64_t scaledDuration = static_cast<uint64_t>(c.mDuration) * aOutRate;

    // A chunk without channel data (presumably silence) has no samples to
    // resample: only rescale its duration, and leave its buffer untouched
    // instead of installing an empty buffer holder on it.
    if (channels == 0) {
      c.mDuration = static_cast<uint32_t>(scaledDuration / aInRate);
      mDuration += c.mDuration;
      continue;
    }

    nsAutoTArray<nsTArray<T>, GUESS_AUDIO_CHANNELS> output;
    nsAutoTArray<const T*, GUESS_AUDIO_CHANNELS> bufferPtrs;
    output.SetLength(channels);
    bufferPtrs.SetLength(channels);

    // Round the capacity up so the resampler does not run out of output
    // space before it has consumed the whole input chunk; the last frame may
    // end up unused.
    const uint32_t outCapacity =
      static_cast<uint32_t>((scaledDuration + aInRate - 1) / aInRate);
    uint32_t producedFrames = outCapacity;

    for (uint32_t i = 0; i < channels; i++) {
      // The resampler overwrites both counts with what it actually consumed
      // and produced, so they must be reset for every channel.
      uint32_t inFrames = c.mDuration;
      uint32_t outFrames = outCapacity;
      const T* in = static_cast<const T*>(c.mChannelData[i]);
      T* out = output[i].AppendElements(outCapacity);
      dom::WebAudioUtils::SpeexResamplerProcess(aResampler, i,
                                                in, &inFrames,
                                                out, &outFrames);
      bufferPtrs[i] = out;
      output[i].SetLength(outFrames);
      // The chunk duration must not exceed what any single channel holds.
      if (outFrames < producedFrames) {
        producedFrames = outFrames;
      }
    }

    // Hand the resampled arrays over to a shared buffer object; bufferPtrs
    // was captured before the swap and is used to re-point the channels.
    c.mBuffer = new mozilla::SharedChannelArrayBuffer<T>(&output);
    for (uint32_t i = 0; i < channels; i++) {
      c.mChannelData[i] = bufferPtrs[i];
    }
    c.mDuration = producedFrames;
    mDuration += c.mDuration;
  }
}
void ResampleChunks(SpeexResamplerState* aResampler);
void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
const nsTArray<const float*>& aChannelData,
int32_t aDuration)
@ -168,6 +217,12 @@ public:
void ApplyVolume(float aVolume);
void WriteTo(uint64_t aID, AudioStream* aOutput);
int ChannelCount() {
NS_WARN_IF_FALSE(!mChunks.IsEmpty(),
"Cannot query channel count on a AudioSegment with no chunks.");
return mChunks.IsEmpty() ? 0 : mChunks[0].mChannelData.Length();
}
static Type StaticType() { return AUDIO; }
};

View File

@ -26,6 +26,7 @@
#include "DOMMediaStream.h"
#include "GeckoProfiler.h"
#include "mozilla/unused.h"
#include "speex/speex_resampler.h"
using namespace mozilla::layers;
using namespace mozilla::dom;
@ -172,15 +173,16 @@ MediaStreamGraphImpl::ExtractPendingInput(SourceMediaStream* aStream,
MediaStreamListener* l = aStream->mListeners[j];
TrackTicks offset = (data->mCommands & SourceMediaStream::TRACK_CREATE)
? data->mStart : aStream->mBuffer.FindTrack(data->mID)->GetSegment()->GetDuration();
l->NotifyQueuedTrackChanges(this, data->mID, data->mRate,
l->NotifyQueuedTrackChanges(this, data->mID, data->mOutputRate,
offset, data->mCommands, *data->mData);
}
if (data->mCommands & SourceMediaStream::TRACK_CREATE) {
MediaSegment* segment = data->mData.forget();
STREAM_LOG(PR_LOG_DEBUG, ("SourceMediaStream %p creating track %d, rate %d, start %lld, initial end %lld",
aStream, data->mID, data->mRate, int64_t(data->mStart),
aStream, data->mID, data->mOutputRate, int64_t(data->mStart),
int64_t(segment->GetDuration())));
aStream->mBuffer.AddTrack(data->mID, data->mRate, data->mStart, segment);
aStream->mBuffer.AddTrack(data->mID, data->mOutputRate, data->mStart, segment);
// The track has taken ownership of data->mData, so let's replace
// data->mData with an empty clone.
data->mData = segment->CreateEmptyClone();
@ -332,7 +334,7 @@ MediaStreamGraphImpl::GetAudioPosition(MediaStream* aStream)
return mCurrentTime;
}
return aStream->mAudioOutputStreams[0].mAudioPlaybackStartTime +
TicksToTimeRoundDown(aStream->mAudioOutputStreams[0].mStream->GetRate(),
TicksToTimeRoundDown(IdealAudioRate(),
positionInFrames);
}
@ -811,7 +813,7 @@ MediaStreamGraphImpl::CreateOrDestroyAudioStreams(GraphTime aAudioOutputStartTim
audioOutputStream->mStream = new AudioStream();
// XXX for now, allocate stereo output. But we need to fix this to
// match the system's ideal channel configuration.
audioOutputStream->mStream->Init(2, tracks->GetRate(), AUDIO_CHANNEL_NORMAL, AudioStream::LowLatency);
audioOutputStream->mStream->Init(2, IdealAudioRate(), AUDIO_CHANNEL_NORMAL, AudioStream::LowLatency);
audioOutputStream->mTrackID = tracks->GetID();
LogLatency(AsyncLatencyLogger::AudioStreamCreate,
@ -868,10 +870,10 @@ MediaStreamGraphImpl::PlayAudio(MediaStream* aStream,
// the amount of silent samples we've inserted for blocking never gets
// more than one sample away from the ideal amount.
TrackTicks startTicks =
TimeToTicksRoundDown(track->GetRate(), audioOutput.mBlockedAudioTime);
TimeToTicksRoundDown(IdealAudioRate(), audioOutput.mBlockedAudioTime);
audioOutput.mBlockedAudioTime += end - t;
TrackTicks endTicks =
TimeToTicksRoundDown(track->GetRate(), audioOutput.mBlockedAudioTime);
TimeToTicksRoundDown(IdealAudioRate(), audioOutput.mBlockedAudioTime);
output.InsertNullDataAtStart(endTicks - startTicks);
STREAM_LOG(PR_LOG_DEBUG+1, ("MediaStream %p writing blocking-silence samples for %f to %f",
@ -1392,12 +1394,6 @@ MediaStreamGraphImpl::ForceShutDown()
}
}
// Graph setup hook: asks AudioStream to initialize its preferred sample rate.
void
MediaStreamGraphImpl::Init()
{
AudioStream::InitPreferredSampleRate();
}
namespace {
class MediaStreamGraphInitThreadRunnable : public nsRunnable {
@ -1410,7 +1406,6 @@ public:
{
char aLocal;
profiler_register_thread("MediaStreamGraph", &aLocal);
mGraph->Init();
mGraph->RunThread();
return NS_OK;
}
@ -1782,7 +1777,7 @@ MediaStream::EnsureTrack(TrackID aTrackId, TrackRate aSampleRate)
nsAutoPtr<MediaSegment> segment(new AudioSegment());
for (uint32_t j = 0; j < mListeners.Length(); ++j) {
MediaStreamListener* l = mListeners[j];
l->NotifyQueuedTrackChanges(Graph(), aTrackId, aSampleRate, 0,
l->NotifyQueuedTrackChanges(Graph(), aTrackId, IdealAudioRate(), 0,
MediaStreamListener::TRACK_EVENT_CREATED,
*segment);
}
@ -2129,7 +2124,10 @@ SourceMediaStream::AddTrack(TrackID aID, TrackRate aRate, TrackTicks aStart,
MutexAutoLock lock(mMutex);
TrackData* data = mUpdateTracks.AppendElement();
data->mID = aID;
data->mRate = aRate;
data->mInputRate = aRate;
// We resample all audio input tracks to the sample rate of the audio mixer.
data->mOutputRate = aSegment->GetType() == MediaSegment::AUDIO ?
IdealAudioRate() : aRate;
data->mStart = aStart;
data->mCommands = TRACK_CREATE;
data->mData = aSegment;
@ -2139,6 +2137,28 @@ SourceMediaStream::AddTrack(TrackID aID, TrackRate aRate, TrackTicks aStart,
}
}
// Resamples aSegment in place from the track's input rate to
// IdealAudioRate(). Does nothing for non-audio segments or when the input
// rate already equals IdealAudioRate().
//
// aTrackData: per-track state; its resampler is created on first use and
//             kept in aTrackData->mResampler for later calls.
// aSegment:   the segment to resample; only touched when it is audio.
void
SourceMediaStream::ResampleAudioToGraphSampleRate(TrackData* aTrackData, MediaSegment* aSegment)
{
  if (aSegment->GetType() != MediaSegment::AUDIO ||
      aTrackData->mInputRate == IdealAudioRate()) {
    return;
  }
  AudioSegment* segment = static_cast<AudioSegment*>(aSegment);
  if (!aTrackData->mResampler) {
    int channels = segment->ChannelCount();
    // Do not create a resampler with zero channels (empty segment, or a
    // first chunk without channel data): it would be kept and reused for
    // later segments that do have channels. Wait for a segment that reports
    // a channel count instead.
    // NOTE(review): such a segment is left at its input rate, as it was
    // before in non-debug builds -- confirm this is acceptable for silence.
    if (channels <= 0) {
      return;
    }
    SpeexResamplerState* state = speex_resampler_init(channels,
                                                      aTrackData->mInputRate,
                                                      IdealAudioRate(),
                                                      SPEEX_RESAMPLER_QUALITY_DEFAULT,
                                                      nullptr);
    // Creation failed: bail out rather than hand a null resampler to
    // ResampleChunks(), which passes it on to speex_resampler_get_rate().
    if (!state) {
      return;
    }
    aTrackData->mResampler.own(state);
  }
  segment->ResampleChunks(aTrackData->mResampler);
}
bool
SourceMediaStream::AppendToTrack(TrackID aID, MediaSegment* aSegment, MediaSegment *aRawSegment)
{
@ -2158,6 +2178,8 @@ SourceMediaStream::AppendToTrack(TrackID aID, MediaSegment* aSegment, MediaSegme
// or inserting into the graph
ApplyTrackDisabling(aID, aSegment, aRawSegment);
ResampleAudioToGraphSampleRate(track, aSegment);
// Must notify first, since AppendFrom() will empty out aSegment
NotifyDirectConsumers(track, aRawSegment ? aRawSegment : aSegment);
track->mData->AppendFrom(aSegment); // note: aSegment is now dead
@ -2182,7 +2204,7 @@ SourceMediaStream::NotifyDirectConsumers(TrackData *aTrack,
for (uint32_t j = 0; j < mDirectListeners.Length(); ++j) {
MediaStreamDirectListener* l = mDirectListeners[j];
TrackTicks offset = 0; // FIX! need a separate TrackTicks.... or the end of the internal buffer
l->NotifyRealtimeData(static_cast<MediaStreamGraph*>(GraphImpl()), aTrack->mID, aTrack->mRate,
l->NotifyRealtimeData(static_cast<MediaStreamGraph*>(GraphImpl()), aTrack->mID, aTrack->mOutputRate,
offset, aTrack->mCommands, *aSegment);
}
}
@ -2521,6 +2543,8 @@ MediaStreamGraph::GetInstance()
gGraph = new MediaStreamGraphImpl(true);
STREAM_LOG(PR_LOG_DEBUG, ("Starting up MediaStreamGraph %p", gGraph));
AudioStream::InitPreferredSampleRate();
}
return gGraph;

View File

@ -16,9 +16,18 @@
#include "VideoFrameContainer.h"
#include "VideoSegment.h"
#include "MainThreadUtils.h"
#include "nsAutoRef.h"
#include "speex/speex_resampler.h"
class nsIRunnable;
// nsAutoRef traits for SpeexResamplerState: an owned resampler state is
// released by passing it to speex_resampler_destroy().
template <>
class nsAutoRefTraits<SpeexResamplerState> : public nsPointerRefTraits<SpeexResamplerState>
{
public:
  static void Release(SpeexResamplerState* aState)
  {
    speex_resampler_destroy(aState);
  }
};
namespace mozilla {
class DOMMediaStream;
@ -662,6 +671,9 @@ public:
*/
void AddTrack(TrackID aID, TrackRate aRate, TrackTicks aStart,
MediaSegment* aSegment);
struct TrackData;
void ResampleAudioToGraphSampleRate(TrackData* aTrackData, MediaSegment* aSegment);
/**
* Append media data to a track. Ownership of aSegment remains with the caller,
* but aSegment is emptied.
@ -752,7 +764,13 @@ public:
*/
struct TrackData {
TrackID mID;
TrackRate mRate;
// Sample rate of the input data.
TrackRate mInputRate;
// Sample rate of the output data: IdealAudioRate() for audio tracks,
// otherwise the same as mInputRate.
TrackRate mOutputRate;
// Resampler if the rate of the input track does not match the
// MediaStreamGraph's.
nsAutoRef<SpeexResamplerState> mResampler;
TrackTicks mStart;
// Each time the track updates are flushed to the media graph thread,
// this is cleared.
@ -1003,7 +1021,7 @@ protected:
bool mInCycle;
};
// Returns ideal audio rate for processing
// Returns ideal audio rate for processing.
inline TrackRate IdealAudioRate() { return AudioStream::PreferredSampleRate(); }
/**

View File

@ -52,6 +52,10 @@ static const int AUDIO_TARGET_MS = 2*MEDIA_GRAPH_TARGET_PERIOD_MS +
static const int VIDEO_TARGET_MS = 2*MEDIA_GRAPH_TARGET_PERIOD_MS +
SCHEDULE_SAFETY_MARGIN_MS;
/**
* Rate at which we run the video tracks.
*/
/**
* A per-stream update message passed from the media graph thread to the
* main thread.

View File

@ -90,5 +90,25 @@ WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
#endif
}
// Resamples one channel of 16-bit integer samples into 16-bit integer
// samples.
//
// aInLen / aOutLen are in/out: on entry they give the number of input
// samples available and the output capacity; they are passed by pointer to
// the Speex call, which updates them. Returns the Speex call's result.
int
WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
                                     uint32_t aChannel,
                                     const int16_t* aIn, uint32_t* aInLen,
                                     int16_t* aOut, uint32_t* aOutLen)
{
#ifdef MOZ_SAMPLE_TYPE_S16
  // Integer builds: the data can be handed to the integer resampler as is.
  return speex_resampler_process_int(aResampler, aChannel, aIn, aInLen, aOut, aOutLen);
#else
  // Other builds: convert to AudioDataValue, run the float resampler on
  // temporary buffers, then convert the produced samples back to int16_t.
  nsAutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE*4> convertedInput;
  convertedInput.SetLength(*aInLen);
  nsAutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE*4> resampledOutput;
  resampledOutput.SetLength(*aOutLen);

  ConvertAudioSamples(aIn, convertedInput.Elements(), *aInLen);
  const int status = speex_resampler_process_float(aResampler, aChannel,
                                                   convertedInput.Elements(), aInLen,
                                                   resampledOutput.Elements(), aOutLen);
  // *aOutLen now holds the number of samples actually written.
  ConvertAudioSamples(resampledOutput.Elements(), aOut, *aOutLen);
  return status;
#endif
}
}
}

View File

@ -19,7 +19,6 @@ typedef struct SpeexResamplerState_ SpeexResamplerState;
namespace mozilla {
class AudioNodeStream;
class MediaStream;
namespace dom {
@ -210,7 +209,13 @@ struct WebAudioUtils {
uint32_t aChannel,
const int16_t* aIn, uint32_t* aInLen,
float* aOut, uint32_t* aOutLen);
};
// Resamples one channel of int16_t input into int16_t output. aInLen and
// aOutLen are passed by pointer to the underlying Speex call, which updates
// them. When MOZ_SAMPLE_TYPE_S16 is not defined, the data is converted and
// run through the float resampler.
static int
SpeexResamplerProcess(SpeexResamplerState* aResampler,
uint32_t aChannel,
const int16_t* aIn, uint32_t* aInLen,
int16_t* aOut, uint32_t* aOutLen);
};
}
}