gecko/content/media/AudioSegment.cpp
Paul Adenot 06e81488b1 Bug 919215 - Start the AudioStream on creation when in low-latency mode, and let it underrun. r=roc
The BufferedAudioStream buffers the data it gets through the Write() calls and
what is consumed by the callback. This means that if the audio producer starts
Write()ing data right after Start()ing the stream, data will accumulate in this
buffer and won't be consumed. Eventually, the buffer will be of a certain size
before it begins to be consumed by the callback, and this means an
umcompressible latency (because the data will be written at more or less the
same rate as it is produced).

This patch start the BufferedAudioStream right away when it is created, dropping
the silent AudioSegment until it finds real data (and padding with silence is
then done at the beginning). The stream will underrun, but the callback will
synthetize silence, avoiding overbuffering in the BufferedAudioStream. This
ensures minimal latency cause by the buffering.

Note that the clock will still advance, so this will not change the behavior of
content that has leading silence.
2013-11-19 10:43:15 +13:00

181 lines
6.9 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "AudioSegment.h"
#include "AudioStream.h"
#include "AudioChannelFormat.h"
#include "Latency.h"
namespace mozilla {
template <class SrcT, class DestT>
static void
InterleaveAndConvertBuffer(const SrcT** aSourceChannels,
int32_t aLength, float aVolume,
int32_t aChannels,
DestT* aOutput)
{
DestT* output = aOutput;
for (int32_t i = 0; i < aLength; ++i) {
for (int32_t channel = 0; channel < aChannels; ++channel) {
float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
*output = FloatToAudioSample<DestT>(v);
++output;
}
}
}
void
InterleaveAndConvertBuffer(const void** aSourceChannels,
AudioSampleFormat aSourceFormat,
int32_t aLength, float aVolume,
int32_t aChannels,
AudioDataValue* aOutput)
{
switch (aSourceFormat) {
case AUDIO_FORMAT_FLOAT32:
InterleaveAndConvertBuffer(reinterpret_cast<const float**>(aSourceChannels),
aLength,
aVolume,
aChannels,
aOutput);
break;
case AUDIO_FORMAT_S16:
InterleaveAndConvertBuffer(reinterpret_cast<const int16_t**>(aSourceChannels),
aLength,
aVolume,
aChannels,
aOutput);
break;
}
}
void
AudioSegment::ApplyVolume(float aVolume)
{
for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
ci->mVolume *= aVolume;
}
}
static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */
static const uint8_t gZeroChannel[MAX_AUDIO_SAMPLE_SIZE*AUDIO_PROCESSING_FRAMES] = {0};
void
DownmixAndInterleave(const nsTArray<const void*>& aChannelData,
AudioSampleFormat aSourceFormat, int32_t aDuration,
float aVolume, uint32_t aOutputChannels,
AudioDataValue* aOutput)
{
nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixConversionBuffer;
nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixOutputBuffer;
channelData.SetLength(aChannelData.Length());
if (aSourceFormat != AUDIO_FORMAT_FLOAT32) {
NS_ASSERTION(aSourceFormat == AUDIO_FORMAT_S16, "unknown format");
downmixConversionBuffer.SetLength(aDuration*aChannelData.Length());
for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
float* conversionBuf = downmixConversionBuffer.Elements() + (i*aDuration);
const int16_t* sourceBuf = static_cast<const int16_t*>(aChannelData[i]);
for (uint32_t j = 0; j < (uint32_t)aDuration; ++j) {
conversionBuf[j] = AudioSampleToFloat(sourceBuf[j]);
}
channelData[i] = conversionBuf;
}
} else {
for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
channelData[i] = aChannelData[i];
}
}
downmixOutputBuffer.SetLength(aDuration*aOutputChannels);
nsAutoTArray<float*,GUESS_AUDIO_CHANNELS> outputChannelBuffers;
nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> outputChannelData;
outputChannelBuffers.SetLength(aOutputChannels);
outputChannelData.SetLength(aOutputChannels);
for (uint32_t i = 0; i < (uint32_t)aOutputChannels; ++i) {
outputChannelData[i] = outputChannelBuffers[i] =
downmixOutputBuffer.Elements() + aDuration*i;
}
if (channelData.Length() > aOutputChannels) {
AudioChannelsDownMix(channelData, outputChannelBuffers.Elements(),
aOutputChannels, aDuration);
}
InterleaveAndConvertBuffer(outputChannelData.Elements(), AUDIO_FORMAT_FLOAT32,
aDuration, aVolume, aOutputChannels, aOutput);
}
void
AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput)
{
uint32_t outputChannels = aOutput->GetChannels();
nsAutoTArray<AudioDataValue,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> buf;
nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
AudioChunk& c = *ci;
TrackTicks offset = 0;
while (offset < c.mDuration) {
TrackTicks durationTicks =
std::min<TrackTicks>(c.mDuration - offset, AUDIO_PROCESSING_FRAMES);
if (uint64_t(outputChannels)*durationTicks > INT32_MAX || offset > INT32_MAX) {
NS_ERROR("Buffer overflow");
return;
}
uint32_t duration = uint32_t(durationTicks);
// If we have written data in the past, or we have real (non-silent) data
// to write, we can proceed. Otherwise, it means we just started the
// AudioStream, and we don't have real data to write to it (just silence).
// To avoid overbuffering in the AudioStream, we simply drop the silence,
// here. The stream will underrun and output silence anyways.
if (c.mBuffer || aOutput->GetWritten()) {
buf.SetLength(outputChannels*duration);
if (c.mBuffer) {
channelData.SetLength(c.mChannelData.Length());
for (uint32_t i = 0; i < channelData.Length(); ++i) {
channelData[i] =
AddAudioSampleOffset(c.mChannelData[i], c.mBufferFormat, int32_t(offset));
}
if (channelData.Length() < outputChannels) {
// Up-mix. Note that this might actually make channelData have more
// than outputChannels temporarily.
AudioChannelsUpMix(&channelData, outputChannels, gZeroChannel);
}
if (channelData.Length() > outputChannels) {
// Down-mix.
DownmixAndInterleave(channelData, c.mBufferFormat, duration,
c.mVolume, outputChannels, buf.Elements());
} else {
InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
duration, c.mVolume,
outputChannels,
buf.Elements());
}
} else {
// Assumes that a bit pattern of zeroes == 0.0f
memset(buf.Elements(), 0, buf.Length()*sizeof(AudioDataValue));
}
aOutput->Write(buf.Elements(), int32_t(duration), &(c.mTimeStamp));
}
if(!c.mTimeStamp.IsNull()) {
TimeStamp now = TimeStamp::Now();
// would be more efficient to c.mTimeStamp to ms on create time then pass here
LogTime(AsyncLatencyLogger::AudioMediaStreamTrack, aID,
(now - c.mTimeStamp).ToMilliseconds(), c.mTimeStamp);
}
offset += duration;
}
}
aOutput->Start();
}
}