From bc5607910dac0d76e7e7fe860e0a5fbc4f1b1071 Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Wed, 30 May 2018 03:20:31 -0500 Subject: [PATCH] Fixing audio pops due to resampling (this fixes a bunch of audio popping-related bugs). Now Frame objects track their own max_audio_sample count, as we add audio data... so we have an accurate bound on each frame. --- include/Frame.h | 2 ++ include/FrameMapper.h | 4 ---- src/FFmpegReader.cpp | 4 ++-- src/Frame.cpp | 38 +++++++++++++++++++++++++------------- src/FrameMapper.cpp | 22 +++++----------------- src/Timeline.cpp | 5 ----- 6 files changed, 34 insertions(+), 41 deletions(-) diff --git a/include/Frame.h b/include/Frame.h index 841468fd..a7ad509f 100644 --- a/include/Frame.h +++ b/include/Frame.h @@ -129,6 +129,7 @@ namespace openshot int height; int sample_rate; string color; + int64_t max_audio_sample; ///< The max audio sample count added to this frame /// Constrain a color value from 0 to 255 int constrain(int color_value); @@ -138,6 +139,7 @@ namespace openshot bool has_audio_data; ///< This frame has been loaded with audio data bool has_image_data; ///< This frame has been loaded with pixel data + /// Constructor - blank frame (300x200 blank image, 48kHz audio silence) Frame(); diff --git a/include/FrameMapper.h b/include/FrameMapper.h index dc756bd5..e70fdbc5 100644 --- a/include/FrameMapper.h +++ b/include/FrameMapper.h @@ -147,7 +147,6 @@ namespace openshot CacheMemory final_cache; // Cache of actual Frame objects bool is_dirty; // When this is true, the next call to GetFrame will re-init the mapping AVAudioResampleContext *avr; // Audio resampling context object - int64_t timeline_frame_offset; // Timeline frame offset // Internal methods used by init void AddField(int64_t frame); @@ -176,9 +175,6 @@ namespace openshot /// Change frame rate or audio mapping details void ChangeMapping(Fraction target_fps, PulldownType pulldown, int target_sample_rate, int target_channels, ChannelLayout 
target_channel_layout); - // Set offset relative to parent timeline - void SetTimelineFrameOffset(int64_t offset); - /// Close the openshot::FrameMapper and internal reader void Close(); diff --git a/src/FFmpegReader.cpp b/src/FFmpegReader.cpp index e0e5c19f..68c606f7 100644 --- a/src/FFmpegReader.cpp +++ b/src/FFmpegReader.cpp @@ -1300,7 +1300,7 @@ void FFmpegReader::Seek(int64_t requested_frame) seek_count++; // If seeking near frame 1, we need to close and re-open the file (this is more reliable than seeking) - int buffer_amount = OPEN_MP_NUM_PROCESSORS * 2; + int buffer_amount = max(OPEN_MP_NUM_PROCESSORS, 8); if (requested_frame - buffer_amount < 20) { // Close and re-open file (basically seeking to frame 1) @@ -1751,7 +1751,7 @@ void FFmpegReader::CheckWorkingFrames(bool end_of_stream, int64_t requested_fram // Get check count for this frame checked_frames_size = checked_frames.size(); if (!checked_count_tripped || f->number >= requested_frame) - checked_count = checked_frames[f->number]; + checked_count = checked_frames[f->number]; else // Force checked count over the limit checked_count = max_checked_count; diff --git a/src/Frame.cpp b/src/Frame.cpp index 0c3b7974..0ff8f0cc 100644 --- a/src/Frame.cpp +++ b/src/Frame.cpp @@ -32,7 +32,8 @@ using namespace openshot; // Constructor - blank frame (300x200 blank image, 48kHz audio silence) Frame::Frame() : number(1), pixel_ratio(1,1), channels(2), width(1), height(1), color("#000000"), - channel_layout(LAYOUT_STEREO), sample_rate(44100), qbuffer(NULL), has_audio_data(false), has_image_data(false) + channel_layout(LAYOUT_STEREO), sample_rate(44100), qbuffer(NULL), has_audio_data(false), has_image_data(false), + max_audio_sample(0) { // Init the image magic and audio buffer audio = std::shared_ptr(new juce::AudioSampleBuffer(channels, 0)); @@ -44,7 +45,8 @@ Frame::Frame() : number(1), pixel_ratio(1,1), channels(2), width(1), height(1), // Constructor - image only (48kHz audio silence) Frame::Frame(int64_t number, 
int width, int height, string color) : number(number), pixel_ratio(1,1), channels(2), width(width), height(height), color(color), - channel_layout(LAYOUT_STEREO), sample_rate(44100), qbuffer(NULL), has_audio_data(false), has_image_data(false) + channel_layout(LAYOUT_STEREO), sample_rate(44100), qbuffer(NULL), has_audio_data(false), has_image_data(false), + max_audio_sample(0) { // Init the image magic and audio buffer audio = std::shared_ptr(new juce::AudioSampleBuffer(channels, 0)); @@ -56,7 +58,8 @@ Frame::Frame(int64_t number, int width, int height, string color) // Constructor - audio only (300x200 blank image) Frame::Frame(int64_t number, int samples, int channels) : number(number), pixel_ratio(1,1), channels(channels), width(1), height(1), color("#000000"), - channel_layout(LAYOUT_STEREO), sample_rate(44100), qbuffer(NULL), has_audio_data(false), has_image_data(false) + channel_layout(LAYOUT_STEREO), sample_rate(44100), qbuffer(NULL), has_audio_data(false), has_image_data(false), + max_audio_sample(0) { // Init the image magic and audio buffer audio = std::shared_ptr(new juce::AudioSampleBuffer(channels, samples)); @@ -68,7 +71,8 @@ Frame::Frame(int64_t number, int samples, int channels) : // Constructor - image & audio Frame::Frame(int64_t number, int width, int height, string color, int samples, int channels) : number(number), pixel_ratio(1,1), channels(channels), width(width), height(height), color(color), - channel_layout(LAYOUT_STEREO), sample_rate(44100), qbuffer(NULL), has_audio_data(false), has_image_data(false) + channel_layout(LAYOUT_STEREO), sample_rate(44100), qbuffer(NULL), has_audio_data(false), has_image_data(false), + max_audio_sample(0) { // Init the image magic and audio buffer audio = std::shared_ptr(new juce::AudioSampleBuffer(channels, samples)); @@ -175,7 +179,7 @@ std::shared_ptr Frame::GetWaveform(int width, int height, int Red, int G QVector labels; // Calculate width of an image based on the # of samples - int total_samples = 
audio->getNumSamples(); + int total_samples = GetAudioSamplesCount(); if (total_samples > 0) { // If samples are present... @@ -193,7 +197,7 @@ std::shared_ptr Frame::GetWaveform(int width, int height, int Red, int G // Get audio for this channel const float *samples = audio->getReadPointer(channel); - for (int sample = 0; sample < audio->getNumSamples(); sample++, X++) + for (int sample = 0; sample < GetAudioSamplesCount(); sample++, X++) { // Sample value (scaled to -100 to 100) float value = samples[sample] * 100; @@ -335,7 +339,7 @@ float* Frame::GetPlanarAudioSamples(int new_sample_rate, AudioResampler* resampl float *output = NULL; AudioSampleBuffer *buffer(audio.get()); int num_of_channels = audio->getNumChannels(); - int num_of_samples = audio->getNumSamples(); + int num_of_samples = GetAudioSamplesCount(); // Resample to new sample rate (if needed) if (new_sample_rate != sample_rate) @@ -381,7 +385,7 @@ float* Frame::GetInterleavedAudioSamples(int new_sample_rate, AudioResampler* re float *output = NULL; AudioSampleBuffer *buffer(audio.get()); int num_of_channels = audio->getNumChannels(); - int num_of_samples = audio->getNumSamples(); + int num_of_samples = GetAudioSamplesCount(); // Resample to new sample rate (if needed) if (new_sample_rate != sample_rate && resampler) @@ -434,10 +438,7 @@ int Frame::GetAudioChannelsCount() int Frame::GetAudioSamplesCount() { const GenericScopedLock lock(addingAudioSection); - if (audio) - return audio->getNumSamples(); - else - return 0; + return max_audio_sample; } juce::AudioSampleBuffer *Frame::GetAudioSampleBuffer() @@ -828,6 +829,9 @@ void Frame::ResizeAudio(int channels, int length, int rate, ChannelLayout layout audio->setSize(channels, length, true, true, false); channel_layout = layout; sample_rate = rate; + + // Calculate max audio sample added + max_audio_sample = length; } // Add audio samples to a specific channel @@ -875,6 +879,10 @@ void Frame::AddAudio(bool replaceSamples, int destChannel, int 
destStartSample, // Add samples to frame's audio buffer audio->addFrom(destChannel, destStartSample, source, numSamples, gainFactor); has_audio_data = true; + + // Calculate max audio sample added + if (new_length > max_audio_sample) + max_audio_sample = new_length; } } @@ -961,7 +969,7 @@ void Frame::AddMagickImage(std::shared_ptr new_image) void Frame::Play() { // Check if samples are present - if (!audio->getNumSamples()) + if (!GetAudioSamplesCount()) return; AudioDeviceManager deviceManager; @@ -1043,4 +1051,8 @@ void Frame::AddAudioSilence(int numSamples) audio->setSize(channels, numSamples, false, true, false); audio->clear(); has_audio_data = true; + + // Calculate max audio sample added + if (numSamples > max_audio_sample) + max_audio_sample = numSamples; } diff --git a/src/FrameMapper.cpp b/src/FrameMapper.cpp index 16bb1c2c..f49cbc4d 100644 --- a/src/FrameMapper.cpp +++ b/src/FrameMapper.cpp @@ -31,7 +31,7 @@ using namespace std; using namespace openshot; FrameMapper::FrameMapper(ReaderBase *reader, Fraction target, PulldownType target_pulldown, int target_sample_rate, int target_channels, ChannelLayout target_channel_layout) : - reader(reader), target(target), pulldown(target_pulldown), is_dirty(true), avr(NULL), timeline_frame_offset(0) + reader(reader), target(target), pulldown(target_pulldown), is_dirty(true), avr(NULL) { // Set the original frame rate from the reader original = Fraction(reader->info.fps.num, reader->info.fps.den); @@ -241,7 +241,7 @@ void FrameMapper::Init() if (field % 2 == 0 && field > 0) { // New frame number - int64_t frame_number = field / 2 + timeline_frame_offset; + int64_t frame_number = field / 2; // Set the bottom frame if (f.isOdd) @@ -346,7 +346,7 @@ std::shared_ptr FrameMapper::GetOrCreateFrame(int64_t number) std::shared_ptr new_frame; // Init some basic properties about this frame (keep sample rate and # channels the same as the original reader for now) - int samples_in_frame = Frame::GetSamplesPerFrame(number + 
timeline_frame_offset, target, reader->info.sample_rate, reader->info.channels); + int samples_in_frame = Frame::GetSamplesPerFrame(number, target, reader->info.sample_rate, reader->info.channels); try { // Debug output @@ -421,7 +421,7 @@ std::shared_ptr FrameMapper::GetFrame(int64_t requested_frame) // Get # of channels in the actual frame int channels_in_frame = mapped_frame->GetAudioChannelsCount(); - int samples_in_frame = Frame::GetSamplesPerFrame(frame_number + timeline_frame_offset, target, mapped_frame->SampleRate(), channels_in_frame); + int samples_in_frame = Frame::GetSamplesPerFrame(frame_number, target, mapped_frame->SampleRate(), channels_in_frame); // Determine if mapped frame is identical to source frame // including audio sample distribution according to mapped.Samples, @@ -750,18 +750,6 @@ void FrameMapper::ChangeMapping(Fraction target_fps, PulldownType target_pulldow Init(); } -// Set offset relative to parent timeline -void FrameMapper::SetTimelineFrameOffset(int64_t offset) -{ - ZmqLogger::Instance()->AppendDebugMethod("FrameMapper::SetTimelineFrameOffset", "offset", offset, "", -1, "", -1, "", -1, "", -1, "", -1); - - // Mark as dirty - is_dirty = true; - - // Used to correctly align audio sample distribution - timeline_frame_offset = offset; -} - // Resample audio and map channels (if needed) void FrameMapper::ResampleMappedAudio(std::shared_ptr frame, int64_t original_frame_number) { @@ -813,7 +801,7 @@ void FrameMapper::ResampleMappedAudio(std::shared_ptr frame, int64_t orig } // Update total samples & input frame size (due to bigger or smaller data types) - total_frame_samples = Frame::GetSamplesPerFrame(frame->number + timeline_frame_offset, target, info.sample_rate, info.channels); + total_frame_samples = Frame::GetSamplesPerFrame(frame->number, target, info.sample_rate, info.channels); ZmqLogger::Instance()->AppendDebugMethod("FrameMapper::ResampleMappedAudio (adjust # of samples)", "total_frame_samples", total_frame_samples, 
"info.sample_rate", info.sample_rate, "sample_rate_in_frame", sample_rate_in_frame, "info.channels", info.channels, "channels_in_frame", channels_in_frame, "original_frame_number", original_frame_number); diff --git a/src/Timeline.cpp b/src/Timeline.cpp index fef7cc1b..b2f370ba 100644 --- a/src/Timeline.cpp +++ b/src/Timeline.cpp @@ -127,11 +127,6 @@ void Timeline::apply_mapper_to_clip(Clip* clip) FrameMapper* clip_mapped_reader = (FrameMapper*) clip_reader; clip_mapped_reader->ChangeMapping(info.fps, PULLDOWN_NONE, info.sample_rate, info.channels, info.channel_layout); - // Update timeline offset - double time_diff = 0 - clip->Position() + clip->Start(); - int clip_offset = -round(time_diff * info.fps.ToDouble()); - clip_mapped_reader->SetTimelineFrameOffset(clip_offset); - // Update clip reader clip->Reader(clip_reader); }