From 2326532820b79f92ac7dbd5cdd3093ffea2d385a Mon Sep 17 00:00:00 2001
From: Jonathan Thomas <jonathan@openshot.org>
Date: Sun, 30 Oct 2022 22:04:19 -0500
Subject: [PATCH 1/4] Initial commit of AudioWaveformer class, which is
 designed to iterate any ReaderBase, and return a reduced "average" sample set
 - more useful for generating waveforms - for example, reduce a 44100 samples
 per second down to 20 samples per second.

---
 bindings/python/openshot.i |   2 +
 bindings/ruby/openshot.i   |   2 +
 src/AudioWaveformer.cpp    | 104 ++++++++++++++++++++++++++++++++++
 src/AudioWaveformer.h      |  51 +++++++++++++++++
 src/CMakeLists.txt         |   1 +
 tests/AudioWaveformer.cpp  | 113 +++++++++++++++++++++++++++++++++++++
 tests/CMakeLists.txt       |   1 +
 7 files changed, 274 insertions(+)
 create mode 100644 src/AudioWaveformer.cpp
 create mode 100644 src/AudioWaveformer.h
 create mode 100644 tests/AudioWaveformer.cpp
diff --git a/bindings/python/openshot.i b/bindings/python/openshot.i
index 6bff2105..f01cef74 100644
--- a/bindings/python/openshot.i
+++ b/bindings/python/openshot.i
@@ -55,6 +55,7 @@
 #include "ReaderBase.h"
 #include "WriterBase.h"
 #include "AudioDevices.h"
+#include "AudioWaveformer.h"
 #include "CacheBase.h"
 #include "CacheDisk.h"
 #include "CacheMemory.h"
@@ -263,6 +264,7 @@
 %include "ReaderBase.h"
 %include "WriterBase.h"
 %include "AudioDevices.h"
+%include "AudioWaveformer.h"
 %include "CacheBase.h"
 %include "CacheDisk.h"
 %include "CacheMemory.h"
diff --git a/bindings/ruby/openshot.i b/bindings/ruby/openshot.i
index 8fdb152b..cf3c4778 100644
--- a/bindings/ruby/openshot.i
+++ b/bindings/ruby/openshot.i
@@ -63,6 +63,7 @@
 #include "ReaderBase.h"
 #include "WriterBase.h"
 #include "AudioDevices.h"
+#include "AudioWaveformer.h"
 #include "CacheBase.h"
 #include "CacheDisk.h"
 #include "CacheMemory.h"
@@ -133,6 +134,7 @@
 %include "ReaderBase.h"
 %include "WriterBase.h"
 %include "AudioDevices.h"
+%include "AudioWaveformer.h"
 %include "CacheBase.h"
 %include "CacheDisk.h"
 %include "CacheMemory.h"
diff --git a/src/AudioWaveformer.cpp b/src/AudioWaveformer.cpp
new file mode 100644
index 00000000..b16a5a0d
--- /dev/null
+++ b/src/AudioWaveformer.cpp
@@ -0,0 +1,104 @@
+/**
+ * @file
+ * @brief Source file for AudioWaveformer class
+ * @author Jonathan Thomas <jonathan@openshot.org>
+ *
+ * @ref License
+ */
+
+// Copyright (c) 2008-2022 OpenShot Studios, LLC
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "AudioWaveformer.h"
+
+
+using namespace std;
+using namespace openshot;
+
+
+// Default constructor
+AudioWaveformer::AudioWaveformer(ReaderBase* new_reader) : reader(new_reader)
+{
+
+}
+
+// Destructor
+AudioWaveformer::~AudioWaveformer()
+{
+
+}
+
+// Extract audio samples from any ReaderBase class
+std::vector<float> AudioWaveformer::ExtractSamples(int channel, int num_per_second, bool normalize) {
+    std::vector<float> extracted_data(0);
+
+    if (reader) {
+        // Open reader (if needed)
+        bool does_reader_have_video = reader->info.has_video;
+        if (!reader->IsOpen()) {
+            reader->Open();
+        }
+        // Disable video for faster processing
+        reader->info.has_video = false;
+
+        int sample_rate = reader->info.sample_rate;
+        int sample_divisor = sample_rate / num_per_second;
+        int total_samples = num_per_second * (reader->info.duration + 1.0);
+
+        // Size audio buffer (for smaller dataset)
+        extracted_data.resize(total_samples);
+        int extracted_index = 0;
+
+        // Clear audio buffer
+        for (auto s = 0; s < total_samples; s++) {
+            extracted_data[s] = 0.0;
+        }
+
+        // Loop through all frames
+        int sample_index = 0;
+        float samples_total = 0.0;
+        float samples_max = 0.0;
+        float samples_min = 0.0;
+
+        for (auto f = 1; f <= reader->info.video_length; f++) {
+            // Get next frame
+            shared_ptr<openshot::Frame> frame = reader->GetFrame(f);
+
+            float* samples = frame->GetAudioSamples(channel);
+            for (auto s = 0; s < frame->GetAudioSamplesCount(); s++) {
+                samples_total += samples[s];
+                sample_index += 1;
+
+                // Cut-off reached
+                if (sample_index % sample_divisor == 0) {
+                    float avg_sample_value = samples_total / sample_divisor;
+                    extracted_data[extracted_index] = avg_sample_value;
+                    extracted_index++;
+
+                    // Track max/min values
+                    samples_max = std::max(samples_max, avg_sample_value);
+                    samples_min = std::min(samples_min, avg_sample_value);
+
+                    // reset sample total and index
+                    sample_index = 0;
+                    samples_total = 0.0;
+                }
+            }
+        }
+
+        // Scale all values to the -1 to +1 range (regardless of how small or how large the
+        // original audio sample values are)
+        if (normalize) {
+            float scale = std::min(1.0f / samples_max, 1.0f / std::fabs(samples_min));
+            for (auto s = 0; s < total_samples; s++) {
+                extracted_data[s] *= scale;
+            }
+        }
+
+        // Resume previous has_video value
+        reader->info.has_video = does_reader_have_video;
+    }
+
+    return extracted_data;
+}
diff --git a/src/AudioWaveformer.h b/src/AudioWaveformer.h
new file mode 100644
index 00000000..4e718a87
--- /dev/null
+++ b/src/AudioWaveformer.h
@@ -0,0 +1,51 @@
+/**
+ * @file
+ * @brief Header file for AudioWaveformer class
+ * @author Jonathan Thomas <jonathan@openshot.org>
+ *
+ * @ref License
+ */
+
+// Copyright (c) 2008-2022 OpenShot Studios, LLC
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#ifndef OPENSHOT_WAVEFORMER_H
+#define OPENSHOT_WAVEFORMER_H
+
+#include "ReaderBase.h"
+#include "Frame.h"
+#include <vector>
+
+
+namespace openshot {
+
+	/**
+	 * @brief This class is used to extra audio data used for generating waveforms.
+	 *
+	 * Pass in a ReaderBase* with audio data, and this class will iterate the reader,
+	 * and sample down the dataset to a much smaller set - more useful for generating
+	 * waveforms. For example, take 44100 samples per second, and reduce it to 20
+	 * "average" samples per second - much easier to graph.
+	 */
+	class AudioWaveformer {
+	private:
+        ReaderBase* reader;
+
+	public:
+		/// Default constructor
+        AudioWaveformer(ReaderBase* reader);
+
+        /// @brief Extract audio samples from any ReaderBase class
+        /// @param channel Which audio channel should we extract data from
+        /// @param num_per_second How many samples per second to return
+        /// @param normalize Should we scale the data range so the largest value is 1.0
+		std::vector<float> ExtractSamples(int channel, int num_per_second, bool normalize);
+
+		/// Destructor
+		~AudioWaveformer();
+	};
+
+}
+
+#endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fdcb9d48..4ac69210 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -50,6 +50,7 @@ set(OPENSHOT_SOURCES
   AudioDevices.cpp
   AudioReaderSource.cpp
   AudioResampler.cpp
+  AudioWaveformer.cpp
   CacheBase.cpp
   CacheDisk.cpp
   CacheMemory.cpp
diff --git a/tests/AudioWaveformer.cpp b/tests/AudioWaveformer.cpp
new file mode 100644
index 00000000..e6cdfa20
--- /dev/null
+++ b/tests/AudioWaveformer.cpp
@@ -0,0 +1,113 @@
+/**
+ * @file
+ * @brief Unit tests for openshot::AudioWaveformer
+ * @author Jonathan Thomas <jonathan@openshot.org>
+ *
+ * @ref License
+ */
+
+// Copyright (c) 2008-2019 OpenShot Studios, LLC
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "openshot_catch.h"
+#include "AudioWaveformer.h"
+#include "FFmpegReader.h"
+
+
+using namespace openshot;
+
+TEST_CASE( "Extract waveform data piano.wav", "[libopenshot][audiowaveformer]" )
+{
+    // Create a reader
+    std::stringstream path;
+    path << TEST_MEDIA_PATH << "piano.wav";
+    FFmpegReader r(path.str());
+    r.Open();
+
+    // Create AudioWaveformer and extract a smaller "average" sample set of audio data
+    AudioWaveformer waveformer(&r);
+    for (auto channel = 0; channel < r.info.channels; channel++) {
+        std::vector<float> waveform = waveformer.ExtractSamples(channel, 20, false);
+
+        if (channel == 0) {
+            CHECK(waveform.size() == 107);
+            CHECK(waveform[0] == Approx(0.000820312474f).margin(0.00001));
+            CHECK(waveform[86] == Approx(-0.00144531252f).margin(0.00001));
+            CHECK(waveform[87] == Approx(0.0f).margin(0.00001));
+
+            for (auto sample = 0; sample < waveform.size(); sample++) {
+                std::cout << waveform[sample] << std::endl;
+            }
+        } else if (channel == 1) {
+            CHECK(waveform.size() == 107);
+            CHECK(waveform[0] == Approx(0.000820312474f).margin(0.00001));
+            CHECK(waveform[86] == Approx(-0.00144531252f).margin(0.00001));
+            CHECK(waveform[87] == Approx(0.0f).margin(0.00001));
+        }
+
+        waveform.clear();
+    }
+
+    // Clean up
+    r.Close();
+}
+
+TEST_CASE( "Extract waveform data sintel", "[libopenshot][audiowaveformer]" )
+{
+    // Create a reader
+    std::stringstream path;
+    path << TEST_MEDIA_PATH << "sintel_trailer-720p.mp4";
+    FFmpegReader r(path.str());
+
+    // Create AudioWaveformer and extract a smaller "average" sample set of audio data
+    AudioWaveformer waveformer(&r);
+    for (auto channel = 0; channel < r.info.channels; channel++) {
+        std::vector<float> waveform = waveformer.ExtractSamples(channel, 20, false);
+
+        if (channel == 0) {
+            CHECK(waveform.size() == 1058);
+            CHECK(waveform[0] == Approx(-1.48391728e-05f).margin(0.00001));
+            CHECK(waveform[1037] == Approx(6.79016102e-06f).margin(0.00001));
+            CHECK(waveform[1038] == Approx(0.0f).margin(0.00001));
+        } else if (channel == 1) {
+            CHECK(waveform.size() == 1058);
+            CHECK(waveform[0] == Approx(-1.43432617e-05f).margin(0.00001));
+            CHECK(waveform[1037] == Approx(6.79016102e-06f).margin(0.00001));
+            CHECK(waveform[1038] == Approx(0.0f).margin(0.00001));
+        }
+
+        waveform.clear();
+    }
+
+    // Clean up
+    r.Close();
+}
+
+TEST_CASE( "Normalize & scale waveform data piano.wav", "[libopenshot][audiowaveformer]" )
+{
+    // Create a reader
+    std::stringstream path;
+    path << TEST_MEDIA_PATH << "piano.wav";
+    FFmpegReader r(path.str());
+
+    // Create AudioWaveformer and extract a smaller "average" sample set of audio data
+    AudioWaveformer waveformer(&r);
+    for (auto channel = 0; channel < r.info.channels; channel++) {
+        // Normalize values and scale them between -1 and +1
+        std::vector<float> waveform = waveformer.ExtractSamples(channel, 20, true);
+
+        if (channel == 0) {
+            CHECK(waveform.size() == 107);
+            CHECK(waveform[0] == Approx(0.113821134).margin(0.00001));
+            CHECK(waveform[35] == Approx(-1.0f).margin(0.00001));
+            CHECK(waveform[86] == Approx(-0.200542003f).margin(0.00001));
+            CHECK(waveform[87] == Approx(0.0f).margin(0.00001));
+        }
+
+        waveform.clear();
+    }
+
+    // Clean up
+    r.Close();
+}
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 56f47e15..2f4fb345 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -20,6 +20,7 @@ file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/examples/" TEST_MEDIA_PATH)
 ###  TEST SOURCE FILES
 ###
 set(OPENSHOT_TESTS
+  AudioWaveformer
   CacheDisk
   CacheMemory
   Clip

From 9cd7dd68a851b501c44ade003b03ee177ed2c770 Mon Sep 17 00:00:00 2001
From: Jonathan Thomas <jonathan@openshot.org>
Date: Mon, 31 Oct 2022 14:20:18 -0500
Subject: [PATCH 2/4] Wrap vector<float> in Swig mappings, and enable
 thread-safe access to swig wrappers - so our long running waveformer does not
 block the Python GIL

---
 bindings/python/openshot.i | 3 ++-
 bindings/ruby/openshot.i   | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/bindings/python/openshot.i b/bindings/python/openshot.i
index f01cef74..10cb8866 100644
--- a/bindings/python/openshot.i
+++ b/bindings/python/openshot.i
@@ -9,7 +9,7 @@
 //
 // SPDX-License-Identifier: LGPL-3.0-or-later
 
-%module openshot
+%module("threads"=1) openshot
 
 /* Suppress warnings about ignored operator= */
 %warnfilter(362);
@@ -45,6 +45,7 @@
 %template() std::map<std::string, int>;
 %template() std::pair<int, int>;
 %template() std::vector<int>;
+%template() std::vector<float>;
 %template() std::pair<double, double>;
 %template() std::pair<float, float>;
 %template() std::pair<std::string, std::string>;
diff --git a/bindings/ruby/openshot.i b/bindings/ruby/openshot.i
index cf3c4778..debeeec5 100644
--- a/bindings/ruby/openshot.i
+++ b/bindings/ruby/openshot.i
@@ -9,7 +9,7 @@
 //
 // SPDX-License-Identifier: LGPL-3.0-or-later
 
-%module openshot
+%module("threads"=1) openshot
 
 /* Suppress warnings about ignored operator= */
 %warnfilter(362);
@@ -45,6 +45,7 @@
 %template() std::map<std::string, int>;
 %template() std::pair<int, int>;
 %template() std::vector<int>;
+%template() std::vector<float>;
 %template() std::pair<double, double>;
 %template() std::pair<float, float>;
 %template() std::pair<std::string, std::string>;

From c838c126ad73318bf15d427acabdd4026019f56e Mon Sep 17 00:00:00 2001
From: Jonathan Thomas <jonathan@openshot.org>
Date: Tue, 1 Nov 2022 15:17:03 -0500
Subject: [PATCH 3/4] Improvements to AudioWaveformer to use RMS (Root Mean
 Square), and return both average RMS and max RMS for graphing, including some
 new unit tests.

---
 bindings/python/openshot.i |  1 +
 bindings/ruby/openshot.i   |  1 +
 src/AudioWaveformer.cpp    | 59 +++++++++++++++-----------
 src/AudioWaveformer.h      | 84 ++++++++++++++++++++++++++++++--------
 tests/AudioWaveformer.cpp  | 77 +++++++++++++++++++++-------------
 5 files changed, 153 insertions(+), 69 deletions(-)

diff --git a/bindings/python/openshot.i b/bindings/python/openshot.i
index 10cb8866..2f406c7f 100644
--- a/bindings/python/openshot.i
+++ b/bindings/python/openshot.i
@@ -50,6 +50,7 @@
 %template() std::pair<float, float>;
 %template() std::pair<std::string, std::string>;
 %template() std::vector<std::pair<std::string, std::string>>;
+%template() std::vector<std::vector<float>>;
 
 %{
 #include "OpenShotVersion.h"
diff --git a/bindings/ruby/openshot.i b/bindings/ruby/openshot.i
index debeeec5..a7ef11b4 100644
--- a/bindings/ruby/openshot.i
+++ b/bindings/ruby/openshot.i
@@ -50,6 +50,7 @@
 %template() std::pair<float, float>;
 %template() std::pair<std::string, std::string>;
 %template() std::vector<std::pair<std::string, std::string>>;
+%template() std::vector<std::vector<float>>;
 
 %{
 /* Ruby and FFmpeg define competing RSHIFT macros,
diff --git a/src/AudioWaveformer.cpp b/src/AudioWaveformer.cpp
index b16a5a0d..2c064941 100644
--- a/src/AudioWaveformer.cpp
+++ b/src/AudioWaveformer.cpp
@@ -30,8 +30,8 @@ AudioWaveformer::~AudioWaveformer()
 }
 
 // Extract audio samples from any ReaderBase class
-std::vector<float> AudioWaveformer::ExtractSamples(int channel, int num_per_second, bool normalize) {
-    std::vector<float> extracted_data(0);
+AudioWaveformData AudioWaveformer::ExtractSamples(int channel, int num_per_second, bool normalize) {
+    AudioWaveformData data;
 
     if (reader) {
         // Open reader (if needed)
@@ -45,44 +45,58 @@ std::vector<float> AudioWaveformer::ExtractSamples(int channel, int num_per_seco
         int sample_rate = reader->info.sample_rate;
         int sample_divisor = sample_rate / num_per_second;
         int total_samples = num_per_second * (reader->info.duration + 1.0);
-
-        // Size audio buffer (for smaller dataset)
-        extracted_data.resize(total_samples);
         int extracted_index = 0;
 
-        // Clear audio buffer
-        for (auto s = 0; s < total_samples; s++) {
-            extracted_data[s] = 0.0;
-        }
+        // Resize and clear audio buffers
+        data.resize(total_samples);
+        data.zero(total_samples);
 
         // Loop through all frames
         int sample_index = 0;
-        float samples_total = 0.0;
         float samples_max = 0.0;
-        float samples_min = 0.0;
+        float chunk_max = 0.0;
+        float chunk_squared_sum = 0.0;
+
+        // How many channels are we using
+        int channel_count = 1;
+        if (channel == -1) {
+            channel_count = reader->info.channels;
+        }
 
         for (auto f = 1; f <= reader->info.video_length; f++) {
             // Get next frame
             shared_ptr<openshot::Frame> frame = reader->GetFrame(f);
 
-            float* samples = frame->GetAudioSamples(channel);
+            // Get sample value from a specific channel (or all channels)
             for (auto s = 0; s < frame->GetAudioSamplesCount(); s++) {
-                samples_total += samples[s];
+
+                for (auto channel_index = 0; channel_index < reader->info.channels; channel_index++) {
+                    if (channel == channel_index || channel == -1) {
+                        float *samples = frame->GetAudioSamples(channel_index);
+                        float rms_sample_value = std::sqrt(samples[s] * samples[s]);
+
+                        // Accumulate sample averages
+                        chunk_squared_sum += rms_sample_value;
+                        chunk_max = std::max(chunk_max, rms_sample_value);
+                    }
+                }
+
                 sample_index += 1;
 
                 // Cut-off reached
                 if (sample_index % sample_divisor == 0) {
-                    float avg_sample_value = samples_total / sample_divisor;
-                    extracted_data[extracted_index] = avg_sample_value;
+                    float avg_squared_sum = chunk_squared_sum / (sample_divisor * channel_count);
+                    data.max_samples[extracted_index] = chunk_max;
+                    data.rms_samples[extracted_index] = avg_squared_sum;
                     extracted_index++;
 
                     // Track max/min values
-                    samples_max = std::max(samples_max, avg_sample_value);
-                    samples_min = std::min(samples_min, avg_sample_value);
+                    samples_max = std::max(samples_max, chunk_max);
 
                     // reset sample total and index
                     sample_index = 0;
-                    samples_total = 0.0;
+                    chunk_max = 0.0;
+                    chunk_squared_sum = 0.0;
                 }
             }
         }
@@ -90,15 +104,14 @@ std::vector<float> AudioWaveformer::ExtractSamples(int channel, int num_per_seco
         // Scale all values to the -1 to +1 range (regardless of how small or how large the
         // original audio sample values are)
         if (normalize) {
-            float scale = std::min(1.0f / samples_max, 1.0f / std::fabs(samples_min));
-            for (auto s = 0; s < total_samples; s++) {
-                extracted_data[s] *= scale;
-            }
+            float scale = 1.0f / samples_max;
+            data.scale(total_samples, scale);
         }
 
         // Resume previous has_video value
         reader->info.has_video = does_reader_have_video;
     }
 
-    return extracted_data;
+
+    return data;
 }
diff --git a/src/AudioWaveformer.h b/src/AudioWaveformer.h
index 4e718a87..638295dd 100644
--- a/src/AudioWaveformer.h
+++ b/src/AudioWaveformer.h
@@ -20,31 +20,81 @@
 
 namespace openshot {
 
-	/**
-	 * @brief This class is used to extra audio data used for generating waveforms.
-	 *
-	 * Pass in a ReaderBase* with audio data, and this class will iterate the reader,
-	 * and sample down the dataset to a much smaller set - more useful for generating
-	 * waveforms. For example, take 44100 samples per second, and reduce it to 20
-	 * "average" samples per second - much easier to graph.
-	 */
-	class AudioWaveformer {
-	private:
+    /**
+     * @brief This struct holds the extracted waveform data (both the RMS root-mean-squared average, and the max values)
+     *
+     * Because we extract 2 different datasets from the audio, we return this struct with access to both sets of data,
+     * the average root mean squared values, and the max sample values.
+     */
+    struct AudioWaveformData
+    {
+        std::vector<float> max_samples;
+        std::vector<float> rms_samples;
+
+        /// Resize both datasets
+        void resize(int total_samples) {
+            max_samples.resize(total_samples);
+            rms_samples.resize(total_samples);
+        }
+
+        /// Zero out # of values in both datasets
+        void zero(int total_samples) {
+            for (auto s = 0; s < total_samples; s++) {
+                max_samples[s] = 0.0;
+                rms_samples[s] = 0.0;
+            }
+        }
+
+        /// Scale # of values by some factor
+        void scale(int total_samples, float factor) {
+            for (auto s = 0; s < total_samples; s++) {
+                max_samples[s] *= factor;
+                rms_samples[s] *= factor;
+            }
+        }
+
+        /// Clear and free memory of both datasets
+        void clear() {
+            max_samples.clear();
+            max_samples.shrink_to_fit();
+            rms_samples.clear();
+            rms_samples.shrink_to_fit();
+        }
+
+        /// Return a vector of vectors (containing both datasets)
+        std::vector<std::vector<float>> vectors() {
+            std::vector<std::vector<float>> output;
+            output.push_back(max_samples);
+            output.push_back(rms_samples);
+            return output;
+        }
+    };
+
+    /**
+     * @brief This class is used to extract audio data used for generating waveforms.
+     *
+     * Pass in a ReaderBase* with audio data, and this class will iterate the reader,
+     * and sample down the dataset to a much smaller set - more useful for generating
+     * waveforms. For example, take 44100 samples per second, and reduce it to 20
+     * "max" or "average" samples per second - much easier to graph.
+     */
+    class AudioWaveformer {
+    private:
         ReaderBase* reader;
 
-	public:
-		/// Default constructor
+    public:
+        /// Default constructor
         AudioWaveformer(ReaderBase* reader);
 
         /// @brief Extract audio samples from any ReaderBase class
-        /// @param channel Which audio channel should we extract data from
+        /// @param channel Which audio channel should we extract data from (-1 == all channels)
         /// @param num_per_second How many samples per second to return
         /// @param normalize Should we scale the data range so the largest value is 1.0
-		std::vector<float> ExtractSamples(int channel, int num_per_second, bool normalize);
+        AudioWaveformData ExtractSamples(int channel, int num_per_second, bool normalize);
 
-		/// Destructor
-		~AudioWaveformer();
-	};
+        /// Destructor
+        ~AudioWaveformer();
+    };
 
 }
 
diff --git a/tests/AudioWaveformer.cpp b/tests/AudioWaveformer.cpp
index e6cdfa20..bd12d997 100644
--- a/tests/AudioWaveformer.cpp
+++ b/tests/AudioWaveformer.cpp
@@ -6,7 +6,7 @@
  * @ref License
  */
 
-// Copyright (c) 2008-2019 OpenShot Studios, LLC
+// Copyright (c) 2008-2022 OpenShot Studios, LLC
 //
 // SPDX-License-Identifier: LGPL-3.0-or-later
 
@@ -28,22 +28,18 @@ TEST_CASE( "Extract waveform data piano.wav", "[libopenshot][audiowaveformer]" )
     // Create AudioWaveformer and extract a smaller "average" sample set of audio data
     AudioWaveformer waveformer(&r);
     for (auto channel = 0; channel < r.info.channels; channel++) {
-        std::vector<float> waveform = waveformer.ExtractSamples(channel, 20, false);
+        AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, false);
 
         if (channel == 0) {
-            CHECK(waveform.size() == 107);
-            CHECK(waveform[0] == Approx(0.000820312474f).margin(0.00001));
-            CHECK(waveform[86] == Approx(-0.00144531252f).margin(0.00001));
-            CHECK(waveform[87] == Approx(0.0f).margin(0.00001));
-
-            for (auto sample = 0; sample < waveform.size(); sample++) {
-                std::cout << waveform[sample] << std::endl;
-            }
+            CHECK(waveform.rms_samples.size() == 107);
+            CHECK(waveform.rms_samples[0] == Approx(0.04879f).margin(0.00001));
+            CHECK(waveform.rms_samples[86] == Approx(0.13578f).margin(0.00001));
+            CHECK(waveform.rms_samples[87] == Approx(0.0f).margin(0.00001));
         } else if (channel == 1) {
-            CHECK(waveform.size() == 107);
-            CHECK(waveform[0] == Approx(0.000820312474f).margin(0.00001));
-            CHECK(waveform[86] == Approx(-0.00144531252f).margin(0.00001));
-            CHECK(waveform[87] == Approx(0.0f).margin(0.00001));
+            CHECK(waveform.rms_samples.size() == 107);
+            CHECK(waveform.rms_samples[0] == Approx(0.04879f).margin(0.00001));
+            CHECK(waveform.rms_samples[86] == Approx(0.13578f).margin(0.00001));
+            CHECK(waveform.rms_samples[87] == Approx(0.0f).margin(0.00001));
         }
 
         waveform.clear();
@@ -63,18 +59,18 @@ TEST_CASE( "Extract waveform data sintel", "[libopenshot][audiowaveformer]" )
     // Create AudioWaveformer and extract a smaller "average" sample set of audio data
     AudioWaveformer waveformer(&r);
     for (auto channel = 0; channel < r.info.channels; channel++) {
-        std::vector<float> waveform = waveformer.ExtractSamples(channel, 20, false);
+        AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, false);
 
         if (channel == 0) {
-            CHECK(waveform.size() == 1058);
-            CHECK(waveform[0] == Approx(-1.48391728e-05f).margin(0.00001));
-            CHECK(waveform[1037] == Approx(6.79016102e-06f).margin(0.00001));
-            CHECK(waveform[1038] == Approx(0.0f).margin(0.00001));
+            CHECK(waveform.rms_samples.size() == 1058);
+            CHECK(waveform.rms_samples[0] == Approx(0.00001f).margin(0.00001));
+            CHECK(waveform.rms_samples[1037] == Approx(0.00003f).margin(0.00001));
+            CHECK(waveform.rms_samples[1038] == Approx(0.0f).margin(0.00001));
         } else if (channel == 1) {
-            CHECK(waveform.size() == 1058);
-            CHECK(waveform[0] == Approx(-1.43432617e-05f).margin(0.00001));
-            CHECK(waveform[1037] == Approx(6.79016102e-06f).margin(0.00001));
-            CHECK(waveform[1038] == Approx(0.0f).margin(0.00001));
+            CHECK(waveform.rms_samples.size() == 1058);
+            CHECK(waveform.rms_samples[0] == Approx(0.00001f ).margin(0.00001));
+            CHECK(waveform.rms_samples[1037] == Approx(0.00003f).margin(0.00001));
+            CHECK(waveform.rms_samples[1038] == Approx(0.0f).margin(0.00001));
         }
 
         waveform.clear();
@@ -84,6 +80,29 @@ TEST_CASE( "Extract waveform data sintel", "[libopenshot][audiowaveformer]" )
     r.Close();
 }
 
+
+TEST_CASE( "Extract waveform data sintel (all channels)", "[libopenshot][audiowaveformer]" )
+{
+    // Create a reader
+    std::stringstream path;
+    path << TEST_MEDIA_PATH << "sintel_trailer-720p.mp4";
+    FFmpegReader r(path.str());
+
+    // Create AudioWaveformer and extract a smaller "average" sample set of audio data
+    AudioWaveformer waveformer(&r);
+    AudioWaveformData waveform = waveformer.ExtractSamples(-1, 20, false);
+
+    CHECK(waveform.rms_samples.size() == 1058);
+    CHECK(waveform.rms_samples[0] == Approx(0.00001f).margin(0.00001));
+    CHECK(waveform.rms_samples[1037] == Approx(0.00003f).margin(0.00001));
+    CHECK(waveform.rms_samples[1038] == Approx(0.0f).margin(0.00001));
+
+    waveform.clear();
+
+    // Clean up
+    r.Close();
+}
+
 TEST_CASE( "Normalize & scale waveform data piano.wav", "[libopenshot][audiowaveformer]" )
 {
     // Create a reader
@@ -95,14 +114,14 @@ TEST_CASE( "Normalize & scale waveform data piano.wav", "[libopenshot][audiowave
     AudioWaveformer waveformer(&r);
     for (auto channel = 0; channel < r.info.channels; channel++) {
         // Normalize values and scale them between -1 and +1
-        std::vector<float> waveform = waveformer.ExtractSamples(channel, 20, true);
+        AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, true);
 
         if (channel == 0) {
-            CHECK(waveform.size() == 107);
-            CHECK(waveform[0] == Approx(0.113821134).margin(0.00001));
-            CHECK(waveform[35] == Approx(-1.0f).margin(0.00001));
-            CHECK(waveform[86] == Approx(-0.200542003f).margin(0.00001));
-            CHECK(waveform[87] == Approx(0.0f).margin(0.00001));
+            CHECK(waveform.rms_samples.size() == 107);
+            CHECK(waveform.rms_samples[0] == Approx(0.07524f).margin(0.00001));
+            CHECK(waveform.rms_samples[35] == Approx(0.20063f).margin(0.00001));
+            CHECK(waveform.rms_samples[86] == Approx(0.2094f).margin(0.00001));
+            CHECK(waveform.rms_samples[87] == Approx(0.0f).margin(0.00001));
         }
 
         waveform.clear();

From 4265d84ff9bd45e0e4e42377375e5b4437b9ec15 Mon Sep 17 00:00:00 2001
From: Jonathan Thomas <jonathan@openshot.org>
Date: Tue, 1 Nov 2022 16:48:37 -0500
Subject: [PATCH 4/4] Some performance optimizations to reduce # of calls to
 GetAudioSamples()

---
 src/AudioWaveformer.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/AudioWaveformer.cpp b/src/AudioWaveformer.cpp
index 2c064941..2b3761c5 100644
--- a/src/AudioWaveformer.cpp
+++ b/src/AudioWaveformer.cpp
@@ -67,12 +67,19 @@ AudioWaveformData AudioWaveformer::ExtractSamples(int channel, int num_per_secon
             // Get next frame
             shared_ptr<openshot::Frame> frame = reader->GetFrame(f);
 
+            // Cache per-channel sample pointers, indexed by real channel number (unselected slots stay null)
+            std::vector<float*> channels(reader->info.channels, nullptr);
+            for (auto channel_index = 0; channel_index < reader->info.channels; channel_index++) {
+                if (channel == channel_index || channel == -1) {
+                    channels[channel_index] = frame->GetAudioSamples(channel_index);
+                }
+            }
+
             // Get sample value from a specific channel (or all channels)
             for (auto s = 0; s < frame->GetAudioSamplesCount(); s++) {
-
                 for (auto channel_index = 0; channel_index < reader->info.channels; channel_index++) {
                     if (channel == channel_index || channel == -1) {
-                        float *samples = frame->GetAudioSamples(channel_index);
+                        float *samples = channels[channel_index];
                         float rms_sample_value = std::sqrt(samples[s] * samples[s]);
 
                         // Accumulate sample averages