From 2326532820b79f92ac7dbd5cdd3093ffea2d385a Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Sun, 30 Oct 2022 22:04:19 -0500 Subject: [PATCH 1/4] Initial commit of AudioWaveformer class, which is designed to iterate any ReaderBase, and return a reduced "average" sample set - more useful for generating waveforms - for example, reduce a 44100 samples per second down to 20 samples per second. --- bindings/python/openshot.i | 2 + bindings/ruby/openshot.i | 2 + src/AudioWaveformer.cpp | 104 ++++++++++++++++++++++++++++++++++ src/AudioWaveformer.h | 51 +++++++++++++++++ src/CMakeLists.txt | 1 + tests/AudioWaveformer.cpp | 113 +++++++++++++++++++++++++++++++++++++ tests/CMakeLists.txt | 1 + 7 files changed, 274 insertions(+) create mode 100644 src/AudioWaveformer.cpp create mode 100644 src/AudioWaveformer.h create mode 100644 tests/AudioWaveformer.cpp diff --git a/bindings/python/openshot.i b/bindings/python/openshot.i index 6bff2105..f01cef74 100644 --- a/bindings/python/openshot.i +++ b/bindings/python/openshot.i @@ -55,6 +55,7 @@ #include "ReaderBase.h" #include "WriterBase.h" #include "AudioDevices.h" +#include "AudioWaveformer.h" #include "CacheBase.h" #include "CacheDisk.h" #include "CacheMemory.h" @@ -263,6 +264,7 @@ %include "ReaderBase.h" %include "WriterBase.h" %include "AudioDevices.h" +%include "AudioWaveformer.h" %include "CacheBase.h" %include "CacheDisk.h" %include "CacheMemory.h" diff --git a/bindings/ruby/openshot.i b/bindings/ruby/openshot.i index 8fdb152b..cf3c4778 100644 --- a/bindings/ruby/openshot.i +++ b/bindings/ruby/openshot.i @@ -63,6 +63,7 @@ #include "ReaderBase.h" #include "WriterBase.h" #include "AudioDevices.h" +#include "AudioWaveformer.h" #include "CacheBase.h" #include "CacheDisk.h" #include "CacheMemory.h" @@ -133,6 +134,7 @@ %include "ReaderBase.h" %include "WriterBase.h" %include "AudioDevices.h" +%include "AudioWaveformer.h" %include "CacheBase.h" %include "CacheDisk.h" %include "CacheMemory.h" diff --git 
a/src/AudioWaveformer.cpp b/src/AudioWaveformer.cpp new file mode 100644 index 00000000..b16a5a0d --- /dev/null +++ b/src/AudioWaveformer.cpp @@ -0,0 +1,104 @@ +/** + * @file + * @brief Source file for AudioWaveformer class + * @author Jonathan Thomas + * + * @ref License + */ + +// Copyright (c) 2008-2022 OpenShot Studios, LLC +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#include "AudioWaveformer.h" + + +using namespace std; +using namespace openshot; + + +// Default constructor +AudioWaveformer::AudioWaveformer(ReaderBase* new_reader) : reader(new_reader) +{ + +} + +// Destructor +AudioWaveformer::~AudioWaveformer() +{ + +} + +// Extract audio samples from any ReaderBase class +std::vector AudioWaveformer::ExtractSamples(int channel, int num_per_second, bool normalize) { + std::vector extracted_data(0); + + if (reader) { + // Open reader (if needed) + bool does_reader_have_video = reader->info.has_video; + if (!reader->IsOpen()) { + reader->Open(); + } + // Disable video for faster processing + reader->info.has_video = false; + + int sample_rate = reader->info.sample_rate; + int sample_divisor = sample_rate / num_per_second; + int total_samples = num_per_second * (reader->info.duration + 1.0); + + // Size audio buffer (for smaller dataset) + extracted_data.resize(total_samples); + int extracted_index = 0; + + // Clear audio buffer + for (auto s = 0; s < total_samples; s++) { + extracted_data[s] = 0.0; + } + + // Loop through all frames + int sample_index = 0; + float samples_total = 0.0; + float samples_max = 0.0; + float samples_min = 0.0; + + for (auto f = 1; f <= reader->info.video_length; f++) { + // Get next frame + shared_ptr frame = reader->GetFrame(f); + + float* samples = frame->GetAudioSamples(channel); + for (auto s = 0; s < frame->GetAudioSamplesCount(); s++) { + samples_total += samples[s]; + sample_index += 1; + + // Cut-off reached + if (sample_index % sample_divisor == 0) { + float avg_sample_value = samples_total / sample_divisor; + 
extracted_data[extracted_index] = avg_sample_value; + extracted_index++; + + // Track max/min values + samples_max = std::max(samples_max, avg_sample_value); + samples_min = std::min(samples_min, avg_sample_value); + + // reset sample total and index + sample_index = 0; + samples_total = 0.0; + } + } + } + + // Scale all values to the -1 to +1 range (regardless of how small or how large the + // original audio sample values are) + if (normalize) { + float scale = std::min(1.0f / samples_max, 1.0f / std::fabs(samples_min)); + for (auto s = 0; s < total_samples; s++) { + extracted_data[s] *= scale; + } + } + + // Resume previous has_video value + reader->info.has_video = does_reader_have_video; + } + + return extracted_data; +} diff --git a/src/AudioWaveformer.h b/src/AudioWaveformer.h new file mode 100644 index 00000000..4e718a87 --- /dev/null +++ b/src/AudioWaveformer.h @@ -0,0 +1,51 @@ +/** + * @file + * @brief Header file for AudioWaveformer class + * @author Jonathan Thomas + * + * @ref License + */ + +// Copyright (c) 2008-2022 OpenShot Studios, LLC +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#ifndef OPENSHOT_WAVEFORMER_H +#define OPENSHOT_WAVEFORMER_H + +#include "ReaderBase.h" +#include "Frame.h" +#include + + +namespace openshot { + + /** + * @brief This class is used to extra audio data used for generating waveforms. + * + * Pass in a ReaderBase* with audio data, and this class will iterate the reader, + * and sample down the dataset to a much smaller set - more useful for generating + * waveforms. For example, take 44100 samples per second, and reduce it to 20 + * "average" samples per second - much easier to graph. 
+ */ + class AudioWaveformer { + private: + ReaderBase* reader; + + public: + /// Default constructor + AudioWaveformer(ReaderBase* reader); + + /// @brief Extract audio samples from any ReaderBase class + /// @param channel Which audio channel should we extract data from + /// @param num_per_second How many samples per second to return + /// @param normalize Should we scale the data range so the largest value is 1.0 + std::vector ExtractSamples(int channel, int num_per_second, bool normalize); + + /// Destructor + ~AudioWaveformer(); + }; + +} + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fdcb9d48..4ac69210 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -50,6 +50,7 @@ set(OPENSHOT_SOURCES AudioDevices.cpp AudioReaderSource.cpp AudioResampler.cpp + AudioWaveformer.cpp CacheBase.cpp CacheDisk.cpp CacheMemory.cpp diff --git a/tests/AudioWaveformer.cpp b/tests/AudioWaveformer.cpp new file mode 100644 index 00000000..e6cdfa20 --- /dev/null +++ b/tests/AudioWaveformer.cpp @@ -0,0 +1,113 @@ +/** + * @file + * @brief Unit tests for openshot::AudioWaveformer + * @author Jonathan Thomas + * + * @ref License + */ + +// Copyright (c) 2008-2019 OpenShot Studios, LLC +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#include "openshot_catch.h" +#include "AudioWaveformer.h" +#include "FFmpegReader.h" + + +using namespace openshot; + +TEST_CASE( "Extract waveform data piano.wav", "[libopenshot][audiowaveformer]" ) +{ + // Create a reader + std::stringstream path; + path << TEST_MEDIA_PATH << "piano.wav"; + FFmpegReader r(path.str()); + r.Open(); + + // Create AudioWaveformer and extract a smaller "average" sample set of audio data + AudioWaveformer waveformer(&r); + for (auto channel = 0; channel < r.info.channels; channel++) { + std::vector waveform = waveformer.ExtractSamples(channel, 20, false); + + if (channel == 0) { + CHECK(waveform.size() == 107); + CHECK(waveform[0] == Approx(0.000820312474f).margin(0.00001)); + CHECK(waveform[86] 
== Approx(-0.00144531252f).margin(0.00001)); + CHECK(waveform[87] == Approx(0.0f).margin(0.00001)); + + for (auto sample = 0; sample < waveform.size(); sample++) { + std::cout << waveform[sample] << std::endl; + } + } else if (channel == 1) { + CHECK(waveform.size() == 107); + CHECK(waveform[0] == Approx(0.000820312474f).margin(0.00001)); + CHECK(waveform[86] == Approx(-0.00144531252f).margin(0.00001)); + CHECK(waveform[87] == Approx(0.0f).margin(0.00001)); + } + + waveform.clear(); + } + + // Clean up + r.Close(); +} + +TEST_CASE( "Extract waveform data sintel", "[libopenshot][audiowaveformer]" ) +{ + // Create a reader + std::stringstream path; + path << TEST_MEDIA_PATH << "sintel_trailer-720p.mp4"; + FFmpegReader r(path.str()); + + // Create AudioWaveformer and extract a smaller "average" sample set of audio data + AudioWaveformer waveformer(&r); + for (auto channel = 0; channel < r.info.channels; channel++) { + std::vector waveform = waveformer.ExtractSamples(channel, 20, false); + + if (channel == 0) { + CHECK(waveform.size() == 1058); + CHECK(waveform[0] == Approx(-1.48391728e-05f).margin(0.00001)); + CHECK(waveform[1037] == Approx(6.79016102e-06f).margin(0.00001)); + CHECK(waveform[1038] == Approx(0.0f).margin(0.00001)); + } else if (channel == 1) { + CHECK(waveform.size() == 1058); + CHECK(waveform[0] == Approx(-1.43432617e-05f).margin(0.00001)); + CHECK(waveform[1037] == Approx(6.79016102e-06f).margin(0.00001)); + CHECK(waveform[1038] == Approx(0.0f).margin(0.00001)); + } + + waveform.clear(); + } + + // Clean up + r.Close(); +} + +TEST_CASE( "Normalize & scale waveform data piano.wav", "[libopenshot][audiowaveformer]" ) +{ + // Create a reader + std::stringstream path; + path << TEST_MEDIA_PATH << "piano.wav"; + FFmpegReader r(path.str()); + + // Create AudioWaveformer and extract a smaller "average" sample set of audio data + AudioWaveformer waveformer(&r); + for (auto channel = 0; channel < r.info.channels; channel++) { + // Normalize values and scale 
them between -1 and +1 + std::vector waveform = waveformer.ExtractSamples(channel, 20, true); + + if (channel == 0) { + CHECK(waveform.size() == 107); + CHECK(waveform[0] == Approx(0.113821134).margin(0.00001)); + CHECK(waveform[35] == Approx(-1.0f).margin(0.00001)); + CHECK(waveform[86] == Approx(-0.200542003f).margin(0.00001)); + CHECK(waveform[87] == Approx(0.0f).margin(0.00001)); + } + + waveform.clear(); + } + + // Clean up + r.Close(); +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 56f47e15..2f4fb345 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -20,6 +20,7 @@ file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/examples/" TEST_MEDIA_PATH) ### TEST SOURCE FILES ### set(OPENSHOT_TESTS + AudioWaveformer CacheDisk CacheMemory Clip From 9cd7dd68a851b501c44ade003b03ee177ed2c770 Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Mon, 31 Oct 2022 14:20:18 -0500 Subject: [PATCH 2/4] Wrap vector in Swig mappings, and enable thread-safe access to swig wrappers - so our long running waveformer does not block the Python GIL --- bindings/python/openshot.i | 3 ++- bindings/ruby/openshot.i | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bindings/python/openshot.i b/bindings/python/openshot.i index f01cef74..10cb8866 100644 --- a/bindings/python/openshot.i +++ b/bindings/python/openshot.i @@ -9,7 +9,7 @@ // // SPDX-License-Identifier: LGPL-3.0-or-later -%module openshot +%module("threads"=1) openshot /* Suppress warnings about ignored operator= */ %warnfilter(362); @@ -45,6 +45,7 @@ %template() std::map; %template() std::pair; %template() std::vector; +%template() std::vector; %template() std::pair; %template() std::pair; %template() std::pair; diff --git a/bindings/ruby/openshot.i b/bindings/ruby/openshot.i index cf3c4778..debeeec5 100644 --- a/bindings/ruby/openshot.i +++ b/bindings/ruby/openshot.i @@ -9,7 +9,7 @@ // // SPDX-License-Identifier: LGPL-3.0-or-later -%module openshot +%module("threads"=1) openshot /* Suppress 
warnings about ignored operator= */ %warnfilter(362); @@ -45,6 +45,7 @@ %template() std::map; %template() std::pair; %template() std::vector; +%template() std::vector; %template() std::pair; %template() std::pair; %template() std::pair; From c838c126ad73318bf15d427acabdd4026019f56e Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Tue, 1 Nov 2022 15:17:03 -0500 Subject: [PATCH 3/4] Improvements to AudioWaveformer to use RMS (Root Mean Square), and return both average RMS and max RMS for graphing, including some new unit tests. --- bindings/python/openshot.i | 1 + bindings/ruby/openshot.i | 1 + src/AudioWaveformer.cpp | 59 +++++++++++++++----------- src/AudioWaveformer.h | 84 ++++++++++++++++++++++++++++++-------- tests/AudioWaveformer.cpp | 77 +++++++++++++++++++++------------- 5 files changed, 153 insertions(+), 69 deletions(-) diff --git a/bindings/python/openshot.i b/bindings/python/openshot.i index 10cb8866..2f406c7f 100644 --- a/bindings/python/openshot.i +++ b/bindings/python/openshot.i @@ -50,6 +50,7 @@ %template() std::pair; %template() std::pair; %template() std::vector>; +%template() std::vector>; %{ #include "OpenShotVersion.h" diff --git a/bindings/ruby/openshot.i b/bindings/ruby/openshot.i index debeeec5..a7ef11b4 100644 --- a/bindings/ruby/openshot.i +++ b/bindings/ruby/openshot.i @@ -50,6 +50,7 @@ %template() std::pair; %template() std::pair; %template() std::vector>; +%template() std::vector>; %{ /* Ruby and FFmpeg define competing RSHIFT macros, diff --git a/src/AudioWaveformer.cpp b/src/AudioWaveformer.cpp index b16a5a0d..2c064941 100644 --- a/src/AudioWaveformer.cpp +++ b/src/AudioWaveformer.cpp @@ -30,8 +30,8 @@ AudioWaveformer::~AudioWaveformer() } // Extract audio samples from any ReaderBase class -std::vector AudioWaveformer::ExtractSamples(int channel, int num_per_second, bool normalize) { - std::vector extracted_data(0); +AudioWaveformData AudioWaveformer::ExtractSamples(int channel, int num_per_second, bool normalize) { + 
AudioWaveformData data; if (reader) { // Open reader (if needed) @@ -45,44 +45,58 @@ std::vector AudioWaveformer::ExtractSamples(int channel, int num_per_seco int sample_rate = reader->info.sample_rate; int sample_divisor = sample_rate / num_per_second; int total_samples = num_per_second * (reader->info.duration + 1.0); - - // Size audio buffer (for smaller dataset) - extracted_data.resize(total_samples); int extracted_index = 0; - // Clear audio buffer - for (auto s = 0; s < total_samples; s++) { - extracted_data[s] = 0.0; - } + // Resize and clear audio buffers + data.resize(total_samples); + data.zero(total_samples); // Loop through all frames int sample_index = 0; - float samples_total = 0.0; float samples_max = 0.0; - float samples_min = 0.0; + float chunk_max = 0.0; + float chunk_squared_sum = 0.0; + + // How many channels are we using + int channel_count = 1; + if (channel == -1) { + channel_count = reader->info.channels; + } for (auto f = 1; f <= reader->info.video_length; f++) { // Get next frame shared_ptr frame = reader->GetFrame(f); - float* samples = frame->GetAudioSamples(channel); + // Get sample value from a specific channel (or all channels) for (auto s = 0; s < frame->GetAudioSamplesCount(); s++) { - samples_total += samples[s]; + + for (auto channel_index = 0; channel_index < reader->info.channels; channel_index++) { + if (channel == channel_index || channel == -1) { + float *samples = frame->GetAudioSamples(channel_index); + float rms_sample_value = std::sqrt(samples[s] * samples[s]); + + // Accumulate sample averages + chunk_squared_sum += rms_sample_value; + chunk_max = std::max(chunk_max, rms_sample_value); + } + } + sample_index += 1; // Cut-off reached if (sample_index % sample_divisor == 0) { - float avg_sample_value = samples_total / sample_divisor; - extracted_data[extracted_index] = avg_sample_value; + float avg_squared_sum = chunk_squared_sum / (sample_divisor * channel_count); + data.max_samples[extracted_index] = chunk_max; + 
data.rms_samples[extracted_index] = avg_squared_sum; extracted_index++; // Track max/min values - samples_max = std::max(samples_max, avg_sample_value); - samples_min = std::min(samples_min, avg_sample_value); + samples_max = std::max(samples_max, chunk_max); // reset sample total and index sample_index = 0; - samples_total = 0.0; + chunk_max = 0.0; + chunk_squared_sum = 0.0; } } } @@ -90,15 +104,14 @@ std::vector AudioWaveformer::ExtractSamples(int channel, int num_per_seco // Scale all values to the -1 to +1 range (regardless of how small or how large the // original audio sample values are) if (normalize) { - float scale = std::min(1.0f / samples_max, 1.0f / std::fabs(samples_min)); - for (auto s = 0; s < total_samples; s++) { - extracted_data[s] *= scale; - } + float scale = 1.0f / samples_max; + data.scale(total_samples, scale); } // Resume previous has_video value reader->info.has_video = does_reader_have_video; } - return extracted_data; + + return data; } diff --git a/src/AudioWaveformer.h b/src/AudioWaveformer.h index 4e718a87..638295dd 100644 --- a/src/AudioWaveformer.h +++ b/src/AudioWaveformer.h @@ -20,31 +20,81 @@ namespace openshot { - /** - * @brief This class is used to extra audio data used for generating waveforms. - * - * Pass in a ReaderBase* with audio data, and this class will iterate the reader, - * and sample down the dataset to a much smaller set - more useful for generating - * waveforms. For example, take 44100 samples per second, and reduce it to 20 - * "average" samples per second - much easier to graph. - */ - class AudioWaveformer { - private: + /** + * @brief This struct holds the extracted waveform data (both the RMS root-mean-squared average, and the max values) + * + * Because we extract 2 different datasets from the audio, we return this struct with access to both sets of data, + * the average root mean squared values, and the max sample values. 
+ */ + struct AudioWaveformData + { + std::vector max_samples; + std::vector rms_samples; + + /// Resize both datasets + void resize(int total_samples) { + max_samples.resize(total_samples); + rms_samples.resize(total_samples); + } + + /// Zero out # of values in both datasets + void zero(int total_samples) { + for (auto s = 0; s < total_samples; s++) { + max_samples[s] = 0.0; + rms_samples[s] = 0.0; + } + } + + /// Scale # of values by some factor + void scale(int total_samples, float factor) { + for (auto s = 0; s < total_samples; s++) { + max_samples[s] *= factor; + rms_samples[s] *= factor; + } + } + + /// Clear and free memory of both datasets + void clear() { + max_samples.clear(); + max_samples.shrink_to_fit(); + rms_samples.clear(); + rms_samples.shrink_to_fit(); + } + + /// Return a vector of vectors (containing both datasets) + std::vector> vectors() { + std::vector> output; + output.push_back(max_samples); + output.push_back(rms_samples); + return output; + } + }; + + /** + * @brief This class is used to extract audio data used for generating waveforms. + * + * Pass in a ReaderBase* with audio data, and this class will iterate the reader, + * and sample down the dataset to a much smaller set - more useful for generating + * waveforms. For example, take 44100 samples per second, and reduce it to 20 + * "max" or "average" samples per second - much easier to graph. 
+ */ + class AudioWaveformer { + private: ReaderBase* reader; - public: - /// Default constructor + public: + /// Default constructor AudioWaveformer(ReaderBase* reader); /// @brief Extract audio samples from any ReaderBase class - /// @param channel Which audio channel should we extract data from + /// @param channel Which audio channel should we extract data from (-1 == all channels) /// @param num_per_second How many samples per second to return /// @param normalize Should we scale the data range so the largest value is 1.0 - std::vector ExtractSamples(int channel, int num_per_second, bool normalize); + AudioWaveformData ExtractSamples(int channel, int num_per_second, bool normalize); - /// Destructor - ~AudioWaveformer(); - }; + /// Destructor + ~AudioWaveformer(); + }; } diff --git a/tests/AudioWaveformer.cpp b/tests/AudioWaveformer.cpp index e6cdfa20..bd12d997 100644 --- a/tests/AudioWaveformer.cpp +++ b/tests/AudioWaveformer.cpp @@ -6,7 +6,7 @@ * @ref License */ -// Copyright (c) 2008-2019 OpenShot Studios, LLC +// Copyright (c) 2008-2022 OpenShot Studios, LLC // // SPDX-License-Identifier: LGPL-3.0-or-later @@ -28,22 +28,18 @@ TEST_CASE( "Extract waveform data piano.wav", "[libopenshot][audiowaveformer]" ) // Create AudioWaveformer and extract a smaller "average" sample set of audio data AudioWaveformer waveformer(&r); for (auto channel = 0; channel < r.info.channels; channel++) { - std::vector waveform = waveformer.ExtractSamples(channel, 20, false); + AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, false); if (channel == 0) { - CHECK(waveform.size() == 107); - CHECK(waveform[0] == Approx(0.000820312474f).margin(0.00001)); - CHECK(waveform[86] == Approx(-0.00144531252f).margin(0.00001)); - CHECK(waveform[87] == Approx(0.0f).margin(0.00001)); - - for (auto sample = 0; sample < waveform.size(); sample++) { - std::cout << waveform[sample] << std::endl; - } + CHECK(waveform.rms_samples.size() == 107); + CHECK(waveform.rms_samples[0] == 
Approx(0.04879f).margin(0.00001)); + CHECK(waveform.rms_samples[86] == Approx(0.13578f).margin(0.00001)); + CHECK(waveform.rms_samples[87] == Approx(0.0f).margin(0.00001)); } else if (channel == 1) { - CHECK(waveform.size() == 107); - CHECK(waveform[0] == Approx(0.000820312474f).margin(0.00001)); - CHECK(waveform[86] == Approx(-0.00144531252f).margin(0.00001)); - CHECK(waveform[87] == Approx(0.0f).margin(0.00001)); + CHECK(waveform.rms_samples.size() == 107); + CHECK(waveform.rms_samples[0] == Approx(0.04879f).margin(0.00001)); + CHECK(waveform.rms_samples[86] == Approx(0.13578f).margin(0.00001)); + CHECK(waveform.rms_samples[87] == Approx(0.0f).margin(0.00001)); } waveform.clear(); @@ -63,18 +59,18 @@ TEST_CASE( "Extract waveform data sintel", "[libopenshot][audiowaveformer]" ) // Create AudioWaveformer and extract a smaller "average" sample set of audio data AudioWaveformer waveformer(&r); for (auto channel = 0; channel < r.info.channels; channel++) { - std::vector waveform = waveformer.ExtractSamples(channel, 20, false); + AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, false); if (channel == 0) { - CHECK(waveform.size() == 1058); - CHECK(waveform[0] == Approx(-1.48391728e-05f).margin(0.00001)); - CHECK(waveform[1037] == Approx(6.79016102e-06f).margin(0.00001)); - CHECK(waveform[1038] == Approx(0.0f).margin(0.00001)); + CHECK(waveform.rms_samples.size() == 1058); + CHECK(waveform.rms_samples[0] == Approx(0.00001f).margin(0.00001)); + CHECK(waveform.rms_samples[1037] == Approx(0.00003f).margin(0.00001)); + CHECK(waveform.rms_samples[1038] == Approx(0.0f).margin(0.00001)); } else if (channel == 1) { - CHECK(waveform.size() == 1058); - CHECK(waveform[0] == Approx(-1.43432617e-05f).margin(0.00001)); - CHECK(waveform[1037] == Approx(6.79016102e-06f).margin(0.00001)); - CHECK(waveform[1038] == Approx(0.0f).margin(0.00001)); + CHECK(waveform.rms_samples.size() == 1058); + CHECK(waveform.rms_samples[0] == Approx(0.00001f ).margin(0.00001)); + 
CHECK(waveform.rms_samples[1037] == Approx(0.00003f).margin(0.00001)); + CHECK(waveform.rms_samples[1038] == Approx(0.0f).margin(0.00001)); } waveform.clear(); @@ -84,6 +80,29 @@ TEST_CASE( "Extract waveform data sintel", "[libopenshot][audiowaveformer]" ) r.Close(); } + +TEST_CASE( "Extract waveform data sintel (all channels)", "[libopenshot][audiowaveformer]" ) +{ + // Create a reader + std::stringstream path; + path << TEST_MEDIA_PATH << "sintel_trailer-720p.mp4"; + FFmpegReader r(path.str()); + + // Create AudioWaveformer and extract a smaller "average" sample set of audio data + AudioWaveformer waveformer(&r); + AudioWaveformData waveform = waveformer.ExtractSamples(-1, 20, false); + + CHECK(waveform.rms_samples.size() == 1058); + CHECK(waveform.rms_samples[0] == Approx(0.00001f).margin(0.00001)); + CHECK(waveform.rms_samples[1037] == Approx(0.00003f).margin(0.00001)); + CHECK(waveform.rms_samples[1038] == Approx(0.0f).margin(0.00001)); + + waveform.clear(); + + // Clean up + r.Close(); +} + TEST_CASE( "Normalize & scale waveform data piano.wav", "[libopenshot][audiowaveformer]" ) { // Create a reader @@ -95,14 +114,14 @@ TEST_CASE( "Normalize & scale waveform data piano.wav", "[libopenshot][audiowave AudioWaveformer waveformer(&r); for (auto channel = 0; channel < r.info.channels; channel++) { // Normalize values and scale them between -1 and +1 - std::vector waveform = waveformer.ExtractSamples(channel, 20, true); + AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, true); if (channel == 0) { - CHECK(waveform.size() == 107); - CHECK(waveform[0] == Approx(0.113821134).margin(0.00001)); - CHECK(waveform[35] == Approx(-1.0f).margin(0.00001)); - CHECK(waveform[86] == Approx(-0.200542003f).margin(0.00001)); - CHECK(waveform[87] == Approx(0.0f).margin(0.00001)); + CHECK(waveform.rms_samples.size() == 107); + CHECK(waveform.rms_samples[0] == Approx(0.07524f).margin(0.00001)); + CHECK(waveform.rms_samples[35] == Approx(0.20063f).margin(0.00001)); + 
CHECK(waveform.rms_samples[86] == Approx(0.2094f).margin(0.00001)); + CHECK(waveform.rms_samples[87] == Approx(0.0f).margin(0.00001)); } waveform.clear(); From 4265d84ff9bd45e0e4e42377375e5b4437b9ec15 Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Tue, 1 Nov 2022 16:48:37 -0500 Subject: [PATCH 4/4] Some performance optimizations. to reduce # of calls to GetAudioSamples() --- src/AudioWaveformer.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/AudioWaveformer.cpp b/src/AudioWaveformer.cpp index 2c064941..2b3761c5 100644 --- a/src/AudioWaveformer.cpp +++ b/src/AudioWaveformer.cpp @@ -67,12 +67,19 @@ AudioWaveformData AudioWaveformer::ExtractSamples(int channel, int num_per_secon // Get next frame shared_ptr frame = reader->GetFrame(f); + // Cache per-channel sample pointers for this frame (slots are indexed by the real channel number, so size by the reader's channel count, not by channel_count), to reduce # of calls to frame->GetAudioSamples + std::vector<float*> channels(reader->info.channels, nullptr); + for (auto channel_index = 0; channel_index < reader->info.channels; channel_index++) { + if (channel == channel_index || channel == -1) { + channels[channel_index] = frame->GetAudioSamples(channel_index); + } + } + // Get sample value from a specific channel (or all channels) for (auto s = 0; s < frame->GetAudioSamplesCount(); s++) { - for (auto channel_index = 0; channel_index < reader->info.channels; channel_index++) { if (channel == channel_index || channel == -1) { - float *samples = frame->GetAudioSamples(channel_index); + float *samples = channels[channel_index]; float rms_sample_value = std::sqrt(samples[s] * samples[s]); // Accumulate sample averages