diff --git a/bindings/python/openshot.i b/bindings/python/openshot.i index 6bff2105..2f406c7f 100644 --- a/bindings/python/openshot.i +++ b/bindings/python/openshot.i @@ -9,7 +9,7 @@ // // SPDX-License-Identifier: LGPL-3.0-or-later -%module openshot +%module("threads"=1) openshot /* Suppress warnings about ignored operator= */ %warnfilter(362); @@ -45,16 +45,19 @@ %template() std::map; %template() std::pair; %template() std::vector; +%template() std::vector; %template() std::pair; %template() std::pair; %template() std::pair; %template() std::vector>; +%template() std::vector>; %{ #include "OpenShotVersion.h" #include "ReaderBase.h" #include "WriterBase.h" #include "AudioDevices.h" +#include "AudioWaveformer.h" #include "CacheBase.h" #include "CacheDisk.h" #include "CacheMemory.h" @@ -263,6 +266,7 @@ %include "ReaderBase.h" %include "WriterBase.h" %include "AudioDevices.h" +%include "AudioWaveformer.h" %include "CacheBase.h" %include "CacheDisk.h" %include "CacheMemory.h" diff --git a/bindings/ruby/openshot.i b/bindings/ruby/openshot.i index 8fdb152b..a7ef11b4 100644 --- a/bindings/ruby/openshot.i +++ b/bindings/ruby/openshot.i @@ -9,7 +9,7 @@ // // SPDX-License-Identifier: LGPL-3.0-or-later -%module openshot +%module("threads"=1) openshot /* Suppress warnings about ignored operator= */ %warnfilter(362); @@ -45,10 +45,12 @@ %template() std::map; %template() std::pair; %template() std::vector; +%template() std::vector; %template() std::pair; %template() std::pair; %template() std::pair; %template() std::vector>; +%template() std::vector>; %{ /* Ruby and FFmpeg define competing RSHIFT macros, @@ -63,6 +65,7 @@ #include "ReaderBase.h" #include "WriterBase.h" #include "AudioDevices.h" +#include "AudioWaveformer.h" #include "CacheBase.h" #include "CacheDisk.h" #include "CacheMemory.h" @@ -133,6 +136,7 @@ %include "ReaderBase.h" %include "WriterBase.h" %include "AudioDevices.h" +%include "AudioWaveformer.h" %include "CacheBase.h" %include "CacheDisk.h" %include 
"CacheMemory.h" diff --git a/src/AudioWaveformer.cpp b/src/AudioWaveformer.cpp new file mode 100644 index 00000000..2b3761c5 --- /dev/null +++ b/src/AudioWaveformer.cpp @@ -0,0 +1,124 @@ +/** + * @file + * @brief Source file for AudioWaveformer class + * @author Jonathan Thomas + * + * @ref License + */ + +// Copyright (c) 2008-2022 OpenShot Studios, LLC +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#include "AudioWaveformer.h" + + +using namespace std; +using namespace openshot; + + +// Default constructor +AudioWaveformer::AudioWaveformer(ReaderBase* new_reader) : reader(new_reader) +{ + +} + +// Destructor +AudioWaveformer::~AudioWaveformer() +{ + +} + +// Extract audio samples from any ReaderBase class +AudioWaveformData AudioWaveformer::ExtractSamples(int channel, int num_per_second, bool normalize) { + AudioWaveformData data; + + if (reader) { + // Open reader (if needed) + bool does_reader_have_video = reader->info.has_video; + if (!reader->IsOpen()) { + reader->Open(); + } + // Disable video for faster processing + reader->info.has_video = false; + + int sample_rate = reader->info.sample_rate; + int sample_divisor = sample_rate / num_per_second; + int total_samples = num_per_second * (reader->info.duration + 1.0); + int extracted_index = 0; + + // Resize and clear audio buffers + data.resize(total_samples); + data.zero(total_samples); + + // Loop through all frames + int sample_index = 0; + float samples_max = 0.0; + float chunk_max = 0.0; + float chunk_squared_sum = 0.0; + + // How many channels are we using + int channel_count = 1; + if (channel == -1) { + channel_count = reader->info.channels; + } + + for (auto f = 1; f <= reader->info.video_length; f++) { + // Get next frame + shared_ptr frame = reader->GetFrame(f); + + // Cache channels for this frame, to reduce # of calls to frame->GetAudioSamples + float* channels[channel_count]; + for (auto channel_index = 0; channel_index < reader->info.channels; channel_index++) { + if (channel == 
channel_index || channel == -1) { + channels[channel_index] = frame->GetAudioSamples(channel_index); + } + } + + // Get sample value from a specific channel (or all channels) + for (auto s = 0; s < frame->GetAudioSamplesCount(); s++) { + for (auto channel_index = 0; channel_index < reader->info.channels; channel_index++) { + if (channel == channel_index || channel == -1) { + float *samples = channels[channel_index]; + float rms_sample_value = std::sqrt(samples[s] * samples[s]); + + // Accumulate sample averages + chunk_squared_sum += rms_sample_value; + chunk_max = std::max(chunk_max, rms_sample_value); + } + } + + sample_index += 1; + + // Cut-off reached + if (sample_index % sample_divisor == 0) { + float avg_squared_sum = chunk_squared_sum / (sample_divisor * channel_count); + data.max_samples[extracted_index] = chunk_max; + data.rms_samples[extracted_index] = avg_squared_sum; + extracted_index++; + + // Track max/min values + samples_max = std::max(samples_max, chunk_max); + + // reset sample total and index + sample_index = 0; + chunk_max = 0.0; + chunk_squared_sum = 0.0; + } + } + } + + // Scale all values to the -1 to +1 range (regardless of how small or how large the + // original audio sample values are) + if (normalize) { + float scale = 1.0f / samples_max; + data.scale(total_samples, scale); + } + + // Resume previous has_video value + reader->info.has_video = does_reader_have_video; + } + + + return data; +} diff --git a/src/AudioWaveformer.h b/src/AudioWaveformer.h new file mode 100644 index 00000000..638295dd --- /dev/null +++ b/src/AudioWaveformer.h @@ -0,0 +1,101 @@ +/** + * @file + * @brief Header file for AudioWaveformer class + * @author Jonathan Thomas + * + * @ref License + */ + +// Copyright (c) 2008-2022 OpenShot Studios, LLC +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#ifndef OPENSHOT_WAVEFORMER_H +#define OPENSHOT_WAVEFORMER_H + +#include "ReaderBase.h" +#include "Frame.h" +#include + + +namespace openshot { + + /** + * @brief 
/**
 * @brief This struct holds the extracted waveform data (both the averaged values and the max values)
 *
 * Because we extract 2 different datasets from the audio, we return this struct with access to both
 * sets of data: `rms_samples` (the per-chunk average) and `max_samples` (the per-chunk peak).
 * As filled by AudioWaveformer::ExtractSamples, the average is taken over absolute sample values.
 *
 * All methods taking a count are bounds-safe: a negative count is treated as 0, and a count
 * larger than a dataset only touches the elements that actually exist.
 */
struct AudioWaveformData
{
    std::vector<float> max_samples;
    std::vector<float> rms_samples;

    /// Resize both datasets to total_samples elements (new elements are 0.0; negative counts mean 0)
    void resize(int total_samples) {
        const size_type count = total_samples > 0 ? static_cast<size_type>(total_samples) : 0;
        max_samples.resize(count);
        rms_samples.resize(count);
    }

    /// Zero out the first total_samples values in both datasets
    void zero(int total_samples) {
        const size_type max_count = bounded_count(max_samples, total_samples);
        for (size_type s = 0; s < max_count; ++s) {
            max_samples[s] = 0.0f;
        }
        const size_type rms_count = bounded_count(rms_samples, total_samples);
        for (size_type s = 0; s < rms_count; ++s) {
            rms_samples[s] = 0.0f;
        }
    }

    /// Multiply the first total_samples values in both datasets by factor
    void scale(int total_samples, float factor) {
        const size_type max_count = bounded_count(max_samples, total_samples);
        for (size_type s = 0; s < max_count; ++s) {
            max_samples[s] *= factor;
        }
        const size_type rms_count = bounded_count(rms_samples, total_samples);
        for (size_type s = 0; s < rms_count; ++s) {
            rms_samples[s] *= factor;
        }
    }

    /// Clear both datasets and request that their memory be released
    void clear() {
        max_samples.clear();
        max_samples.shrink_to_fit();
        rms_samples.clear();
        rms_samples.shrink_to_fit();
    }

    /// Return a vector of vectors: copies of max_samples (index 0) and rms_samples (index 1)
    std::vector<std::vector<float>> vectors() const {
        return {max_samples, rms_samples};
    }

private:
    using size_type = std::vector<float>::size_type;

    /// How many leading elements of `values` a request for `requested` elements may touch.
    /// The two datasets are public and may differ in length, so each is bounded separately.
    static size_type bounded_count(const std::vector<float>& values, int requested) {
        if (requested <= 0) {
            return 0;
        }
        const size_type wanted = static_cast<size_type>(requested);
        return wanted < values.size() ? wanted : values.size();
    }
};
+ */ + class AudioWaveformer { + private: + ReaderBase* reader; + + public: + /// Default constructor + AudioWaveformer(ReaderBase* reader); + + /// @brief Extract audio samples from any ReaderBase class + /// @param channel Which audio channel should we extract data from (-1 == all channels) + /// @param num_per_second How many samples per second to return + /// @param normalize Should we scale the data range so the largest value is 1.0 + AudioWaveformData ExtractSamples(int channel, int num_per_second, bool normalize); + + /// Destructor + ~AudioWaveformer(); + }; + +} + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fdcb9d48..4ac69210 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -50,6 +50,7 @@ set(OPENSHOT_SOURCES AudioDevices.cpp AudioReaderSource.cpp AudioResampler.cpp + AudioWaveformer.cpp CacheBase.cpp CacheDisk.cpp CacheMemory.cpp diff --git a/tests/AudioWaveformer.cpp b/tests/AudioWaveformer.cpp new file mode 100644 index 00000000..bd12d997 --- /dev/null +++ b/tests/AudioWaveformer.cpp @@ -0,0 +1,132 @@ +/** + * @file + * @brief Unit tests for openshot::AudioWaveformer + * @author Jonathan Thomas + * + * @ref License + */ + +// Copyright (c) 2008-2022 OpenShot Studios, LLC +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#include "openshot_catch.h" +#include "AudioWaveformer.h" +#include "FFmpegReader.h" + + +using namespace openshot; + +TEST_CASE( "Extract waveform data piano.wav", "[libopenshot][audiowaveformer]" ) +{ + // Create a reader + std::stringstream path; + path << TEST_MEDIA_PATH << "piano.wav"; + FFmpegReader r(path.str()); + r.Open(); + + // Create AudioWaveformer and extract a smaller "average" sample set of audio data + AudioWaveformer waveformer(&r); + for (auto channel = 0; channel < r.info.channels; channel++) { + AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, false); + + if (channel == 0) { + CHECK(waveform.rms_samples.size() == 107); + CHECK(waveform.rms_samples[0] == 
Approx(0.04879f).margin(0.00001)); + CHECK(waveform.rms_samples[86] == Approx(0.13578f).margin(0.00001)); + CHECK(waveform.rms_samples[87] == Approx(0.0f).margin(0.00001)); + } else if (channel == 1) { + CHECK(waveform.rms_samples.size() == 107); + CHECK(waveform.rms_samples[0] == Approx(0.04879f).margin(0.00001)); + CHECK(waveform.rms_samples[86] == Approx(0.13578f).margin(0.00001)); + CHECK(waveform.rms_samples[87] == Approx(0.0f).margin(0.00001)); + } + + waveform.clear(); + } + + // Clean up + r.Close(); +} + +TEST_CASE( "Extract waveform data sintel", "[libopenshot][audiowaveformer]" ) +{ + // Create a reader + std::stringstream path; + path << TEST_MEDIA_PATH << "sintel_trailer-720p.mp4"; + FFmpegReader r(path.str()); + + // Create AudioWaveformer and extract a smaller "average" sample set of audio data + AudioWaveformer waveformer(&r); + for (auto channel = 0; channel < r.info.channels; channel++) { + AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, false); + + if (channel == 0) { + CHECK(waveform.rms_samples.size() == 1058); + CHECK(waveform.rms_samples[0] == Approx(0.00001f).margin(0.00001)); + CHECK(waveform.rms_samples[1037] == Approx(0.00003f).margin(0.00001)); + CHECK(waveform.rms_samples[1038] == Approx(0.0f).margin(0.00001)); + } else if (channel == 1) { + CHECK(waveform.rms_samples.size() == 1058); + CHECK(waveform.rms_samples[0] == Approx(0.00001f ).margin(0.00001)); + CHECK(waveform.rms_samples[1037] == Approx(0.00003f).margin(0.00001)); + CHECK(waveform.rms_samples[1038] == Approx(0.0f).margin(0.00001)); + } + + waveform.clear(); + } + + // Clean up + r.Close(); +} + + +TEST_CASE( "Extract waveform data sintel (all channels)", "[libopenshot][audiowaveformer]" ) +{ + // Create a reader + std::stringstream path; + path << TEST_MEDIA_PATH << "sintel_trailer-720p.mp4"; + FFmpegReader r(path.str()); + + // Create AudioWaveformer and extract a smaller "average" sample set of audio data + AudioWaveformer waveformer(&r); + 
AudioWaveformData waveform = waveformer.ExtractSamples(-1, 20, false); + + CHECK(waveform.rms_samples.size() == 1058); + CHECK(waveform.rms_samples[0] == Approx(0.00001f).margin(0.00001)); + CHECK(waveform.rms_samples[1037] == Approx(0.00003f).margin(0.00001)); + CHECK(waveform.rms_samples[1038] == Approx(0.0f).margin(0.00001)); + + waveform.clear(); + + // Clean up + r.Close(); +} + +TEST_CASE( "Normalize & scale waveform data piano.wav", "[libopenshot][audiowaveformer]" ) +{ + // Create a reader + std::stringstream path; + path << TEST_MEDIA_PATH << "piano.wav"; + FFmpegReader r(path.str()); + + // Create AudioWaveformer and extract a smaller "average" sample set of audio data + AudioWaveformer waveformer(&r); + for (auto channel = 0; channel < r.info.channels; channel++) { + // Normalize values and scale them between -1 and +1 + AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, true); + + if (channel == 0) { + CHECK(waveform.rms_samples.size() == 107); + CHECK(waveform.rms_samples[0] == Approx(0.07524f).margin(0.00001)); + CHECK(waveform.rms_samples[35] == Approx(0.20063f).margin(0.00001)); + CHECK(waveform.rms_samples[86] == Approx(0.2094f).margin(0.00001)); + CHECK(waveform.rms_samples[87] == Approx(0.0f).margin(0.00001)); + } + + waveform.clear(); + } + + // Clean up + r.Close(); +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 56f47e15..2f4fb345 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -20,6 +20,7 @@ file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/examples/" TEST_MEDIA_PATH) ### TEST SOURCE FILES ### set(OPENSHOT_TESTS + AudioWaveformer CacheDisk CacheMemory Clip