You've already forked libopenshot
mirror of
https://github.com/OpenShot/libopenshot.git
synced 2026-03-02 08:53:52 -08:00
Merge pull request #868 from OpenShot/audio-waveformer
New AudioWaveformer Class (for generating fast, graph-friendly audio datasets)
This commit is contained in:
@@ -9,7 +9,7 @@
|
||||
//
|
||||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
|
||||
%module openshot
|
||||
%module("threads"=1) openshot
|
||||
|
||||
/* Suppress warnings about ignored operator= */
|
||||
%warnfilter(362);
|
||||
@@ -45,16 +45,19 @@
|
||||
%template() std::map<std::string, int>;
|
||||
%template() std::pair<int, int>;
|
||||
%template() std::vector<int>;
|
||||
%template() std::vector<float>;
|
||||
%template() std::pair<double, double>;
|
||||
%template() std::pair<float, float>;
|
||||
%template() std::pair<std::string, std::string>;
|
||||
%template() std::vector<std::pair<std::string, std::string>>;
|
||||
%template() std::vector<std::vector<float>>;
|
||||
|
||||
%{
|
||||
#include "OpenShotVersion.h"
|
||||
#include "ReaderBase.h"
|
||||
#include "WriterBase.h"
|
||||
#include "AudioDevices.h"
|
||||
#include "AudioWaveformer.h"
|
||||
#include "CacheBase.h"
|
||||
#include "CacheDisk.h"
|
||||
#include "CacheMemory.h"
|
||||
@@ -263,6 +266,7 @@
|
||||
%include "ReaderBase.h"
|
||||
%include "WriterBase.h"
|
||||
%include "AudioDevices.h"
|
||||
%include "AudioWaveformer.h"
|
||||
%include "CacheBase.h"
|
||||
%include "CacheDisk.h"
|
||||
%include "CacheMemory.h"
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
//
|
||||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
|
||||
%module openshot
|
||||
%module("threads"=1) openshot
|
||||
|
||||
/* Suppress warnings about ignored operator= */
|
||||
%warnfilter(362);
|
||||
@@ -45,10 +45,12 @@
|
||||
%template() std::map<std::string, int>;
|
||||
%template() std::pair<int, int>;
|
||||
%template() std::vector<int>;
|
||||
%template() std::vector<float>;
|
||||
%template() std::pair<double, double>;
|
||||
%template() std::pair<float, float>;
|
||||
%template() std::pair<std::string, std::string>;
|
||||
%template() std::vector<std::pair<std::string, std::string>>;
|
||||
%template() std::vector<std::vector<float>>;
|
||||
|
||||
%{
|
||||
/* Ruby and FFmpeg define competing RSHIFT macros,
|
||||
@@ -63,6 +65,7 @@
|
||||
#include "ReaderBase.h"
|
||||
#include "WriterBase.h"
|
||||
#include "AudioDevices.h"
|
||||
#include "AudioWaveformer.h"
|
||||
#include "CacheBase.h"
|
||||
#include "CacheDisk.h"
|
||||
#include "CacheMemory.h"
|
||||
@@ -133,6 +136,7 @@
|
||||
%include "ReaderBase.h"
|
||||
%include "WriterBase.h"
|
||||
%include "AudioDevices.h"
|
||||
%include "AudioWaveformer.h"
|
||||
%include "CacheBase.h"
|
||||
%include "CacheDisk.h"
|
||||
%include "CacheMemory.h"
|
||||
|
||||
124
src/AudioWaveformer.cpp
Normal file
124
src/AudioWaveformer.cpp
Normal file
@@ -0,0 +1,124 @@
|
||||
/**
|
||||
* @file
|
||||
* @brief Source file for AudioWaveformer class
|
||||
* @author Jonathan Thomas <jonathan@openshot.org>
|
||||
*
|
||||
* @ref License
|
||||
*/
|
||||
|
||||
// Copyright (c) 2008-2022 OpenShot Studios, LLC
|
||||
//
|
||||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
|
||||
#include "AudioWaveformer.h"
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace openshot;
|
||||
|
||||
|
||||
// Default constructor
|
||||
AudioWaveformer::AudioWaveformer(ReaderBase* new_reader) : reader(new_reader)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
// Destructor
|
||||
AudioWaveformer::~AudioWaveformer()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
// Extract audio samples from any ReaderBase class
|
||||
AudioWaveformData AudioWaveformer::ExtractSamples(int channel, int num_per_second, bool normalize) {
|
||||
AudioWaveformData data;
|
||||
|
||||
if (reader) {
|
||||
// Open reader (if needed)
|
||||
bool does_reader_have_video = reader->info.has_video;
|
||||
if (!reader->IsOpen()) {
|
||||
reader->Open();
|
||||
}
|
||||
// Disable video for faster processing
|
||||
reader->info.has_video = false;
|
||||
|
||||
int sample_rate = reader->info.sample_rate;
|
||||
int sample_divisor = sample_rate / num_per_second;
|
||||
int total_samples = num_per_second * (reader->info.duration + 1.0);
|
||||
int extracted_index = 0;
|
||||
|
||||
// Resize and clear audio buffers
|
||||
data.resize(total_samples);
|
||||
data.zero(total_samples);
|
||||
|
||||
// Loop through all frames
|
||||
int sample_index = 0;
|
||||
float samples_max = 0.0;
|
||||
float chunk_max = 0.0;
|
||||
float chunk_squared_sum = 0.0;
|
||||
|
||||
// How many channels are we using
|
||||
int channel_count = 1;
|
||||
if (channel == -1) {
|
||||
channel_count = reader->info.channels;
|
||||
}
|
||||
|
||||
for (auto f = 1; f <= reader->info.video_length; f++) {
|
||||
// Get next frame
|
||||
shared_ptr<openshot::Frame> frame = reader->GetFrame(f);
|
||||
|
||||
// Cache channels for this frame, to reduce # of calls to frame->GetAudioSamples
|
||||
float* channels[channel_count];
|
||||
for (auto channel_index = 0; channel_index < reader->info.channels; channel_index++) {
|
||||
if (channel == channel_index || channel == -1) {
|
||||
channels[channel_index] = frame->GetAudioSamples(channel_index);
|
||||
}
|
||||
}
|
||||
|
||||
// Get sample value from a specific channel (or all channels)
|
||||
for (auto s = 0; s < frame->GetAudioSamplesCount(); s++) {
|
||||
for (auto channel_index = 0; channel_index < reader->info.channels; channel_index++) {
|
||||
if (channel == channel_index || channel == -1) {
|
||||
float *samples = channels[channel_index];
|
||||
float rms_sample_value = std::sqrt(samples[s] * samples[s]);
|
||||
|
||||
// Accumulate sample averages
|
||||
chunk_squared_sum += rms_sample_value;
|
||||
chunk_max = std::max(chunk_max, rms_sample_value);
|
||||
}
|
||||
}
|
||||
|
||||
sample_index += 1;
|
||||
|
||||
// Cut-off reached
|
||||
if (sample_index % sample_divisor == 0) {
|
||||
float avg_squared_sum = chunk_squared_sum / (sample_divisor * channel_count);
|
||||
data.max_samples[extracted_index] = chunk_max;
|
||||
data.rms_samples[extracted_index] = avg_squared_sum;
|
||||
extracted_index++;
|
||||
|
||||
// Track max/min values
|
||||
samples_max = std::max(samples_max, chunk_max);
|
||||
|
||||
// reset sample total and index
|
||||
sample_index = 0;
|
||||
chunk_max = 0.0;
|
||||
chunk_squared_sum = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scale all values to the -1 to +1 range (regardless of how small or how large the
|
||||
// original audio sample values are)
|
||||
if (normalize) {
|
||||
float scale = 1.0f / samples_max;
|
||||
data.scale(total_samples, scale);
|
||||
}
|
||||
|
||||
// Resume previous has_video value
|
||||
reader->info.has_video = does_reader_have_video;
|
||||
}
|
||||
|
||||
|
||||
return data;
|
||||
}
|
||||
101
src/AudioWaveformer.h
Normal file
101
src/AudioWaveformer.h
Normal file
@@ -0,0 +1,101 @@
|
||||
/**
|
||||
* @file
|
||||
* @brief Header file for AudioWaveformer class
|
||||
* @author Jonathan Thomas <jonathan@openshot.org>
|
||||
*
|
||||
* @ref License
|
||||
*/
|
||||
|
||||
// Copyright (c) 2008-2022 OpenShot Studios, LLC
|
||||
//
|
||||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
|
||||
#ifndef OPENSHOT_WAVEFORMER_H
|
||||
#define OPENSHOT_WAVEFORMER_H
|
||||
|
||||
#include "ReaderBase.h"
|
||||
#include "Frame.h"
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace openshot {
|
||||
|
||||
/**
|
||||
* @brief This struct holds the extracted waveform data (both the RMS root-mean-squared average, and the max values)
|
||||
*
|
||||
* Because we extract 2 different datasets from the audio, we return this struct with access to both sets of data,
|
||||
* the average root mean squared values, and the max sample values.
|
||||
*/
|
||||
struct AudioWaveformData
{
    std::vector<float> max_samples;  ///< Largest sample magnitude of each chunk
    std::vector<float> rms_samples;  ///< Averaged sample value of each chunk

    /// Resize both datasets to @p total_samples entries (negative counts are treated as 0)
    void resize(int total_samples) {
        const std::size_t count = total_samples > 0 ? static_cast<std::size_t>(total_samples) : 0;
        max_samples.resize(count);
        rms_samples.resize(count);
    }

    /// Zero out the first @p total_samples values in both datasets
    /// (the count is clamped to each dataset's size, so it can never write out of bounds)
    void zero(int total_samples) {
        if (total_samples <= 0) {
            return;
        }
        const std::size_t requested = static_cast<std::size_t>(total_samples);
        const std::size_t max_count = requested < max_samples.size() ? requested : max_samples.size();
        const std::size_t rms_count = requested < rms_samples.size() ? requested : rms_samples.size();
        for (std::size_t s = 0; s < max_count; s++) {
            max_samples[s] = 0.0f;
        }
        for (std::size_t s = 0; s < rms_count; s++) {
            rms_samples[s] = 0.0f;
        }
    }

    /// Multiply the first @p total_samples values in both datasets by @p factor
    /// (the count is clamped to each dataset's size, so it can never write out of bounds)
    void scale(int total_samples, float factor) {
        if (total_samples <= 0) {
            return;
        }
        const std::size_t requested = static_cast<std::size_t>(total_samples);
        const std::size_t max_count = requested < max_samples.size() ? requested : max_samples.size();
        const std::size_t rms_count = requested < rms_samples.size() ? requested : rms_samples.size();
        for (std::size_t s = 0; s < max_count; s++) {
            max_samples[s] *= factor;
        }
        for (std::size_t s = 0; s < rms_count; s++) {
            rms_samples[s] *= factor;
        }
    }

    /// Clear and free memory of both datasets
    void clear() {
        max_samples.clear();
        max_samples.shrink_to_fit();
        rms_samples.clear();
        rms_samples.shrink_to_fit();
    }

    /// Return a vector of vectors containing copies of both datasets:
    /// index 0 is max_samples, index 1 is rms_samples
    std::vector<std::vector<float>> vectors() const {
        std::vector<std::vector<float>> output;
        output.reserve(2);
        output.push_back(max_samples);
        output.push_back(rms_samples);
        return output;
    }
};
|
||||
|
||||
/**
|
||||
* @brief This class is used to extract audio data used for generating waveforms.
|
||||
*
|
||||
* Pass in a ReaderBase* with audio data, and this class will iterate the reader,
|
||||
* and sample down the dataset to a much smaller set - more useful for generating
|
||||
* waveforms. For example, take 44100 samples per second, and reduce it to 20
|
||||
* "max" or "average" samples per second - much easier to graph.
|
||||
*/
|
||||
class AudioWaveformer {
|
||||
private:
|
||||
ReaderBase* reader;
|
||||
|
||||
public:
|
||||
/// Default constructor
|
||||
AudioWaveformer(ReaderBase* reader);
|
||||
|
||||
/// @brief Extract audio samples from any ReaderBase class
|
||||
/// @param channel Which audio channel should we extract data from (-1 == all channels)
|
||||
/// @param num_per_second How many samples per second to return
|
||||
/// @param normalize Should we scale the data range so the largest value is 1.0
|
||||
AudioWaveformData ExtractSamples(int channel, int num_per_second, bool normalize);
|
||||
|
||||
/// Destructor
|
||||
~AudioWaveformer();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -50,6 +50,7 @@ set(OPENSHOT_SOURCES
|
||||
AudioDevices.cpp
|
||||
AudioReaderSource.cpp
|
||||
AudioResampler.cpp
|
||||
AudioWaveformer.cpp
|
||||
CacheBase.cpp
|
||||
CacheDisk.cpp
|
||||
CacheMemory.cpp
|
||||
|
||||
132
tests/AudioWaveformer.cpp
Normal file
132
tests/AudioWaveformer.cpp
Normal file
@@ -0,0 +1,132 @@
|
||||
/**
|
||||
* @file
|
||||
* @brief Unit tests for openshot::AudioWaveformer
|
||||
* @author Jonathan Thomas <jonathan@openshot.org>
|
||||
*
|
||||
* @ref License
|
||||
*/
|
||||
|
||||
// Copyright (c) 2008-2022 OpenShot Studios, LLC
|
||||
//
|
||||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
|
||||
#include "openshot_catch.h"
|
||||
#include "AudioWaveformer.h"
|
||||
#include "FFmpegReader.h"
|
||||
|
||||
|
||||
using namespace openshot;
|
||||
|
||||
TEST_CASE( "Extract waveform data piano.wav", "[libopenshot][audiowaveformer]" )
{
    // Open a reader on the sample audio file
    std::stringstream media_path;
    media_path << TEST_MEDIA_PATH << "piano.wav";
    FFmpegReader reader(media_path.str());
    reader.Open();

    // Extract 20 values per second (not normalized) from each channel in turn
    AudioWaveformer waveformer(&reader);
    for (auto channel = 0; channel < reader.info.channels; channel++) {
        AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, false);

        // The first two channels are expected to produce identical values
        if (channel == 0 || channel == 1) {
            const std::vector<float>& rms = waveform.rms_samples;
            CHECK(rms.size() == 107);
            CHECK(rms[0] == Approx(0.04879f).margin(0.00001));
            CHECK(rms[86] == Approx(0.13578f).margin(0.00001));
            CHECK(rms[87] == Approx(0.0f).margin(0.00001));
        }

        waveform.clear();
    }

    // Clean up
    reader.Close();
}
|
||||
|
||||
TEST_CASE( "Extract waveform data sintel", "[libopenshot][audiowaveformer]" )
{
    // Create a reader on the sample video (not opened here; ExtractSamples opens it if needed)
    std::stringstream media_path;
    media_path << TEST_MEDIA_PATH << "sintel_trailer-720p.mp4";
    FFmpegReader reader(media_path.str());

    // Extract 20 values per second (not normalized) from each channel in turn
    AudioWaveformer waveformer(&reader);
    for (auto channel = 0; channel < reader.info.channels; channel++) {
        AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, false);

        // The first two channels are expected to produce identical values
        if (channel == 0 || channel == 1) {
            const std::vector<float>& rms = waveform.rms_samples;
            CHECK(rms.size() == 1058);
            CHECK(rms[0] == Approx(0.00001f).margin(0.00001));
            CHECK(rms[1037] == Approx(0.00003f).margin(0.00001));
            CHECK(rms[1038] == Approx(0.0f).margin(0.00001));
        }

        waveform.clear();
    }

    // Clean up
    reader.Close();
}
|
||||
|
||||
|
||||
TEST_CASE( "Extract waveform data sintel (all channels)", "[libopenshot][audiowaveformer]" )
{
    // Create a reader on the sample video (not opened here; ExtractSamples opens it if needed)
    std::stringstream media_path;
    media_path << TEST_MEDIA_PATH << "sintel_trailer-720p.mp4";
    FFmpegReader reader(media_path.str());

    // Extract 20 values per second (not normalized), folding all channels together (-1)
    AudioWaveformer waveformer(&reader);
    AudioWaveformData waveform = waveformer.ExtractSamples(-1, 20, false);

    const std::vector<float>& rms = waveform.rms_samples;
    CHECK(rms.size() == 1058);
    CHECK(rms[0] == Approx(0.00001f).margin(0.00001));
    CHECK(rms[1037] == Approx(0.00003f).margin(0.00001));
    CHECK(rms[1038] == Approx(0.0f).margin(0.00001));

    waveform.clear();

    // Clean up
    reader.Close();
}
|
||||
|
||||
TEST_CASE( "Normalize & scale waveform data piano.wav", "[libopenshot][audiowaveformer]" )
{
    // Create a reader on the sample audio file (not opened here; ExtractSamples opens it if needed)
    std::stringstream media_path;
    media_path << TEST_MEDIA_PATH << "piano.wav";
    FFmpegReader reader(media_path.str());

    // Extract 20 values per second from each channel, with normalization enabled
    AudioWaveformer waveformer(&reader);
    for (auto channel = 0; channel < reader.info.channels; channel++) {
        AudioWaveformData waveform = waveformer.ExtractSamples(channel, 20, true);

        // Only the first channel's values are verified
        if (channel == 0) {
            const std::vector<float>& rms = waveform.rms_samples;
            CHECK(rms.size() == 107);
            CHECK(rms[0] == Approx(0.07524f).margin(0.00001));
            CHECK(rms[35] == Approx(0.20063f).margin(0.00001));
            CHECK(rms[86] == Approx(0.2094f).margin(0.00001));
            CHECK(rms[87] == Approx(0.0f).margin(0.00001));
        }

        waveform.clear();
    }

    // Clean up
    reader.Close();
}
|
||||
@@ -20,6 +20,7 @@ file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/examples/" TEST_MEDIA_PATH)
|
||||
### TEST SOURCE FILES
|
||||
###
|
||||
set(OPENSHOT_TESTS
|
||||
AudioWaveformer
|
||||
CacheDisk
|
||||
CacheMemory
|
||||
Clip
|
||||
|
||||
Reference in New Issue
Block a user