From 1ec7128a090c7802f69dfb4d971e2f36aa2014d2 Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Wed, 22 Aug 2012 17:31:12 -0500 Subject: [PATCH] Fixed a huge bug where the FFmpegWriter was not scaling colors to 8 bit, and that would crash on certain colors being used in an image. Also, added a new, experimental waveform visualization to the encoder. --- include/FFmpegWriter.h | 2 +- include/FileWriterBase.h | 1 + include/Frame.h | 12 +++- src/FFmpegWriter.cpp | 50 +++++++++++---- src/FileWriterBase.cpp | 2 + src/Frame.cpp | 130 +++++++++++++++++++++++++-------------- src/Main.cpp | 10 +-- 7 files changed, 143 insertions(+), 64 deletions(-) diff --git a/include/FFmpegWriter.h b/include/FFmpegWriter.h index 929f8a86..2c0c8101 100644 --- a/include/FFmpegWriter.h +++ b/include/FFmpegWriter.h @@ -134,7 +134,7 @@ namespace openshot void OutputStreamInfo(); /// Set audio export options - void SetAudioOptions(bool has_audio, string codec, int sample_rate, int channels, int bit_rate); + void SetAudioOptions(bool has_audio, string codec, int sample_rate, int channels, int bit_rate, bool visualize); /// Set the cache size (number of frames to queue before writing) int SetCacheSize(int new_size) { cache_size = new_size; }; diff --git a/include/FileWriterBase.h b/include/FileWriterBase.h index f5bc92df..22dedc32 100644 --- a/include/FileWriterBase.h +++ b/include/FileWriterBase.h @@ -48,6 +48,7 @@ namespace openshot int channels; ///< The number of audio channels used in the audio stream int audio_stream_index; ///< The index of the audio stream Fraction audio_timebase; ///< The audio timebase determines how long each audio packet should be played + bool visualize; ///< Replace the video stream with a waveform visualization of the audio samples }; /** diff --git a/include/Frame.h b/include/Frame.h index b62316f6..3e59810f 100644 --- a/include/Frame.h +++ b/include/Frame.h @@ -36,6 +36,7 @@ namespace openshot private: Magick::Image *image; Magick::Image *small_image; + 
Magick::Image *wave_image; juce::AudioSampleBuffer *audio; Fraction pixel_ratio; int sample_rate; @@ -77,6 +78,9 @@ namespace openshot /// Experimental method to add effects to this frame void AddEffect(string name); + /// Clear the waveform image (and deallocate its memory) + void ClearWaveform(); + /// Copy data and pointers from another Frame instance void DeepCopy(const Frame& other); @@ -87,7 +91,7 @@ namespace openshot void Display(); /// Display the wave form - void DisplayWaveform(bool resize); + void DisplayWaveform(); /// Get an array of sample data float* GetAudioSamples(int channel); @@ -116,6 +120,12 @@ namespace openshot /// Get height of image int GetHeight(); + /// Get an audio waveform image + Magick::Image* GetWaveform(int width, int height); + + /// Get an audio waveform image pixels + const Magick::PixelPacket* GetWaveformPixels(int width, int height); + /// Get height of image int GetWidth(); diff --git a/src/FFmpegWriter.cpp b/src/FFmpegWriter.cpp index 3d92f2e4..93435fb1 100644 --- a/src/FFmpegWriter.cpp +++ b/src/FFmpegWriter.cpp @@ -146,7 +146,7 @@ void FFmpegWriter::SetVideoOptions(bool has_video, string codec, Fraction fps, i } // Set audio export options -void FFmpegWriter::SetAudioOptions(bool has_audio, string codec, int sample_rate, int channels, int bit_rate) +void FFmpegWriter::SetAudioOptions(bool has_audio, string codec, int sample_rate, int channels, int bit_rate, bool visualize) { // Set audio options if (codec.length() > 0) @@ -172,6 +172,7 @@ void FFmpegWriter::SetAudioOptions(bool has_audio, string codec, int sample_rate // Enable / Disable audio info.has_audio = has_audio; + info.visualize = visualize; } // Set custom options (some codecs accept additional params) @@ -880,38 +881,59 @@ void FFmpegWriter::process_video_packet(Frame* frame) AVCodecContext *c; c = video_st->codec; + int source_image_width = frame->GetWidth(); + int source_image_height = frame->GetHeight(); + + // If visualizing waveform (replace image with 
waveform image) + if (info.visualize) + { + source_image_width = info.width; + source_image_height = info.height; + } + // Initialize the software scaler (if needed) if (!img_convert_ctx) // Init the software scaler from FFMpeg - img_convert_ctx = sws_getContext(frame->GetWidth(), frame->GetHeight(), PIX_FMT_RGB24, info.width, info.height, c->pix_fmt, SWS_FAST_BILINEAR, NULL, NULL, NULL); + img_convert_ctx = sws_getContext(source_image_width, source_image_height, PIX_FMT_RGB24, info.width, info.height, c->pix_fmt, SWS_FAST_BILINEAR, NULL, NULL, NULL); if (img_convert_ctx == NULL) throw OutOfMemory("Could not allocate SwsContext.", path); SwsContext *scaler = img_convert_ctx; - #pragma omp task firstprivate(frame, c, scaler) + #pragma task firstprivate(frame, c, scaler, source_image_width, source_image_height) { // Allocate an RGB frame & final output frame int bytes_source = 0; int bytes_final = 0; - AVFrame *frame_source = allocate_avframe(PIX_FMT_RGB24, frame->GetWidth(), frame->GetHeight(), &bytes_source); + AVFrame *frame_source = NULL; + const Magick::PixelPacket *pixel_packets = NULL; + + // If visualizing waveform (replace image with waveform image) + if (!info.visualize) + // Get a list of pixels from source image + pixel_packets = frame->GetPixels(); + else + // Get a list of pixels from waveform image + pixel_packets = frame->GetWaveformPixels(source_image_width, source_image_height); + + // Init AVFrame for source image & final (converted image) + frame_source = allocate_avframe(PIX_FMT_RGB24, source_image_width, source_image_height, &bytes_source); AVFrame *frame_final = allocate_avframe(c->pix_fmt, info.width, info.height, &bytes_final); - // Get a list of pixels from the frame. 
- const Magick::PixelPacket *pixel_packets = frame->GetPixels(); - // Fill the AVFrame with RGB image data - for (int packet = 0, row = 0; row < bytes_source; packet++, row+=3) + int source_total_pixels = source_image_width * source_image_height; + for (int packet = 0, row = 0; packet < source_total_pixels; packet++, row+=3) { // Update buffer (which is already linked to the AVFrame: pFrameRGB) - frame_source->data[0][row] = pixel_packets[packet].red; - frame_source->data[0][row+1] = pixel_packets[packet].green; - frame_source->data[0][row+2] = pixel_packets[packet].blue; + // Each color needs to be 8 bit (so I'm bit shifting the 16 bit ints) + frame_source->data[0][row] = pixel_packets[packet].red >> 8; + frame_source->data[0][row+1] = pixel_packets[packet].green >> 8; + frame_source->data[0][row+2] = pixel_packets[packet].blue >> 8; } // Resize & convert pixel format #pragma omp critical (image_scaler) sws_scale(scaler, frame_source->data, frame_source->linesize, 0, - frame->GetHeight(), frame_final->data, frame_final->linesize); + source_image_height, frame_final->data, frame_final->linesize); // Add resized AVFrame to av_frames map #pragma omp critical (av_frames_section) @@ -921,6 +943,10 @@ void FFmpegWriter::process_video_packet(Frame* frame) av_free(frame_source->data[0]); av_free(frame_source); + if (info.visualize) + // Deallocate the waveform's image (if needed) + frame->ClearWaveform(); + } // end task } diff --git a/src/FileWriterBase.cpp b/src/FileWriterBase.cpp index 2088d38c..59c6b4e9 100644 --- a/src/FileWriterBase.cpp +++ b/src/FileWriterBase.cpp @@ -28,6 +28,7 @@ void FileWriterBase::InitFileInfo() info.channels = 0; info.audio_stream_index = -1; info.audio_timebase = Fraction(); + info.visualize = false; } // Display file information @@ -65,6 +66,7 @@ void FileWriterBase::DisplayInfo() { cout << "--> # of Channels: " << info.channels << endl; cout << "--> Audio Stream Index: " << info.audio_stream_index << endl; cout << "--> Audio Timebase: " << 
info.audio_timebase.ToDouble() << " (" << info.audio_timebase.num << "/" << info.audio_timebase.den << ")" << endl; + cout << "--> Visualize Waveform: " << info.visualize << endl; cout << "----------------------------" << endl; } diff --git a/src/Frame.cpp b/src/Frame.cpp index 929b3129..09c1d977 100644 --- a/src/Frame.cpp +++ b/src/Frame.cpp @@ -10,7 +10,7 @@ using namespace std; using namespace openshot; // Constructor - blank frame (300x200 blank image, 48kHz audio silence) -Frame::Frame() : number(1), image(0), audio(0), pixel_ratio(1,1), sample_rate(48000), channels(2) +Frame::Frame() : number(1), image(0), audio(0), pixel_ratio(1,1), sample_rate(48000), channels(2), wave_image(NULL) { // Init the image magic and audio buffer image = new Magick::Image(Magick::Geometry(1,1), Magick::Color("red")); @@ -22,7 +22,7 @@ Frame::Frame() : number(1), image(0), audio(0), pixel_ratio(1,1), sample_rate(48 // Constructor - image only (48kHz audio silence) Frame::Frame(int number, int width, int height, string color) - : number(number), image(0), audio(0), pixel_ratio(1,1), sample_rate(48000), channels(2) + : number(number), image(0), audio(0), pixel_ratio(1,1), sample_rate(48000), channels(2), wave_image(NULL) { // Init the image magic and audio buffer image = new Magick::Image(Magick::Geometry(1, 1), Magick::Color(color)); @@ -34,7 +34,7 @@ Frame::Frame(int number, int width, int height, string color) // Constructor - image only from pixel array (48kHz audio silence) Frame::Frame(int number, int width, int height, const string map, const Magick::StorageType type, const void *pixels) - : number(number), image(0), audio(0), pixel_ratio(1,1), sample_rate(48000), channels(2) + : number(number), image(0), audio(0), pixel_ratio(1,1), sample_rate(48000), channels(2), wave_image(NULL) { // Init the image magic and audio buffer image = new Magick::Image(width, height, map, type, pixels); @@ -46,7 +46,7 @@ Frame::Frame(int number, int width, int height, const string map, const 
Magick:: // Constructor - audio only (300x200 blank image) Frame::Frame(int number, int samples, int channels) : - number(number), image(0), audio(0), pixel_ratio(1,1), sample_rate(48000), channels(channels) + number(number), image(0), audio(0), pixel_ratio(1,1), sample_rate(48000), channels(channels), wave_image(NULL) { // Init the image magic and audio buffer image = new Magick::Image(Magick::Geometry(1, 1), Magick::Color("white")); @@ -58,7 +58,7 @@ Frame::Frame(int number, int samples, int channels) : // Constructor - image & audio Frame::Frame(int number, int width, int height, string color, int samples, int channels) - : number(number), image(0), audio(0), pixel_ratio(1,1), sample_rate(48000), channels(channels) + : number(number), image(0), audio(0), pixel_ratio(1,1), sample_rate(48000), channels(channels), wave_image(NULL) { // Init the image magic and audio buffer image = new Magick::Image(Magick::Geometry(1, 1), Magick::Color(color)); @@ -146,99 +146,139 @@ void Frame::Display() } } -// Display the wave form -void Frame::DisplayWaveform(bool resize) +// Get an audio waveform image +Magick::Image* Frame::GetWaveform(int width, int height) { - // Create blank image - Magick::Image wave_image; + // Clear any existing waveform image + ClearWaveform(); // Init a list of lines list lines; lines.push_back(Magick::DrawableFillColor("#0070ff")); lines.push_back(Magick::DrawablePointSize(16)); - // Calculate the width of an image based on the # of samples - int width = audio->getNumSamples(); + // Calculate 1/2 the width of an image based on the # of samples + int total_samples = audio->getNumSamples(); - if (width > 0) + // Determine how many samples can be skipped (to speed things up) + int step = 1; + if (total_samples > width) + // Set the # of samples to move forward for each pixel we draw + step = round((float) total_samples / (float) width) + 1; + + if (total_samples > 0) { // If samples are present... 
- int height = 200 * audio->getNumChannels(); + int new_height = 200 * audio->getNumChannels(); int height_padding = 20 * (audio->getNumChannels() - 1); - int total_height = height + height_padding; - wave_image = Magick::Image(Magick::Geometry(width, total_height), Magick::Color("#000000")); + int total_height = new_height + height_padding; + int total_width = 0; // Loop through each audio channel int Y = 100; for (int channel = 0; channel < audio->getNumChannels(); channel++) { + int X = 0; + + // Change stroke and color + lines.push_back(Magick::DrawableStrokeColor("#0070ff")); + lines.push_back(Magick::DrawableStrokeWidth(1)); + // Get audio for this channel float *samples = audio->getSampleData(channel); - for (int sample = 0; sample < audio->getNumSamples(); sample++) + for (int sample = 0; sample < audio->getNumSamples(); sample+=step, X++) { // Sample value (scaled to -100 to 100) float value = samples[sample] * 100; - if (value > 100 || value < -100) - { - cout << "TOO BIG: Sample # " << sample << " on frame " << number << " is TOO BIG: " << samples[sample] << endl; - } - // Append a line segment for each sample if (value != 0.0) - { // LINE - lines.push_back(Magick::DrawableStrokeColor("#0070ff")); - lines.push_back(Magick::DrawableStrokeWidth(1)); - lines.push_back(Magick::DrawableLine(sample,Y, sample,Y-value)); // sample=X coordinate, Y=100 is the middle - } + lines.push_back(Magick::DrawableLine(X,Y, X,Y-value)); // sample=X coordinate, Y=100 is the middle else - { // DOT - lines.push_back(Magick::DrawableFillColor("#0070ff")); - lines.push_back(Magick::DrawableStrokeWidth(1)); - lines.push_back(Magick::DrawablePoint(sample,Y)); - } + lines.push_back(Magick::DrawablePoint(X,Y)); } // Add Channel Label - stringstream label; - label << "Channel " << channel; - lines.push_back(Magick::DrawableStrokeColor("#ffffff")); - lines.push_back(Magick::DrawableFillColor("#ffffff")); - lines.push_back(Magick::DrawableStrokeWidth(0.1)); - 
lines.push_back(Magick::DrawableText(5, Y - 5, label.str())); +// stringstream label; +// label << "Channel " << channel; +// lines.push_back(Magick::DrawableStrokeColor("#ffffff")); +// lines.push_back(Magick::DrawableFillColor("#ffffff")); +// lines.push_back(Magick::DrawableStrokeWidth(0.1)); +// lines.push_back(Magick::DrawableText(5, Y - 5, label.str())); // Increment Y Y += (200 + height_padding); + total_width = X; } + // Create image + wave_image = new Magick::Image(Magick::Geometry(total_width, total_height), Magick::Color("#000000")); + // Draw the waveform - wave_image.draw(lines); + wave_image->draw(lines); // Resize Image (if requested) - if (resize) - // Resize to 60% - wave_image.resize(Magick::Geometry(width * 0.6, total_height * 0.6)); + if (width != total_width || height != total_height) + { + Magick::Geometry new_size(width, height); + new_size.aspect(true); + wave_image->resize(new_size); + } + } else { // No audio samples present - wave_image = Magick::Image(Magick::Geometry(720, 480), Magick::Color("#000000")); + wave_image = new Magick::Image(Magick::Geometry(width, height), Magick::Color("#000000")); // Add Channel Label lines.push_back(Magick::DrawableStrokeColor("#ffffff")); lines.push_back(Magick::DrawableFillColor("#ffffff")); lines.push_back(Magick::DrawableStrokeWidth(0.1)); - lines.push_back(Magick::DrawableText(265, 240, "No Audio Samples Found")); + lines.push_back(Magick::DrawableText((width / 2) - 100, height / 2, "No Audio Samples Found")); // Draw the waveform - wave_image.draw(lines); + wave_image->draw(lines); } + // Return new image + return wave_image; +} + +// Clear the waveform image (and deallocate its memory) +void Frame::ClearWaveform() +{ + if (wave_image) + { + delete wave_image; + wave_image = NULL; + } +} + +// Get an audio waveform image pixels +const Magick::PixelPacket* Frame::GetWaveformPixels(int width, int height) +{ + // Get audio wave form image + Magick::Image *wave_image = GetWaveform(width, height); + + 
// Return array of pixel packets + return wave_image->getConstPixels(0,0, wave_image->columns(), wave_image->rows()); +} + +// Display the wave form +void Frame::DisplayWaveform() +{ + // Get audio wave form image + Magick::Image *wave_image = GetWaveform(720, 480); + // Display Image - wave_image.display(); + wave_image->display(); + + // Deallocate waveform image + ClearWaveform(); } // Get an array of sample data diff --git a/src/Main.cpp b/src/Main.cpp index ab3e0ebc..f0488943 100644 --- a/src/Main.cpp +++ b/src/Main.cpp @@ -20,11 +20,11 @@ int main() // openshot::FFmpegReader r("../../src/examples/piano.wav"); // openshot::FFmpegReader r("/home/jonathan/Videos/big-buck-bunny_trailer.webm"); - // openshot::FFmpegReader r("/home/jonathan/Videos/sintel-1024-stereo.mp4"); + openshot::FFmpegReader r("/home/jonathan/Videos/sintel-1024-stereo.mp4"); // openshot::FFmpegReader r("/home/jonathan/Videos/OpenShot_Now_In_3d.mp4"); // openshot::FFmpegReader r("/home/jonathan/Videos/sintel_trailer-720p.mp4"); // openshot::FFmpegReader r("/home/jonathan/Aptana Studio Workspace/OpenShotLibrary/src/examples/piano.wav"); - openshot::FFmpegReader r("/home/jonathan/Music/Army of Lovers/Crucified/Army of Lovers - Crucified [Single Version].mp3"); + // openshot::FFmpegReader r("/home/jonathan/Music/Army of Lovers/Crucified/Army of Lovers - Crucified [Single Version].mp3"); // openshot::FFmpegReader r("/home/jonathan/Documents/OpenShot Art/test.jpeg"); // openshot::FFmpegReader r("/home/jonathan/Videos/60fps.mp4"); // openshot::FFmpegReader r("/home/jonathan/Aptana Studio Workspace/OpenShotLibrary/src/examples/asdf.wdf"); @@ -37,7 +37,7 @@ int main() w.DisplayInfo(); // Set options - w.SetAudioOptions(true, "libvorbis", 44100, 2, 128000); + w.SetAudioOptions(true, "libvorbis", 44100, 2, 128000, false); w.SetVideoOptions(true, "libvpx", Fraction(24, 1), 640, 360, Fraction(1,1), false, false, 2000000); // Prepare Streams @@ -67,12 +67,12 @@ int main() //Frame *f = r.GetFrame(1); - for 
(int frame = 1; frame <= 1000; frame++) + for (int frame = 1; frame <= 2000; frame++) { Frame *f = r.GetFrame(frame); //if (f->number == 307 || f->number == 308 || f->number == 309 || f->number == 310) - // f->DisplayWaveform(false); + //f->DisplayWaveform(); // Apply effect //f->AddEffect("flip");