From 5e4bc364cbd77645bf47bc177e36e13cfa894720 Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Thu, 5 Jun 2025 19:38:15 -0500 Subject: [PATCH] Remove per-thread scalers; use single persistent frames and SwsContext for video scaling. Total improvement of 8-9% when testing h.264 encoding @ 720p. --- src/FFmpegWriter.cpp | 254 +++++++++++++++++++++---------------------- src/FFmpegWriter.h | 17 +-- 2 files changed, 131 insertions(+), 140 deletions(-) diff --git a/src/FFmpegWriter.cpp b/src/FFmpegWriter.cpp index 340f3657..ddfa67bc 100644 --- a/src/FFmpegWriter.cpp +++ b/src/FFmpegWriter.cpp @@ -9,7 +9,7 @@ * @ref License */ -// Copyright (c) 2008-2024 OpenShot Studios, LLC, Fabrice Bellard +// Copyright (c) 2008-2025 OpenShot Studios, LLC, Fabrice Bellard // // SPDX-License-Identifier: LGPL-3.0-or-later @@ -75,8 +75,8 @@ static int set_hwframe_ctx(AVCodecContext *ctx, AVBufferRef *hw_device_ctx, int6 FFmpegWriter::FFmpegWriter(const std::string& path) : path(path), oc(NULL), audio_st(NULL), video_st(NULL), samples(NULL), audio_outbuf(NULL), audio_outbuf_size(0), audio_input_frame_size(0), audio_input_position(0), - initial_audio_input_frame_size(0), img_convert_ctx(NULL), num_of_rescalers(1), - rescaler_position(0), video_codec_ctx(NULL), audio_codec_ctx(NULL), is_writing(false), video_timestamp(0), audio_timestamp(0), + initial_audio_input_frame_size(0), img_convert_ctx(NULL), + video_codec_ctx(NULL), audio_codec_ctx(NULL), is_writing(false), video_timestamp(0), audio_timestamp(0), original_sample_rate(0), original_channels(0), avr(NULL), avr_planar(NULL), is_open(false), prepare_streams(false), write_header(false), write_trailer(false), audio_encoder_buffer_size(0), audio_encoder_buffer(NULL) { @@ -414,8 +414,6 @@ void FFmpegWriter::SetOption(StreamType stream, std::string name, std::string va else if (name == "cqp") { // encode quality and special settings like lossless - // This might be better in an extra methods as more options - // and way to set quality are possible #if USE_HW_ACCEL if (hw_en_on) { av_opt_set_int(c->priv_data, "qp", std::min(std::stoi(value),63), 0); // 0-63 @@ -464,8 +462,6 @@ void FFmpegWriter::SetOption(StreamType stream, std::string name, std::string va } } else if (name == "crf") { // encode quality and special settings like lossless - // This might be better in an extra methods as more options - // and way to set quality are possible #if USE_HW_ACCEL if (hw_en_on) { double mbs = 15000000.0; @@ -539,8 +535,6 @@ void FFmpegWriter::SetOption(StreamType stream, std::string name, std::string va } } else if (name == "qp") { // encode quality and special settings like lossless - // This might be better in an extra methods as more options - // and way to set quality are possible #if (LIBAVCODEC_VERSION_MAJOR >= 58) // FFmpeg 4.0+ switch (c->codec_id) { @@ -605,10 +599,7 @@ bool FFmpegWriter::IsValidCodec(std::string codec_name) { AV_REGISTER_ALL // Find the codec (if any) - if (avcodec_find_encoder_by_name(codec_name.c_str()) == NULL) - return false; - else - return true; + return avcodec_find_encoder_by_name(codec_name.c_str()) != NULL; } // Prepare & initialize streams and open codecs @@ -972,9 +963,9 @@ void FFmpegWriter::Close() { if (audio_st) close_audio(oc, audio_st); - // Deallocate image scalers - if (image_rescalers.size() > 0) - RemoveScalers(); + // Remove single software scaler + if (img_convert_ctx) + sws_freeContext(img_convert_ctx); if (!(oc->oformat->flags & AVFMT_NOFILE)) { /* close the output file */ @@ -1025,7 +1016,7 @@ AVStream *FFmpegWriter::add_audio_stream() { // Create a new audio stream AVStream* st = avformat_new_stream(oc, codec); if (!st) - throw OutOfMemory("Could not allocate memory for the video stream.", path); + throw OutOfMemory("Could not allocate memory for the audio stream.", path); // Allocate a new codec context for the stream ALLOC_CODEC_CTX(audio_codec_ctx, codec, st) @@ -1058,7 +1049,7 @@ AVStream *FFmpegWriter::add_audio_stream() { // Set sample rate c->sample_rate = info.sample_rate; -uint64_t channel_layout = info.channel_layout; + uint64_t channel_layout = info.channel_layout; #if HAVE_CH_LAYOUT // Set a valid number of channels (or throw error) AVChannelLayout ch_layout; @@ -1117,9 +1108,9 @@ uint64_t channel_layout = info.channel_layout; AV_COPY_PARAMS_FROM_CONTEXT(st, c); -int nb_channels; -const char* nb_channels_label; -const char* channel_layout_label; + int nb_channels; + const char* nb_channels_label; + const char* channel_layout_label; #if HAVE_CH_LAYOUT nb_channels = c->ch_layout.nb_channels; @@ -1559,7 +1550,7 @@ void FFmpegWriter::open_video(AVFormatContext *oc, AVStream *st) { av_dict_free(&opts); // Add video metadata (if any) - for (std::map::iterator iter = info.metadata.begin(); iter != info.metadata.end(); ++iter) { + for (auto iter = info.metadata.begin(); iter != info.metadata.end(); ++iter) { av_dict_set(&st->metadata, iter->first.c_str(), iter->second.c_str(), 0); } @@ -2059,69 +2050,128 @@ AVFrame *FFmpegWriter::allocate_avframe(PixelFormat pix_fmt, int width, int heig // process video frame void FFmpegWriter::process_video_packet(std::shared_ptr frame) { - // Determine the height & width of the source image - int source_image_width = frame->GetWidth(); - int source_image_height = frame->GetHeight(); + // Source dimensions (RGBA) + int src_w = frame->GetWidth(); + int src_h = frame->GetHeight(); - // Do nothing if size is 1x1 (i.e. no image in this frame) - if (source_image_height == 1 && source_image_width == 1) + // Skip empty frames (1×1) + if (src_w == 1 && src_h == 1) return; - // Init rescalers (if not initialized yet) - if (image_rescalers.size() == 0) - InitScalers(source_image_width, source_image_height); + // Point persistent_src_frame->data to RGBA pixels + const uchar* pixels = frame->GetPixels(); + if (!persistent_src_frame) { + persistent_src_frame = av_frame_alloc(); + if (!persistent_src_frame) + throw OutOfMemory("Could not allocate persistent_src_frame", path); + persistent_src_frame->format = AV_PIX_FMT_RGBA; + persistent_src_frame->width = src_w; + persistent_src_frame->height = src_h; + persistent_src_frame->linesize[0] = src_w * 4; + } + persistent_src_frame->data[0] = const_cast( + reinterpret_cast(pixels) + ); - // Get a unique rescaler (for this thread) - SwsContext *scaler = image_rescalers[rescaler_position]; - rescaler_position++; - if (rescaler_position == num_of_rescalers) - rescaler_position = 0; + // Prepare persistent_dst_frame + buffer on first use + if (!persistent_dst_frame) { + persistent_dst_frame = av_frame_alloc(); + if (!persistent_dst_frame) + throw OutOfMemory("Could not allocate persistent_dst_frame", path); - // Allocate an RGB frame & final output frame - int bytes_source = 0; - int bytes_final = 0; - AVFrame *frame_source = NULL; - const uchar *pixels = NULL; - - // Get a list of pixels from source image - pixels = frame->GetPixels(); - - // Init AVFrame for source image & final (converted image) - frame_source = allocate_avframe(PIX_FMT_RGBA, source_image_width, source_image_height, &bytes_source, (uint8_t *) pixels); -#if IS_FFMPEG_3_2 - AVFrame *frame_final; + // Decide destination pixel format: NV12 if HW accel is on, else encoder’s pix_fmt + AVPixelFormat dst_fmt = video_codec_ctx->pix_fmt; #if USE_HW_ACCEL - if (hw_en_on && hw_en_supported) { - frame_final = allocate_avframe(AV_PIX_FMT_NV12, info.width, info.height, &bytes_final, NULL); - } else -#endif // USE_HW_ACCEL - { - frame_final = allocate_avframe( - (AVPixelFormat)(video_st->codecpar->format), - info.width, info.height, &bytes_final, NULL + if (hw_en_on && hw_en_supported) { + dst_fmt = AV_PIX_FMT_NV12; + } +#endif + persistent_dst_frame->format = dst_fmt; + persistent_dst_frame->width = info.width; + persistent_dst_frame->height = info.height; + + persistent_dst_size = av_image_get_buffer_size( + dst_fmt, info.width, info.height, 1 + ); + if (persistent_dst_size < 0) + throw ErrorEncodingVideo("Invalid destination image size", -1); + + persistent_dst_buffer = static_cast( + av_malloc(persistent_dst_size) + ); + if (!persistent_dst_buffer) + throw OutOfMemory("Could not allocate persistent_dst_buffer", path); + + av_image_fill_arrays( + persistent_dst_frame->data, + persistent_dst_frame->linesize, + persistent_dst_buffer, + dst_fmt, + info.width, + info.height, + 1 ); } -#else - AVFrame *frame_final = allocate_avframe(video_codec_ctx->pix_fmt, info.width, info.height, &bytes_final, NULL); -#endif // IS_FFMPEG_3_2 - // Fill with data - AV_COPY_PICTURE_DATA(frame_source, (uint8_t *) pixels, PIX_FMT_RGBA, source_image_width, source_image_height); - ZmqLogger::Instance()->AppendDebugMethod( - "FFmpegWriter::process_video_packet", - "frame->number", frame->number, - "bytes_source", bytes_source, - "bytes_final", bytes_final); + // Initialize SwsContext (RGBA → dst_fmt) on first use + if (!img_convert_ctx) { + int flags = SWS_FAST_BILINEAR; + if (openshot::Settings::Instance()->HIGH_QUALITY_SCALING) { + flags = SWS_BICUBIC; + } + AVPixelFormat dst_fmt = video_codec_ctx->pix_fmt; +#if USE_HW_ACCEL + if (hw_en_on && hw_en_supported) { + dst_fmt = AV_PIX_FMT_NV12; + } +#endif + img_convert_ctx = sws_getContext( + src_w, src_h, AV_PIX_FMT_RGBA, + info.width, info.height, dst_fmt, + flags, NULL, NULL, NULL + ); + if (!img_convert_ctx) + throw ErrorEncodingVideo("Could not initialize sws context", -1); + } - // Resize & convert pixel format - sws_scale(scaler, frame_source->data, frame_source->linesize, 0, - source_image_height, frame_final->data, frame_final->linesize); + // Scale RGBA → dst_fmt into persistent_dst_buffer + sws_scale( + img_convert_ctx, + persistent_src_frame->data, + persistent_src_frame->linesize, + 0, src_h, + persistent_dst_frame->data, + persistent_dst_frame->linesize + ); - // Add resized AVFrame to av_frames map - add_avframe(frame, frame_final); + // Allocate a new AVFrame + buffer, then copy scaled data into it + int bytes_final = 0; + AVPixelFormat dst_fmt = video_codec_ctx->pix_fmt; +#if USE_HW_ACCEL + if (hw_en_on && hw_en_supported) { + dst_fmt = AV_PIX_FMT_NV12; + } +#endif - // Deallocate memory - AV_FREE_FRAME(&frame_source); + AVFrame* new_frame = allocate_avframe( + dst_fmt, + info.width, + info.height, + &bytes_final, + nullptr + ); + if (!new_frame) + throw OutOfMemory("Could not allocate new_frame via allocate_avframe", path); + + // Copy persistent_dst_buffer → new_frame buffer + memcpy( + new_frame->data[0], + persistent_dst_buffer, + static_cast(bytes_final) + ); + + // Queue the deep‐copied frame for encoding + add_avframe(frame, new_frame); } // write video frame @@ -2307,55 +2357,14 @@ void FFmpegWriter::OutputStreamInfo() { av_dump_format(oc, 0, path.c_str(), 1); } -// Init a collection of software rescalers (thread safe) -void FFmpegWriter::InitScalers(int source_width, int source_height) { - int scale_mode = SWS_FAST_BILINEAR; - if (openshot::Settings::Instance()->HIGH_QUALITY_SCALING) { - scale_mode = SWS_BICUBIC; - } - - // Init software rescalers vector (many of them, one for each thread) - for (int x = 0; x < num_of_rescalers; x++) { - // Init the software scaler from FFMpeg -#if USE_HW_ACCEL - if (hw_en_on && hw_en_supported) { - img_convert_ctx = sws_getContext(source_width, source_height, PIX_FMT_RGBA, - info.width, info.height, AV_PIX_FMT_NV12, scale_mode, NULL, NULL, NULL); - } else -#endif // USE_HW_ACCEL - { - img_convert_ctx = sws_getContext(source_width, source_height, PIX_FMT_RGBA, - info.width, info.height, AV_GET_CODEC_PIXEL_FORMAT(video_st, video_st->codec), - scale_mode, NULL, NULL, NULL); - } - - // Add rescaler to vector - image_rescalers.push_back(img_convert_ctx); - } -} - // Set audio resample options void FFmpegWriter::ResampleAudio(int sample_rate, int channels) { original_sample_rate = sample_rate; original_channels = channels; } -// Remove & deallocate all software scalers -void FFmpegWriter::RemoveScalers() { - // Close all rescalers - for (int x = 0; x < num_of_rescalers; x++) - sws_freeContext(image_rescalers[x]); - - // Clear vector - image_rescalers.clear(); -} - // In FFmpegWriter.cpp -void FFmpegWriter::AddSphericalMetadata(const std::string& projection, - float yaw_deg, - float pitch_deg, - float roll_deg) -{ +void FFmpegWriter::AddSphericalMetadata(const std::string& projection, float yaw_deg, float pitch_deg, float roll_deg) { if (!oc) return; if (!info.has_video || !video_st) return; @@ -2371,10 +2380,7 @@ void FFmpegWriter::AddSphericalMetadata(const std::string& projection, // Allocate the side‐data structure size_t sd_size = 0; AVSphericalMapping* map = av_spherical_alloc(&sd_size); - if (!map) { - // Allocation failed; skip metadata - return; - } + if (!map) return; // Populate it map->projection = static_cast(proj); @@ -2383,14 +2389,6 @@ void FFmpegWriter::AddSphericalMetadata(const std::string& projection, map->pitch = static_cast(pitch_deg * (1 << 16)); map->roll = static_cast(roll_deg * (1 << 16)); - // Attach to the video stream so movenc will emit an sv3d atom - av_stream_add_side_data( - video_st, - AV_PKT_DATA_SPHERICAL, - reinterpret_cast(map), - sd_size - ); -#else - // FFmpeg build too old: spherical side-data not supported + av_stream_add_side_data(video_st, AV_PKT_DATA_SPHERICAL, reinterpret_cast(map), sd_size); #endif } diff --git a/src/FFmpegWriter.h b/src/FFmpegWriter.h index 1f54eed9..4fab6a77 100644 --- a/src/FFmpegWriter.h +++ b/src/FFmpegWriter.h @@ -134,9 +134,10 @@ namespace openshot { uint8_t *audio_outbuf; uint8_t *audio_encoder_buffer; - int num_of_rescalers; - int rescaler_position; - std::vector image_rescalers; + AVFrame *persistent_src_frame = nullptr; + AVFrame *persistent_dst_frame = nullptr; + uint8_t *persistent_dst_buffer = nullptr; + int persistent_dst_size = 0; int audio_outbuf_size; int audio_input_frame_size; @@ -180,11 +181,6 @@ namespace openshot { /// initialize streams void initialize_streams(); - /// @brief Init a collection of software rescalers (thread safe) - /// @param source_width The source width of the image scalers (used to cache a bunch of scalers) - /// @param source_height The source height of the image scalers (used to cache a bunch of scalers) - void InitScalers(int source_width, int source_height); - /// open audio codec void open_audio(AVFormatContext *oc, AVStream *st); @@ -230,9 +226,6 @@ namespace openshot { /// by the Open() method if this method has not yet been called. void PrepareStreams(); - /// Remove & deallocate all software scalers - void RemoveScalers(); - /// @brief Set audio resample options /// @param sample_rate The number of samples per second of the audio /// @param channels The number of audio channels @@ -327,6 +320,6 @@ namespace openshot { }; -} +} // namespace openshot #endif