From c95fd837b686e0548d6f5d8f39f0bb22cef2f67d Mon Sep 17 00:00:00 2001
From: Jonathan Thomas
Date: Wed, 4 Jun 2025 17:18:23 -0500
Subject: [PATCH] Small improvement to caching sws scale context and reusing
 AVFrame objects. About 1.5% fewer CPU calls, and more even memory
 allocations (less spiky).

---
 src/FFmpegReader.cpp | 58 ++++++++++++++++++++++++++------------------
 src/FFmpegReader.h   |  5 ++++
 2 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/src/FFmpegReader.cpp b/src/FFmpegReader.cpp
index c42cc386..afdff878 100644
--- a/src/FFmpegReader.cpp
+++ b/src/FFmpegReader.cpp
@@ -74,8 +74,8 @@ FFmpegReader::FFmpegReader(const std::string &path, bool inspect_reader)
 		  seek_audio_frame_found(0), seek_video_frame_found(0),is_duration_known(false), largest_frame_processed(0),
 		  current_video_frame(0), packet(NULL), max_concurrent_frames(OPEN_MP_NUM_PROCESSORS), audio_pts(0),
 		  video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
-		  pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0},
-		  hold_packet(false) {
+		  pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0},
+		  hold_packet(false) {
 
 	// Initialize FFMpeg, and register all formats and codecs
 	AV_REGISTER_ALL
@@ -678,6 +678,13 @@ void FFmpegReader::Close() {
 			}
 		}
 #endif // USE_HW_ACCEL
+		if (img_convert_ctx) {
+			sws_freeContext(img_convert_ctx);
+			img_convert_ctx = nullptr;
+		}
+		if (pFrameRGB_cached) {
+			AV_FREE_FRAME(&pFrameRGB_cached);
+		}
 	}
 
 	// Close the audio codec
@@ -686,6 +693,11 @@ void FFmpegReader::Close() {
 			avcodec_flush_buffers(aCodecCtx);
 		}
 		AV_FREE_CONTEXT(aCodecCtx);
+		if (avr_ctx) {
+			SWR_CLOSE(avr_ctx);
+			SWR_FREE(&avr_ctx);
+			avr_ctx = nullptr;
+		}
 	}
 
 	// Clear final cache
@@ -1469,15 +1481,17 @@ void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
 	int width = info.width;
 	int64_t video_length = info.video_length;
 
-	// Create variables for a RGB Frame (since most videos are not in RGB, we must convert it)
-	AVFrame *pFrameRGB = nullptr;
+	// Create or reuse a RGB Frame (since most videos are not in RGB, we must convert it)
+	AVFrame *pFrameRGB = pFrameRGB_cached;
+	if (!pFrameRGB) {
+		pFrameRGB = AV_ALLOCATE_FRAME();
+		if (pFrameRGB == nullptr)
+			throw OutOfMemory("Failed to allocate frame buffer", path);
+		pFrameRGB_cached = pFrameRGB;
+	}
+	AV_RESET_FRAME(pFrameRGB);
 	uint8_t *buffer = nullptr;
 
-	// Allocate an AVFrame structure
-	pFrameRGB = AV_ALLOCATE_FRAME();
-	if (pFrameRGB == nullptr)
-		throw OutOfMemory("Failed to allocate frame buffer", path);
-
 	// Determine the max size of this source image (based on the timeline's size, the scaling mode,
 	// and the scaling keyframes). This is a performance improvement, to keep the images as small as possible,
 	// without losing quality. NOTE: We cannot go smaller than the timeline itself, or the add_layer timeline
@@ -1564,8 +1578,9 @@ void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
 		if (openshot::Settings::Instance()->HIGH_QUALITY_SCALING) {
 			scale_mode = SWS_BICUBIC;
 		}
-		SwsContext *img_convert_ctx = sws_getContext(info.width, info.height, AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx), width,
-													 height, PIX_FMT_RGBA, scale_mode, NULL, NULL, NULL);
+		img_convert_ctx = sws_getCachedContext(img_convert_ctx, info.width, info.height, AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx), width, height, PIX_FMT_RGBA, scale_mode, NULL, NULL, NULL);
+		if (!img_convert_ctx)
+			throw OutOfMemory("Failed to initialize sws context", path);
 
 		// Resize / Convert to RGB
 		sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
@@ -1590,11 +1605,10 @@ void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
 		last_video_frame = f;
 
 		// Free the RGB image
-		AV_FREE_FRAME(&pFrameRGB);
+		AV_RESET_FRAME(pFrameRGB);
 
-		// Remove frame and packet
-		RemoveAVFrame(pFrame);
-		sws_freeContext(img_convert_ctx);
+		// Remove frame and packet
+		RemoveAVFrame(pFrame);
 
 		// Get video PTS in seconds
 		video_pts_seconds = (double(video_pts) * info.video_timebase.ToDouble()) + pts_offset_seconds;
@@ -1738,10 +1752,10 @@ void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
 	audio_converted->nb_samples = audio_frame->nb_samples;
 	av_samples_alloc(audio_converted->data, audio_converted->linesize, info.channels,
 					 audio_frame->nb_samples, AV_SAMPLE_FMT_FLTP, 0);
-	SWRCONTEXT *avr = NULL;
-
-	// setup resample context
-	avr = SWR_ALLOC();
+	SWRCONTEXT *avr = avr_ctx;
+	// setup resample context if needed
+	if (!avr) {
+	avr = SWR_ALLOC();
 #if HAVE_CH_LAYOUT
 	av_opt_set_chlayout(avr, "in_chlayout", &AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, 0);
 	av_opt_set_chlayout(avr, "out_chlayout", &AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, 0);
@@ -1756,6 +1770,8 @@ void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
 	av_opt_set_int(avr, "in_sample_rate", info.sample_rate, 0);
 	av_opt_set_int(avr, "out_sample_rate", info.sample_rate, 0);
 	SWR_INIT(avr);
+	avr_ctx = avr;
+	}
 
 	// Convert audio samples
 	int nb_samples = SWR_CONVERT(avr,	// audio resample context
@@ -1766,10 +1782,6 @@ void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
 							  audio_frame->linesize[0],	// input plane size, in bytes (0 if unknown)
 							  audio_frame->nb_samples);	// number of input samples to convert
 
-	// Deallocate resample buffer
-	SWR_CLOSE(avr);
-	SWR_FREE(&avr);
-	avr = NULL;
 
 	int64_t starting_frame_number = -1;
 	for (int channel_filter = 0; channel_filter < info.channels; channel_filter++) {
diff --git a/src/FFmpegReader.h b/src/FFmpegReader.h
index 06727d62..4ccd9554 100644
--- a/src/FFmpegReader.h
+++ b/src/FFmpegReader.h
@@ -148,6 +148,11 @@ namespace openshot {
 		int64_t NO_PTS_OFFSET;
 		PacketStatus packet_status;
 
+		// Cached conversion contexts and frames for performance
+		SwsContext *img_convert_ctx = nullptr; ///< Cached video scaler context
+		SWRCONTEXT *avr_ctx = nullptr; ///< Cached audio resample context
+		AVFrame *pFrameRGB_cached = nullptr; ///< Temporary frame used for video conversion
+
 		int hw_de_supported = 0;	// Is set by FFmpegReader
 #if USE_HW_ACCEL
 		AVPixelFormat hw_de_av_pix_fmt = AV_PIX_FMT_NONE;