From 7fdd145dc23dce631905b3f8061754bcb87912fd Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Sun, 14 Dec 2025 23:53:36 -0600 Subject: [PATCH] Adding new memory trimming to more forcefully return memory to the OS after major memory clearing events (i.e. clearing all cache, closing readers, or large amounts of cache cleared). Also, refactoring default cache sizes in Timeline, FrameMapper, and FFmpegReader to better support high frame rate and high resolution videos (i.e. 4k 60 fps) so we don't immediately run out of memory. --- src/CMakeLists.txt | 1 + src/CacheMemory.cpp | 25 ++++++++++++-- src/CacheMemory.h | 2 ++ src/FFmpegReader.cpp | 18 ++++++---- src/FFmpegReader.h | 1 - src/FrameMapper.cpp | 6 +++- src/MemoryTrim.cpp | 80 ++++++++++++++++++++++++++++++++++++++++++++ src/MemoryTrim.h | 30 +++++++++++++++++ src/Timeline.cpp | 10 +++--- src/Timeline.h | 1 - 10 files changed, 156 insertions(+), 18 deletions(-) create mode 100644 src/MemoryTrim.cpp create mode 100644 src/MemoryTrim.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 249122ea..168ae102 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -72,6 +72,7 @@ set(OPENSHOT_SOURCES Fraction.cpp Frame.cpp FrameMapper.cpp + MemoryTrim.cpp Json.cpp KeyFrame.cpp OpenShotVersion.cpp diff --git a/src/CacheMemory.cpp b/src/CacheMemory.cpp index b768c12f..bd9a6c2c 100644 --- a/src/CacheMemory.cpp +++ b/src/CacheMemory.cpp @@ -13,12 +13,13 @@ #include "CacheMemory.h" #include "Exceptions.h" #include "Frame.h" +#include "MemoryTrim.h" using namespace std; using namespace openshot; // Default constructor, no max bytes -CacheMemory::CacheMemory() : CacheBase(0) { +CacheMemory::CacheMemory() : CacheBase(0), bytes_freed_since_trim(0) { // Set cache type name cache_type = "CacheMemory"; range_version = 0; @@ -26,7 +27,7 @@ CacheMemory::CacheMemory() : CacheBase(0) { } // Constructor that sets the max bytes to cache -CacheMemory::CacheMemory(int64_t max_bytes) : CacheBase(max_bytes) { 
+CacheMemory::CacheMemory(int64_t max_bytes) : CacheBase(max_bytes), bytes_freed_since_trim(0) { // Set cache type name cache_type = "CacheMemory"; range_version = 0; @@ -161,6 +162,7 @@ void CacheMemory::Remove(int64_t start_frame_number, int64_t end_frame_number) { // Create a scoped lock, to protect the cache from multiple threads const std::lock_guard lock(*cacheMutex); + int64_t removed_bytes = 0; // Loop through frame numbers std::deque::iterator itr; @@ -180,6 +182,10 @@ void CacheMemory::Remove(int64_t start_frame_number, int64_t end_frame_number) { if (*itr_ordered >= start_frame_number && *itr_ordered <= end_frame_number) { + // Count bytes freed before erasing the frame + if (frames.count(*itr_ordered)) + removed_bytes += frames[*itr_ordered]->GetBytes(); + // erase frame number frames.erase(*itr_ordered); itr_ordered = ordered_frame_numbers.erase(itr_ordered); @@ -187,6 +193,17 @@ void CacheMemory::Remove(int64_t start_frame_number, int64_t end_frame_number) itr_ordered++; } + if (removed_bytes > 0) + { + bytes_freed_since_trim += removed_bytes; + if (bytes_freed_since_trim >= TRIM_THRESHOLD_BYTES) + { + // Periodically return freed arenas to the OS + if (TrimMemoryToOS()) + bytes_freed_since_trim = 0; + } + } + // Needs range processing (since cache has changed) needs_range_processing = true; } @@ -229,6 +246,10 @@ void CacheMemory::Clear() ordered_frame_numbers.clear(); ordered_frame_numbers.shrink_to_fit(); needs_range_processing = true; + bytes_freed_since_trim = 0; + + // Trim freed arenas back to OS after large clears + TrimMemoryToOS(true); } // Count the frames in the queue diff --git a/src/CacheMemory.h b/src/CacheMemory.h index e35fdb11..9972b102 100644 --- a/src/CacheMemory.h +++ b/src/CacheMemory.h @@ -28,8 +28,10 @@ namespace openshot { */ class CacheMemory : public CacheBase { private: + static constexpr int64_t TRIM_THRESHOLD_BYTES = 1024LL * 1024 * 1024; ///< Release memory after freeing this much memory std::map > frames; ///< This map 
holds the frame number and Frame objects std::deque frame_numbers; ///< This queue holds a sequential list of cached Frame numbers + int64_t bytes_freed_since_trim; ///< Tracks bytes freed to trigger a heap trim /// Clean up cached frames that exceed the max number of bytes void CleanUp(); diff --git a/src/FFmpegReader.cpp b/src/FFmpegReader.cpp index 5676214f..75990fce 100644 --- a/src/FFmpegReader.cpp +++ b/src/FFmpegReader.cpp @@ -23,6 +23,7 @@ #include "FFmpegReader.h" #include "Exceptions.h" +#include "MemoryTrim.h" #include "Timeline.h" #include "ZmqLogger.h" @@ -77,7 +78,7 @@ FFmpegReader::FFmpegReader(const std::string &path, DurationStrategy duration_st : last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999), path(path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false), seek_audio_frame_found(0), seek_video_frame_found(0),is_duration_known(false), largest_frame_processed(0), - current_video_frame(0), packet(NULL), max_concurrent_frames(OPEN_MP_NUM_PROCESSORS), duration_strategy(duration_strategy), + current_video_frame(0), packet(NULL), duration_strategy(duration_strategy), audio_pts(0), video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL), pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0}, hold_packet(false) { @@ -92,8 +93,8 @@ FFmpegReader::FFmpegReader(const std::string &path, DurationStrategy duration_st audio_pts_seconds = NO_PTS_OFFSET; // Init cache - working_cache.SetMaxBytesFromInfo(max_concurrent_frames * info.fps.ToDouble() * 2, info.width, info.height, info.sample_rate, info.channels); - final_cache.SetMaxBytesFromInfo(max_concurrent_frames * 2, info.width, info.height, info.sample_rate, info.channels); + working_cache.SetMaxBytesFromInfo(info.fps.ToDouble() * 2, info.width, info.height, info.sample_rate, info.channels); + final_cache.SetMaxBytesFromInfo(24, info.width, 
info.height, info.sample_rate, info.channels); // Open and Close the reader, to populate its attributes (such as height, width, etc...) if (inspect_reader) { @@ -610,8 +611,8 @@ void FFmpegReader::Open() { previous_packet_location.sample_start = 0; // Adjust cache size based on size of frame and audio - working_cache.SetMaxBytesFromInfo(max_concurrent_frames * info.fps.ToDouble() * 2, info.width, info.height, info.sample_rate, info.channels); - final_cache.SetMaxBytesFromInfo(max_concurrent_frames * 2, info.width, info.height, info.sample_rate, info.channels); + working_cache.SetMaxBytesFromInfo(info.fps.ToDouble() * 2, info.width, info.height, info.sample_rate, info.channels); + final_cache.SetMaxBytesFromInfo(24, info.width, info.height, info.sample_rate, info.channels); // Scan PTS for any offsets (i.e. non-zero starting streams). At least 1 stream must start at zero timestamp. // This method allows us to shift timestamps to ensure at least 1 stream is starting at zero. @@ -713,6 +714,9 @@ void FFmpegReader::Close() { avformat_close_input(&pFormatCtx); av_freep(&pFormatCtx); + // Release freed arenas back to OS after heavy teardown + TrimMemoryToOS(true); + // Reset some variables last_frame = 0; hold_packet = false; @@ -1101,7 +1105,7 @@ std::shared_ptr FFmpegReader::ReadStream(int64_t requested_frame) { int packet_error = -1; // Debug output - ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream", "requested_frame", requested_frame, "max_concurrent_frames", max_concurrent_frames); + ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream", "requested_frame", requested_frame); // Loop through the stream until the correct frame is found while (true) { @@ -1939,7 +1943,7 @@ void FFmpegReader::Seek(int64_t requested_frame) { seek_count++; // If seeking near frame 1, we need to close and re-open the file (this is more reliable than seeking) - int buffer_amount = std::max(max_concurrent_frames, 8); + int buffer_amount = 12; if
(requested_frame - buffer_amount < 20) { // prevent Open() from seeking again is_seeking = true; diff --git a/src/FFmpegReader.h b/src/FFmpegReader.h index 218bbc8f..f264754c 100644 --- a/src/FFmpegReader.h +++ b/src/FFmpegReader.h @@ -117,7 +117,6 @@ namespace openshot { bool is_duration_known; bool check_interlace; bool check_fps; - int max_concurrent_frames; DurationStrategy duration_strategy; CacheMemory working_cache; diff --git a/src/FrameMapper.cpp b/src/FrameMapper.cpp index fc0962b4..531cd571 100644 --- a/src/FrameMapper.cpp +++ b/src/FrameMapper.cpp @@ -17,6 +17,7 @@ #include "FrameMapper.h" #include "Exceptions.h" #include "Clip.h" +#include "MemoryTrim.h" #include "ZmqLogger.h" using namespace std; @@ -745,6 +746,9 @@ void FrameMapper::Close() SWR_FREE(&avr); avr = NULL; } + + // Release freed arenas back to OS after heavy teardown + TrimMemoryToOS(true); } @@ -841,7 +845,7 @@ void FrameMapper::ChangeMapping(Fraction target_fps, PulldownType target_pulldow final_cache.Clear(); // Adjust cache size based on size of frame and audio - final_cache.SetMaxBytesFromInfo(OPEN_MP_NUM_PROCESSORS, info.width, info.height, info.sample_rate, info.channels); + final_cache.SetMaxBytesFromInfo(24, info.width, info.height, info.sample_rate, info.channels); // Deallocate resample buffer if (avr) { diff --git a/src/MemoryTrim.cpp b/src/MemoryTrim.cpp new file mode 100644 index 00000000..8a73ec50 --- /dev/null +++ b/src/MemoryTrim.cpp @@ -0,0 +1,80 @@ +/** + * @file + * @brief Cross-platform helper to encourage returning freed memory to the OS + * + * @ref License + */ + +// Copyright (c) 2008-2025 OpenShot Studios, LLC +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#include "MemoryTrim.h" + +#include <atomic> +#include <chrono> +#include <cstdint> + +#if defined(__GLIBC__) +#include <malloc.h> +#elif defined(_WIN32) +#include <malloc.h> +#elif defined(__APPLE__) +#include <malloc/malloc.h> +#endif + +namespace { +// Limit trim attempts to once per interval to avoid spamming platform calls +constexpr uint64_t kMinTrimIntervalMs =
1000; // 1s debounce +std::atomic<uint64_t> g_last_trim_ms{0}; +std::atomic<bool> g_trim_in_progress{false}; + +uint64_t NowMs() { + using namespace std::chrono; + return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count(); +} +} // namespace + +namespace openshot { + +bool TrimMemoryToOS(bool force) noexcept { + const uint64_t now_ms = NowMs(); + const uint64_t last_ms = g_last_trim_ms.load(std::memory_order_relaxed); + + // Skip if we recently trimmed (unless forced) + if (!force && now_ms - last_ms < kMinTrimIntervalMs) + return false; + + // Only one trim attempt runs at a time + bool expected = false; + if (!g_trim_in_progress.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) + return false; + + bool did_trim = false; + +#if defined(__GLIBC__) + // GLIBC exposes malloc_trim to release free arenas back to the OS + malloc_trim(0); + did_trim = true; +#elif defined(_WIN32) + // MinGW/MSYS2 expose _heapmin to compact the CRT heap + _heapmin(); + did_trim = true; +#elif defined(__APPLE__) + // macOS uses the malloc zone API to relieve memory pressure + malloc_zone_t* zone = malloc_default_zone(); + malloc_zone_pressure_relief(zone, 0); + did_trim = true; +#else + // Platforms without a known trimming API + did_trim = false; +#endif + + if (did_trim) + g_last_trim_ms.store(now_ms, std::memory_order_relaxed); + + g_trim_in_progress.store(false, std::memory_order_release); + return did_trim; +} + +} // namespace openshot diff --git a/src/MemoryTrim.h b/src/MemoryTrim.h new file mode 100644 index 00000000..943fa0ae --- /dev/null +++ b/src/MemoryTrim.h @@ -0,0 +1,30 @@ +/** + * @file + * @brief Cross-platform helper to encourage returning freed memory to the OS + * + * @ref License + */ + +// Copyright (c) 2008-2025 OpenShot Studios, LLC +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#pragma once + +namespace openshot { + +/** + * @brief Attempt to return unused heap memory to the operating system.
+ * + * This maps to the appropriate platform-specific API where available. + * The call is safe to invoke on any supported platform; on unsupported + * platforms it will simply return false without doing anything. + * Calls are rate-limited internally (1s debounce) and single-flight. A forced + * call bypasses the debounce but still honors the single-flight guard. + * + * @param force If true, bypass the debounce interval (useful for teardown). + * @return true if a platform-specific trim call was made, false otherwise. + */ +bool TrimMemoryToOS(bool force = false) noexcept; + +} // namespace openshot diff --git a/src/Timeline.cpp b/src/Timeline.cpp index 272ae0fd..26fa96bd 100644 --- a/src/Timeline.cpp +++ b/src/Timeline.cpp @@ -29,8 +29,7 @@ using namespace openshot; // Default Constructor for the timeline (which sets the canvas width and height) Timeline::Timeline(int width, int height, Fraction fps, int sample_rate, int channels, ChannelLayout channel_layout) : - is_open(false), auto_map_clips(true), managed_cache(true), path(""), - max_concurrent_frames(OPEN_MP_NUM_PROCESSORS), max_time(0.0) + is_open(false), auto_map_clips(true), managed_cache(true), path(""), max_time(0.0) { // Create CrashHandler and Attach (incase of errors) CrashHandler::Instance(); @@ -70,7 +69,7 @@ Timeline::Timeline(int width, int height, Fraction fps, int sample_rate, int cha // Init cache final_cache = new CacheMemory(); - final_cache->SetMaxBytesFromInfo(max_concurrent_frames * 4, info.width, info.height, info.sample_rate, info.channels); + final_cache->SetMaxBytesFromInfo(24, info.width, info.height, info.sample_rate, info.channels); } // Delegating constructor that copies parameters from a provided ReaderInfo @@ -80,8 +79,7 @@ Timeline::Timeline(const ReaderInfo info) : Timeline::Timeline( // Constructor for the timeline (which loads a JSON structure from a file path, and initializes a timeline) Timeline::Timeline(const std::string& projectPath, bool convert_absolute_paths) : - 
is_open(false), auto_map_clips(true), managed_cache(true), path(projectPath), - max_concurrent_frames(OPEN_MP_NUM_PROCESSORS), max_time(0.0) { + is_open(false), auto_map_clips(true), managed_cache(true), path(projectPath), max_time(0.0) { // Create CrashHandler and Attach (incase of errors) CrashHandler::Instance(); @@ -203,7 +201,7 @@ Timeline::Timeline(const std::string& projectPath, bool convert_absolute_paths) // Init cache final_cache = new CacheMemory(); - final_cache->SetMaxBytesFromInfo(max_concurrent_frames * 4, info.width, info.height, info.sample_rate, info.channels); + final_cache->SetMaxBytesFromInfo(24, info.width, info.height, info.sample_rate, info.channels); } Timeline::~Timeline() { diff --git a/src/Timeline.h b/src/Timeline.h index 61a164f0..46cdfb31 100644 --- a/src/Timeline.h +++ b/src/Timeline.h @@ -165,7 +165,6 @@ namespace openshot { std::set allocated_frame_mappers; ///< all the frame mappers we allocated and must free bool managed_cache; ///< Does this timeline instance manage the cache object std::string path; ///< Optional path of loaded UTF-8 OpenShot JSON project file - int max_concurrent_frames; ///< Max concurrent frames to process at one time double max_time; ///> The max duration (in seconds) of the timeline, based on the furthest clip (right edge) double min_time; ///> The min duration (in seconds) of the timeline, based on the position of the first clip (left edge)