You've already forked crosspoint-reader
mirror of
https://github.com/crosspoint-reader/crosspoint-reader.git
synced 2026-04-29 10:26:52 -07:00
9bc5111c77
## Summary * **What is the goal of this PR?** Slightly increase the OOM limit when loading epubs * **What changes are included?** Switched vectors for parsing epubs to deques, allowing use of more memory ## Additional Context * Increases loadable epub size from 2000+ to 5000+ chapter/ToC entries * #1574, but without the complicated stuff --- ### AI Usage While CrossPoint doesn't have restrictions on AI tools in contributing, please be transparent about their usage as it helps set the right context for reviewers. Did you use AI tools to help write this code? _**NO**_ ### Testing | Commit | Book | Time | |----------|------|-------| |83cd96bc2f| 1000.epub | ~30 sec | |83cd96bc2f| 5000.epub | crash | | PR | 1000.epub | ~29 sec | | PR | 5000.epub | ~2 min 20 sec | => No actual loading time regressions [tested_epubs.zip](https://github.com/user-attachments/files/26645243/tested_epubs.zip)
73 lines
2.6 KiB
C++
73 lines
2.6 KiB
C++
#pragma once

#include <HalStorage.h>

#include <cstddef>
#include <cstdint>
#include <deque>
#include <string>
#include <unordered_map>
class ZipFile {
|
|
public:
|
|
struct FileStatSlim {
|
|
uint16_t method; // Compression method
|
|
uint32_t compressedSize; // Compressed size
|
|
uint32_t uncompressedSize; // Uncompressed size
|
|
uint32_t localHeaderOffset; // Offset of local file header
|
|
};
|
|
|
|
struct ZipDetails {
|
|
uint32_t centralDirOffset;
|
|
uint16_t totalEntries;
|
|
bool isSet;
|
|
};
|
|
|
|
// Target for batch uncompressed size lookup (sorted by hash, then len)
|
|
struct SizeTarget {
|
|
uint64_t hash; // FNV-1a 64-bit hash of normalized path
|
|
uint16_t len; // Length of path for collision reduction
|
|
uint16_t index; // Caller's index (e.g. spine index)
|
|
};
|
|
|
|
// FNV-1a 64-bit hash computed from char buffer (no std::string allocation)
|
|
static uint64_t fnvHash64(const char* s, size_t len) {
|
|
uint64_t hash = 14695981039346656037ull;
|
|
for (size_t i = 0; i < len; i++) {
|
|
hash ^= static_cast<uint8_t>(s[i]);
|
|
hash *= 1099511628211ull;
|
|
}
|
|
return hash;
|
|
}
|
|
|
|
private:
|
|
const std::string& filePath;
|
|
FsFile file;
|
|
ZipDetails zipDetails = {0, 0, false};
|
|
std::unordered_map<std::string, FileStatSlim> fileStatSlimCache;
|
|
|
|
// Cursor for sequential central-dir scanning optimization
|
|
uint32_t lastCentralDirPos = 0;
|
|
bool lastCentralDirPosValid = false;
|
|
|
|
bool loadFileStatSlim(const char* filename, FileStatSlim* fileStat);
|
|
long getDataOffset(const FileStatSlim& fileStat);
|
|
bool loadZipDetails();
|
|
|
|
public:
|
|
explicit ZipFile(const std::string& filePath) : filePath(filePath) {}
|
|
~ZipFile() = default;
|
|
// Zip file can be opened and closed by hand in order to allow for quick calculation of inflated file size
|
|
// It is NOT recommended to pre-open it for any kind of inflation due to memory constraints
|
|
bool isOpen() const { return !!file; }
|
|
bool open();
|
|
bool close();
|
|
bool loadAllFileStatSlims();
|
|
bool getInflatedFileSize(const char* filename, size_t* size);
|
|
// Batch lookup: scan ZIP central dir once and fill sizes for matching targets.
|
|
// targets must be sorted by (hash, len). sizes[target.index] receives uncompressedSize.
|
|
// Returns number of targets matched.
|
|
int fillUncompressedSizes(std::deque<SizeTarget>& targets, std::deque<uint32_t>& sizes);
|
|
// Due to the memory required to run each of these, it is recommended to not preopen the zip file for multiple
|
|
// These functions will open and close the zip as needed
|
|
uint8_t* readFileToMemory(const char* filename, size_t* size = nullptr, bool trailingNullByte = false);
|
|
bool readFileToStream(const char* filename, Print& out, size_t chunkSize);
|
|
};
|