From 79ab53dcd9e91cc8d1d8b041a53aeb6018bc736e Mon Sep 17 00:00:00 2001
From: Philip Craig
Date: Sat, 27 Feb 2021 16:43:13 +1000
Subject: [PATCH] Add ReadCache

---
 src/read/mod.rs        |   5 ++
 src/read/read_cache.rs | 130 +++++++++++++++++++++++++++++++++++++++++
 tests/parse_self.rs    |  15 ++++-
 3 files changed, 149 insertions(+), 1 deletion(-)
 create mode 100644 src/read/read_cache.rs

diff --git a/src/read/mod.rs b/src/read/mod.rs
index 0963210..674167e 100644
--- a/src/read/mod.rs
+++ b/src/read/mod.rs
@@ -10,6 +10,11 @@ use crate::{ByteString, Endianness};
 mod read_ref;
 pub use read_ref::*;
 
+#[cfg(feature = "std")]
+mod read_cache;
+#[cfg(feature = "std")]
+pub use read_cache::*;
+
 mod util;
 pub use util::StringTable;
 
diff --git a/src/read/read_cache.rs b/src/read/read_cache.rs
new file mode 100644
index 0000000..6a6ee02
--- /dev/null
+++ b/src/read/read_cache.rs
@@ -0,0 +1,130 @@
+use std::boxed::Box;
+use std::cell::RefCell;
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
+use std::convert::TryInto;
+use std::io::{Read, Seek, SeekFrom};
+use std::mem;
+
+use crate::read::ReadRef;
+
+/// An implementation of `ReadRef` for data in a stream that implements
+/// `Read + Seek`.
+///
+/// Contains a cache of read-only blocks of data, allowing references to
+/// them to be returned. Entries in the cache are never removed.
+/// Entries are keyed on the offset and size of the read.
+/// Currently overlapping reads are considered separate reads.
+#[derive(Debug)]
+pub struct ReadCache<R: Read + Seek> {
+    cache: RefCell<ReadCacheInternal<R>>,
+}
+
+#[derive(Debug)]
+struct ReadCacheInternal<R: Read + Seek> {
+    read: R,
+    bufs: HashMap<(u64, u64), Box<[u8]>>,
+}
+
+impl<R: Read + Seek> ReadCache<R> {
+    /// Create an empty `ReadCache` for the given stream.
+    pub fn new(read: R) -> Self {
+        ReadCache {
+            cache: RefCell::new(ReadCacheInternal {
+                read,
+                bufs: HashMap::new(),
+            }),
+        }
+    }
+
+    /// Return an implementation of `ReadRef` that restricts reads
+    /// to the given range of the stream.
+    pub fn range<'a>(&'a self, offset: u64, size: u64) -> ReadCacheRange<'a, R> {
+        ReadCacheRange {
+            r: self,
+            offset,
+            size,
+        }
+    }
+
+    /// Free buffers used by the cache.
+    pub fn clear(&mut self) {
+        self.cache.borrow_mut().bufs.clear();
+    }
+
+    /// Unwrap this `ReadCache`, returning the underlying reader.
+    pub fn into_inner(self) -> R {
+        self.cache.into_inner().read
+    }
+}
+
+impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache<R> {
+    fn len(self) -> Result<u64, ()> {
+        let cache = &mut *self.cache.borrow_mut();
+        cache.read.seek(SeekFrom::End(0)).map_err(|_| ())
+    }
+
+    fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8], ()> {
+        if size == 0 {
+            return Ok(&[]);
+        }
+        let cache = &mut *self.cache.borrow_mut();
+        let buf = match cache.bufs.entry((offset, size)) {
+            Entry::Occupied(entry) => entry.into_mut(),
+            Entry::Vacant(entry) => {
+                let size = size.try_into().map_err(|_| ())?;
+                cache
+                    .read
+                    .seek(SeekFrom::Start(offset as u64))
+                    .map_err(|_| ())?;
+                let mut bytes = vec![0; size].into_boxed_slice();
+                cache.read.read_exact(&mut bytes).map_err(|_| ())?;
+                entry.insert(bytes)
+            }
+        };
+        // Extend the lifetime to that of self.
+        // This is OK because we never mutate or remove entries.
+        Ok(unsafe { mem::transmute::<&[u8], &[u8]>(buf) })
+    }
+}
+
+/// An implementation of `ReadRef` for a range of data in a stream that
+/// implements `Read + Seek`.
+///
+/// Shares an underlying `ReadCache` with a lifetime of `'a`.
+#[derive(Debug)]
+pub struct ReadCacheRange<'a, R: Read + Seek> {
+    r: &'a ReadCache<R>,
+    offset: u64,
+    size: u64,
+}
+
+impl<'a, R: Read + Seek> Clone for ReadCacheRange<'a, R> {
+    fn clone(&self) -> Self {
+        Self {
+            r: self.r,
+            offset: self.offset,
+            size: self.size,
+        }
+    }
+}
+
+impl<'a, R: Read + Seek> Copy for ReadCacheRange<'a, R> {}
+
+impl<'a, R: Read + Seek> ReadRef<'a> for ReadCacheRange<'a, R> {
+    fn len(self) -> Result<u64, ()> {
+        Ok(self.size)
+    }
+
+    fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8], ()> {
+        if size == 0 {
+            return Ok(&[]);
+        }
+        let end = offset.checked_add(size).ok_or(())?;
+        if end > self.size {
+            return Err(());
+        }
+        let r_offset = self.offset.checked_add(offset).ok_or(())?;
+        self.r.read_bytes_at(r_offset, size)
+    }
+}
diff --git a/tests/parse_self.rs b/tests/parse_self.rs
index 0367d35..1e7df67 100644
--- a/tests/parse_self.rs
+++ b/tests/parse_self.rs
@@ -6,7 +6,20 @@ use std::{env, fs};
 fn parse_self() {
     let exe = env::current_exe().unwrap();
     let data = fs::read(exe).unwrap();
-    let object = File::parse(&data).unwrap();
+    let object = File::parse(&*data).unwrap();
+    assert!(object.entry() != 0);
+    assert!(object.sections().count() != 0);
+}
+
+#[cfg(feature = "std")]
+#[test]
+fn parse_self_cache() {
+    use object::read::{ReadCache, ReadRef};
+    let exe = env::current_exe().unwrap();
+    let file = fs::File::open(exe).unwrap();
+    let cache = ReadCache::new(file);
+    let data = cache.range(0, cache.len().unwrap());
+    let object = File::parse(data).unwrap();
     assert!(object.entry() != 0);
     assert!(object.sections().count() != 0);
 }