From 20e42a499a7af4bda4f8010c76bdf312df9bcd4c Mon Sep 17 00:00:00 2001 From: Luke Street Date: Mon, 4 Mar 2024 18:06:21 -0700 Subject: [PATCH] Rework .splitmeta, now .note.split Uses actual ELF .note format, which is more standard and handled better by mwld. --- objdiff-core/src/obj/elf.rs | 11 +- objdiff-core/src/obj/mod.rs | 4 +- objdiff-core/src/obj/split_meta.rs | 214 ++++++++++++++++++----------- 3 files changed, 135 insertions(+), 94 deletions(-) diff --git a/objdiff-core/src/obj/elf.rs b/objdiff-core/src/obj/elf.rs index 06ad30b..c3db386 100644 --- a/objdiff-core/src/obj/elf.rs +++ b/objdiff-core/src/obj/elf.rs @@ -328,13 +328,12 @@ fn line_info(obj_file: &File<'_>) -> Result>> { // DWARF 2+ #[cfg(feature = "dwarf")] { - use std::borrow::Cow; let dwarf_cow = gimli::Dwarf::load(|id| { Ok::<_, gimli::Error>( obj_file .section_by_name(id.name()) .and_then(|section| section.uncompressed_data().ok()) - .unwrap_or(Cow::Borrowed(&[][..])), + .unwrap_or(std::borrow::Cow::Borrowed(&[][..])), ) })?; let endian = match obj_file.endianness() { @@ -407,13 +406,7 @@ pub fn has_function(obj_path: &Path, symbol_name: &str) -> Result { fn split_meta(obj_file: &File<'_>) -> Result> { Ok(if let Some(section) = obj_file.section_by_name(SPLITMETA_SECTION) { - if section.size() != 0 { - let data = section.uncompressed_data()?; - let mut reader = data.as_ref(); - Some(SplitMeta::from_reader(&mut reader, obj_file.endianness(), obj_file.is_64())?) - } else { - None - } + Some(SplitMeta::from_section(section, obj_file.endianness(), obj_file.is_64())?) } else { None }) diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index 75c50b7..afbbf3b 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -188,7 +188,7 @@ pub struct ObjSymbol { pub size_known: bool, pub flags: ObjSymbolFlagSet, pub addend: i64, - /// Original virtual address (from .splitmeta section) + /// Original virtual address (from .note.split section) pub virtual_address: Option, // Diff @@ -215,7 +215,7 @@ pub struct ObjInfo { pub common: Vec, /// Line number info (.line or .debug_line section) pub line_info: Option>, - /// Split object metadata (.splitmeta section) + /// Split object metadata (.note.split section) pub split_meta: Option, } diff --git a/objdiff-core/src/obj/split_meta.rs b/objdiff-core/src/obj/split_meta.rs index c5c7b65..5a8b3c7 100644 --- a/objdiff-core/src/obj/split_meta.rs +++ b/objdiff-core/src/obj/split_meta.rs @@ -1,13 +1,10 @@ -use std::{ - io, - io::{Read, Write}, -}; +use std::{io, io::Write}; -use object::{elf::SHT_LOUSER, Endian}; +use object::{elf::SHT_NOTE, Endian, ObjectSection}; -pub const SPLITMETA_SECTION: &str = ".splitmeta"; -// Use the same section type as .mwcats.* so the linker ignores it -pub const SHT_SPLITMETA: u32 = SHT_LOUSER + 0x4A2A82C2; +pub const SPLITMETA_SECTION: &str = ".note.split"; +pub const SHT_SPLITMETA: u32 = SHT_NOTE; +pub const ELF_NOTE_SPLIT: &[u8] = b"Split"; /// This is used to store metadata about the source of an object file, /// such as the original virtual addresses and the tool that wrote it. @@ -24,79 +21,50 @@ pub struct SplitMeta { pub virtual_addresses: Option>, } -/** - * .splitmeta section format: - * - Magic: "SPMD" - * - Section: Magic: 4 bytes, Data size: 4 bytes, Data: variable - * Section size can be used to skip unknown sections - * - Repeat section until EOF - * Endianness matches the object file - * - * Sections: - * - Generator: Magic: "GENR", Data size: 4 bytes, Data: UTF-8 string (no null terminator) - * - Virtual addresses: Magic: "VIRT", Data size: 4 bytes, Data: array - * Data is u32 array for 32-bit objects, u64 array for 64-bit objects - * Count is size / 4 (32-bit) or size / 8 (64-bit) - */ - -const SPLIT_META_MAGIC: [u8; 4] = *b"SPMD"; -const GENERATOR_MAGIC: [u8; 4] = *b"GENR"; -const MODULE_NAME_MAGIC: [u8; 4] = *b"MODN"; -const MODULE_ID_MAGIC: [u8; 4] = *b"MODI"; -const VIRTUAL_ADDRESS_MAGIC: [u8; 4] = *b"VIRT"; +const NT_SPLIT_GENERATOR: u32 = u32::from_be_bytes(*b"GENR"); +const NT_SPLIT_MODULE_NAME: u32 = u32::from_be_bytes(*b"MODN"); +const NT_SPLIT_MODULE_ID: u32 = u32::from_be_bytes(*b"MODI"); +const NT_SPLIT_VIRTUAL_ADDRESSES: u32 = u32::from_be_bytes(*b"VIRT"); impl SplitMeta { - pub fn from_reader(reader: &mut R, e: E, is_64: bool) -> io::Result - where - E: Endian, - R: Read + ?Sized, - { - let mut magic = [0; 4]; - reader.read_exact(&mut magic)?; - if magic != SPLIT_META_MAGIC { - return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid split metadata magic")); - } + pub fn from_section(section: object::Section, e: E, is_64: bool) -> io::Result + where E: Endian { let mut result = SplitMeta::default(); - loop { - let mut magic = [0; 4]; - match reader.read_exact(&mut magic) { - Ok(()) => {} - Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break, - Err(e) => return Err(e), - }; - let mut size_bytes = [0; 4]; - reader.read_exact(&mut size_bytes)?; - let size = e.read_u32_bytes(size_bytes); - let mut data = vec![0; size as usize]; - reader.read_exact(&mut data)?; - match magic { - GENERATOR_MAGIC => { - let string = String::from_utf8(data) + let data = section.uncompressed_data().map_err(object_io_error)?; + let mut iter = NoteIterator::new(data.as_ref(), section.align(), e, is_64)?; + while let Some(note) = iter.next(e)? { + if note.name != ELF_NOTE_SPLIT { + continue; + } + match note.n_type { + NT_SPLIT_GENERATOR => { + let string = String::from_utf8(note.desc.to_vec()) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; result.generator = Some(string); } - MODULE_NAME_MAGIC => { - let string = String::from_utf8(data) + NT_SPLIT_MODULE_NAME => { + let string = String::from_utf8(note.desc.to_vec()) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; result.module_name = Some(string); } - MODULE_ID_MAGIC => { - let id = e.read_u32_bytes(data.as_slice().try_into().map_err(|_| { - io::Error::new(io::ErrorKind::InvalidData, "Invalid module ID size") - })?); - result.module_id = Some(id); + NT_SPLIT_MODULE_ID => { + result.module_id = + Some(e.read_u32_bytes(note.desc.try_into().map_err(|_| { + io::Error::new(io::ErrorKind::InvalidData, "Invalid module ID size") + })?)); } - VIRTUAL_ADDRESS_MAGIC => { + NT_SPLIT_VIRTUAL_ADDRESSES => { let vec = if is_64 { - let mut vec = vec![0u64; data.len() / 8]; - for i in 0..vec.len() { - vec[i] = e.read_u64_bytes(data[i * 8..(i + 1) * 8].try_into().unwrap()); + let mut vec = vec![0u64; note.desc.len() / 8]; + for (i, v) in vec.iter_mut().enumerate() { + *v = + e.read_u64_bytes(note.desc[i * 8..(i + 1) * 8].try_into().unwrap()); } vec } else { - let mut vec = vec![0u64; data.len() / 4]; - for i in 0..vec.len() { - vec[i] = e.read_u32_bytes(data[i * 4..(i + 1) * 4].try_into().unwrap()) + let mut vec = vec![0u64; note.desc.len() / 4]; + for (i, v) in vec.iter_mut().enumerate() { + *v = e.read_u32_bytes(note.desc[i * 4..(i + 1) * 4].try_into().unwrap()) as u64; } vec @@ -116,32 +84,29 @@ impl SplitMeta { E: Endian, W: Write + ?Sized, { - writer.write_all(&SPLIT_META_MAGIC)?; if let Some(generator) = &self.generator { - writer.write_all(&GENERATOR_MAGIC)?; - writer.write_all(&e.write_u32_bytes(generator.len() as u32))?; + write_note_header(writer, e, NT_SPLIT_GENERATOR, generator.len())?; writer.write_all(generator.as_bytes())?; + align_to_4(writer, generator.len())?; } if let Some(module_name) = &self.module_name { - writer.write_all(&MODULE_NAME_MAGIC)?; - writer.write_all(&e.write_u32_bytes(module_name.len() as u32))?; + write_note_header(writer, e, NT_SPLIT_MODULE_NAME, module_name.len())?; writer.write_all(module_name.as_bytes())?; + align_to_4(writer, module_name.len())?; } if let Some(module_id) = self.module_id { - writer.write_all(&MODULE_ID_MAGIC)?; - writer.write_all(&e.write_u32_bytes(4))?; + write_note_header(writer, e, NT_SPLIT_MODULE_ID, 4)?; writer.write_all(&e.write_u32_bytes(module_id))?; } if let Some(virtual_addresses) = &self.virtual_addresses { - writer.write_all(&VIRTUAL_ADDRESS_MAGIC)?; - let count = virtual_addresses.len() as u32; + let count = virtual_addresses.len(); + let size = if is_64 { count * 8 } else { count * 4 }; + write_note_header(writer, e, NT_SPLIT_VIRTUAL_ADDRESSES, size)?; if is_64 { - writer.write_all(&e.write_u32_bytes(count * 8))?; for &addr in virtual_addresses { writer.write_all(&e.write_u64_bytes(addr))?; } } else { - writer.write_all(&e.write_u32_bytes(count * 4))?; for &addr in virtual_addresses { writer.write_all(&e.write_u32_bytes(addr as u32))?; } @@ -151,19 +116,102 @@ impl SplitMeta { } pub fn write_size(&self, is_64: bool) -> usize { - let mut size = 4; + let mut size = 0; if let Some(generator) = self.generator.as_deref() { - size += 8 + generator.len(); + size += NOTE_HEADER_SIZE + generator.len(); } if let Some(module_name) = self.module_name.as_deref() { - size += 8 + module_name.len(); + size += NOTE_HEADER_SIZE + module_name.len(); } if self.module_id.is_some() { - size += 12; + size += NOTE_HEADER_SIZE + 4; } if let Some(virtual_addresses) = self.virtual_addresses.as_deref() { - size += 8 + if is_64 { 8 } else { 4 } * virtual_addresses.len(); + size += NOTE_HEADER_SIZE + if is_64 { 8 } else { 4 } * virtual_addresses.len(); } size } } + +/// Convert an object::read::Error to an io::Error. +fn object_io_error(err: object::read::Error) -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, err) +} + +/// An ELF note entry. +struct Note<'data> { + n_type: u32, + name: &'data [u8], + desc: &'data [u8], +} + +/// object::read::elf::NoteIterator is awkward to use generically, +/// so wrap it in our own iterator. +enum NoteIterator<'data, E> +where E: Endian +{ + B32(object::read::elf::NoteIterator<'data, object::elf::FileHeader32>), + B64(object::read::elf::NoteIterator<'data, object::elf::FileHeader64>), +} + +impl<'data, E> NoteIterator<'data, E> +where E: Endian +{ + fn new(data: &'data [u8], align: u64, e: E, is_64: bool) -> io::Result { + Ok(if is_64 { + NoteIterator::B64( + object::read::elf::NoteIterator::new(e, align, data).map_err(object_io_error)?, + ) + } else { + NoteIterator::B32( + object::read::elf::NoteIterator::new(e, align as u32, data) + .map_err(object_io_error)?, + ) + }) + } + + fn next(&mut self, e: E) -> io::Result>> { + match self { + NoteIterator::B32(iter) => Ok(iter.next().map_err(object_io_error)?.map(|note| Note { + n_type: note.n_type(e), + name: note.name(), + desc: note.desc(), + })), + NoteIterator::B64(iter) => Ok(iter.next().map_err(object_io_error)?.map(|note| Note { + n_type: note.n_type(e), + name: note.name(), + desc: note.desc(), + })), + } + } +} + +fn align_to_4(writer: &mut W, len: usize) -> io::Result<()> { + const ALIGN_BYTES: &[u8] = &[0; 4]; + if len % 4 != 0 { + writer.write_all(&ALIGN_BYTES[..4 - len % 4])?; + } + Ok(()) +} + +// ELF note format: +// Name Size | 4 bytes (integer) +// Desc Size | 4 bytes (integer) +// Type | 4 bytes (usually interpreted as an integer) +// Name | variable size, padded to a 4 byte boundary +// Desc | variable size, padded to a 4 byte boundary +const NOTE_HEADER_SIZE: usize = 12 + ((ELF_NOTE_SPLIT.len() + 4) & !3); + +fn write_note_header(writer: &mut W, e: E, kind: u32, desc_len: usize) -> io::Result<()> +where + E: Endian, + W: Write + ?Sized, +{ + writer.write_all(&e.write_u32_bytes(ELF_NOTE_SPLIT.len() as u32 + 1))?; // Name Size + writer.write_all(&e.write_u32_bytes(desc_len as u32))?; // Desc Size + writer.write_all(&e.write_u32_bytes(kind))?; // Type + writer.write_all(ELF_NOTE_SPLIT)?; // Name + writer.write_all(&[0; 1])?; // Null terminator + align_to_4(writer, ELF_NOTE_SPLIT.len() + 1)?; + Ok(()) +}