From 90e81fad7e4b5abd98a99efbb75b3e0fb56a96a2 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Thu, 25 Sep 2025 00:26:43 -0600 Subject: [PATCH] Improve ARM function size inference This allows 2-byte padding to be trimmed in ARM functions. Resolves #253 --- objdiff-core/src/arch/arm.rs | 14 +++-- objdiff-core/src/arch/mips.rs | 1 + objdiff-core/src/arch/ppc/mod.rs | 1 + objdiff-core/src/obj/mod.rs | 29 ++++++----- objdiff-core/tests/arch_arm.rs | 17 +++++++ objdiff-core/tests/data/arm/issue_253.o | Bin 0 -> 748 bytes .../arch_arm__trim_trailing_hword-2.snap | 7 +++ .../arch_arm__trim_trailing_hword.snap | 48 ++++++++++++++++++ 8 files changed, 98 insertions(+), 19 deletions(-) create mode 100644 objdiff-core/tests/data/arm/issue_253.o create mode 100644 objdiff-core/tests/snapshots/arch_arm__trim_trailing_hword-2.snap create mode 100644 objdiff-core/tests/snapshots/arch_arm__trim_trailing_hword.snap diff --git a/objdiff-core/src/arch/arm.rs b/objdiff-core/src/arch/arm.rs index fc67d10..a6bc271 100644 --- a/objdiff-core/src/arch/arm.rs +++ b/objdiff-core/src/arch/arm.rs @@ -460,12 +460,16 @@ impl Arch for ArchArm { section: &Section, mut next_address: u64, ) -> Result { - // Trim any trailing 4-byte zeroes from the end (padding) - while next_address >= symbol.address + 4 - && let Some(data) = section.data_range(next_address - 4, 4) - && data == [0u8; 4] + // TODO: This should probably check the disasm mode and trim accordingly, + // but self.disasm_modes isn't populated until post_init, so it needs a refactor. + + // Trim any trailing 2-byte zeroes from the end (padding) + while next_address >= symbol.address + 2 + && let Some(data) = section.data_range(next_address - 2, 2) + && data == [0u8; 2] + && section.relocation_at(next_address - 2, 2).is_none() { - next_address -= 4; + next_address -= 2; } Ok(next_address.saturating_sub(symbol.address)) } diff --git a/objdiff-core/src/arch/mips.rs b/objdiff-core/src/arch/mips.rs index d38fc2d..04779ac 100644 --- a/objdiff-core/src/arch/mips.rs +++ b/objdiff-core/src/arch/mips.rs @@ -355,6 +355,7 @@ impl Arch for ArchMips { while new_address >= symbol.address + 4 && let Some(data) = section.data_range(new_address - 4, 4) && data == [0u8; 4] + && section.relocation_at(next_address - 4, 4).is_none() { new_address -= 4; } diff --git a/objdiff-core/src/arch/ppc/mod.rs b/objdiff-core/src/arch/ppc/mod.rs index 31ca501..24f01eb 100644 --- a/objdiff-core/src/arch/ppc/mod.rs +++ b/objdiff-core/src/arch/ppc/mod.rs @@ -457,6 +457,7 @@ impl Arch for ArchPpc { while next_address >= symbol.address + 4 && let Some(data) = section.data_range(next_address - 4, 4) && data == [0u8; 4] + && section.relocation_at(next_address - 4, 4).is_none() { next_address -= 4; } diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index 96a6df4..8a58319 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -107,32 +107,33 @@ impl Section { // The alignment to use when "Combine data/text sections" is enabled. pub fn combined_alignment(&self) -> u64 { const MIN_ALIGNMENT: u64 = 4; - self.align.map(|align| align.get().max(MIN_ALIGNMENT)).unwrap_or(MIN_ALIGNMENT) + self.align.map_or(MIN_ALIGNMENT, |align| align.get().max(MIN_ALIGNMENT)) } - pub fn relocation_at<'obj>( - &'obj self, - obj: &'obj Object, - ins_ref: InstructionRef, - ) -> Option> { - match self.relocations.binary_search_by_key(&ins_ref.address, |r| r.address) { + pub fn relocation_at(&self, address: u64, size: u8) -> Option<&Relocation> { + match self.relocations.binary_search_by_key(&address, |r| r.address) { Ok(mut i) => { // Find the first relocation at the address while i .checked_sub(1) .and_then(|n| self.relocations.get(n)) - .is_some_and(|r| r.address == ins_ref.address) + .is_some_and(|r| r.address == address) { i -= 1; } self.relocations.get(i) } - Err(i) => self - .relocations - .get(i) - .filter(|r| r.address < ins_ref.address + ins_ref.size as u64), + Err(i) => self.relocations.get(i).filter(|r| r.address < address + size as u64), } - .and_then(|relocation| { + } + + pub fn resolve_relocation_at<'obj>( + &'obj self, + obj: &'obj Object, + address: u64, + size: u8, + ) -> Option> { + self.relocation_at(address, size).and_then(|relocation| { let symbol = obj.symbols.get(relocation.target_symbol)?; Some(ResolvedRelocation { relocation, symbol }) }) @@ -316,7 +317,7 @@ impl Object { let section = self.sections.get(section_index)?; let offset = ins_ref.address.checked_sub(section.address)?; let code = section.data.get(offset as usize..offset as usize + ins_ref.size as usize)?; - let relocation = section.relocation_at(self, ins_ref); + let relocation = section.resolve_relocation_at(self, ins_ref.address, ins_ref.size); Some(ResolvedInstructionRef { ins_ref, symbol_index, diff --git a/objdiff-core/tests/arch_arm.rs b/objdiff-core/tests/arch_arm.rs index 887a582..918d099 100644 --- a/objdiff-core/tests/arch_arm.rs +++ b/objdiff-core/tests/arch_arm.rs @@ -56,3 +56,20 @@ fn combine_text_sections() { let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config); insta::assert_snapshot!(output); } + +#[test] +#[cfg(feature = "arm")] +fn trim_trailing_hword() { + let diff_config = diff::DiffObjConfig::default(); + let obj = obj::read::parse( + include_object!("data/arm/issue_253.o"), + &diff_config, + diff::DiffSide::Base, + ) + .unwrap(); + let symbol_idx = obj.symbols.iter().position(|s| s.name == "sub_8030F20").unwrap(); + let diff = diff::code::no_diff_code(&obj, symbol_idx, &diff_config).unwrap(); + insta::assert_debug_snapshot!(diff.instruction_rows); + let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config); + insta::assert_snapshot!(output); +} diff --git a/objdiff-core/tests/data/arm/issue_253.o b/objdiff-core/tests/data/arm/issue_253.o new file mode 100644 index 0000000000000000000000000000000000000000..36083e291acc275a34cdfc1ec3b3153d8c87dc09 GIT binary patch literal 748 zcmb<-^>JflWMqH=Mg|QA1doAX4TQsL0#=~Gz`?-I;F#2uRN&6Q;3&_)z>t`ln3T!D zD9XUV!0H&}YaZg_>&d{z#KFi3(FxXvNid_>!oa}Hz=WcRnE|5WPdx)OMB?LrK?s8d zLVf%%1!6EU!1;;{3=B+2d<_N$1`Z^?0Z2U@Gcp)6Ffgzo@!?Dckb77d%orHpt`q{X zJpDp~;=^2>L;Qo{okHChf?VC>og9N*!3<9*1rhf2b!AW~VNgk7C@xKkw=gg^a5FMs zV9+bB%q>YwV$drtDT2@$Fjh%wMG1pmN@7VOgI-c`F@v6Ckgr~1Nl8&=QfWzQF|y+b zu%)5yhWQN?cN|E1paI4J3NKJ3K?9h<21yNw36cZR>Y#80hY5(sfK#0gL`?xi6-1o^ zj01HOhzl|oIksaM7#J=<