From 9df98f263e850d5cecb13acbf10141375a724f54 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Sun, 17 Mar 2024 12:06:18 -0600 Subject: [PATCH] Move all architecture-specific code into modules No more scattered relocation handling and feature checks. Everything will go through the ObjArch trait, which makes it easier to add new architectures going forward. --- objdiff-core/Cargo.toml | 29 +-- objdiff-core/src/arch/mips.rs | 207 ++++++++++++++++++ objdiff-core/src/arch/mod.rs | 47 ++++ objdiff-core/src/arch/ppc.rs | 210 ++++++++++++++++++ objdiff-core/src/{obj => arch}/x86.rs | 284 ++++++++++++++----------- objdiff-core/src/diff/code.rs | 116 +--------- objdiff-core/src/diff/mod.rs | 17 +- objdiff-core/src/lib.rs | 1 + objdiff-core/src/obj/mips.rs | 155 -------------- objdiff-core/src/obj/mod.rs | 67 +----- objdiff-core/src/obj/ppc.rs | 164 -------------- objdiff-core/src/obj/read.rs | 166 ++++----------- objdiff-gui/src/app.rs | 2 +- objdiff-gui/src/views/config.rs | 2 +- objdiff-gui/src/views/function_diff.rs | 32 ++- 15 files changed, 744 insertions(+), 755 deletions(-) create mode 100644 objdiff-core/src/arch/mips.rs create mode 100644 objdiff-core/src/arch/mod.rs create mode 100644 objdiff-core/src/arch/ppc.rs rename objdiff-core/src/{obj => arch}/x86.rs (51%) delete mode 100644 objdiff-core/src/obj/mips.rs delete mode 100644 objdiff-core/src/obj/ppc.rs diff --git a/objdiff-core/Cargo.toml b/objdiff-core/Cargo.toml index a827727..7885fda 100644 --- a/objdiff-core/Cargo.toml +++ b/objdiff-core/Cargo.toml @@ -14,7 +14,7 @@ A local diffing tool for decompilation projects. [features] all = ["config", "dwarf", "mips", "ppc", "x86"] any-arch = [] # Implicit, used to check if any arch is enabled -config = [] +config = ["globset", "semver", "serde_json", "serde_yaml"] dwarf = ["gimli"] mips = ["any-arch", "rabbitizer"] ppc = ["any-arch", "cwdemangle", "ppc750cl"] @@ -23,30 +23,35 @@ x86 = ["any-arch", "iced-x86", "msvc-demangler"] [dependencies] anyhow = "1.0.81" byteorder = "1.5.0" -cwdemangle = { version = "1.0.0", optional = true } filetime = "0.2.23" flagset = "0.4.5" -gimli = { version = "0.28.1", default-features = false, features = ["read-all"], optional = true } log = "0.4.21" memmap2 = "0.9.4" num-traits = "0.2.18" object = { version = "0.34.0", features = ["read_core", "std", "elf", "pe"], default-features = false } -ppc750cl = { git = "https://github.com/encounter/ppc750cl", rev = "4a2bbbc6f84dcb76255ab6f3595a8d4a0ce96618", optional = true } -rabbitizer = { version = "1.9.2", optional = true } serde = { version = "1", features = ["derive"] } similar = { version = "2.4.0", default-features = false } # config -globset = { version = "0.4.14", features = ["serde1"] } -semver = "1.0.22" -serde_json = "1.0.114" -serde_yaml = "0.9.32" +globset = { version = "0.4.14", features = ["serde1"], optional = true } +semver = { version = "1.0.22", optional = true } +serde_json = { version = "1.0.114", optional = true } +serde_yaml = { version = "0.9.32", optional = true } + +# dwarf +gimli = { version = "0.28.1", default-features = false, features = ["read-all"], optional = true } + +# ppc +cwdemangle = { version = "1.0.0", optional = true } +ppc750cl = { git = "https://github.com/encounter/ppc750cl", rev = "4a2bbbc6f84dcb76255ab6f3595a8d4a0ce96618", optional = true } + +# mips +rabbitizer = { version = "1.9.2", optional = true } # x86 msvc-demangler = { version = "0.10.0", optional = true } [dependencies.iced-x86] version = "1.21.0" -#default-features = false -#features = ["std", "decoder", "intel"] -features = ["exhaustive_enums"] +default-features = false +features = ["std", "decoder", "intel", "gas", "masm", "nasm", "exhaustive_enums"] optional = true diff --git a/objdiff-core/src/arch/mips.rs b/objdiff-core/src/arch/mips.rs new file mode 100644 index 0000000..cd0dbf1 --- /dev/null +++ b/objdiff-core/src/arch/mips.rs @@ -0,0 +1,207 @@ +use std::{borrow::Cow, collections::BTreeMap}; + +use anyhow::{bail, Result}; +use object::{elf, Endian, Endianness, File, Object, Relocation, RelocationFlags}; +use rabbitizer::{config, Abi, InstrCategory, Instruction, OperandType}; + +use crate::{ + arch::ObjArch, + diff::{DiffObjConfig, ProcessCodeResult}, + obj::{ObjIns, ObjInsArg, ObjInsArgValue, ObjReloc, ObjSection}, +}; + +fn configure_rabbitizer() { + unsafe { + config::RabbitizerConfig_Cfg.reg_names.fpr_abi_names = Abi::O32; + } +} + +pub struct ObjArchMips { + pub endianness: Endianness, +} + +impl ObjArchMips { + pub fn new(object: &File) -> Result { Ok(Self { endianness: object.endianness() }) } +} + +impl ObjArch for ObjArchMips { + fn process_code( + &self, + config: &DiffObjConfig, + data: &[u8], + start_address: u64, + relocs: &[ObjReloc], + line_info: &Option>, + ) -> Result { + configure_rabbitizer(); + + let end_address = start_address + data.len() as u64; + let ins_count = data.len() / 4; + let mut ops = Vec::::with_capacity(ins_count); + let mut insts = Vec::::with_capacity(ins_count); + let mut cur_addr = start_address as u32; + for chunk in data.chunks_exact(4) { + let reloc = relocs.iter().find(|r| (r.address as u32 & !3) == cur_addr); + let code = self.endianness.read_u32_bytes(chunk.try_into()?); + let instruction = Instruction::new(code, cur_addr, InstrCategory::CPU); + + let op = instruction.unique_id as u16; + ops.push(op); + + let mnemonic = instruction.opcode_name().to_string(); + let is_branch = instruction.is_branch(); + let branch_offset = instruction.branch_offset(); + let branch_dest = if is_branch { + cur_addr.checked_add_signed(branch_offset).map(|a| a as u64) + } else { + None + }; + + let operands = instruction.get_operands_slice(); + let mut args = Vec::with_capacity(operands.len() + 1); + for (idx, op) in operands.iter().enumerate() { + if idx > 0 { + if config.space_between_args { + args.push(ObjInsArg::PlainText(", ".to_string())); + } else { + args.push(ObjInsArg::PlainText(",".to_string())); + } + } + + match op { + OperandType::cpu_immediate + | OperandType::cpu_label + | OperandType::cpu_branch_target_label => { + if let Some(branch_dest) = branch_dest { + args.push(ObjInsArg::BranchDest(branch_dest)); + } else if let Some(reloc) = reloc { + if matches!(&reloc.target_section, Some(s) if s == ".text") + && reloc.target.address > start_address + && reloc.target.address < end_address + { + args.push(ObjInsArg::BranchDest(reloc.target.address)); + } else { + push_reloc(&mut args, reloc)?; + } + } else { + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque( + op.disassemble(&instruction, None), + ))); + } + } + OperandType::cpu_immediate_base => { + if let Some(reloc) = reloc { + push_reloc(&mut args, reloc)?; + } else { + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque( + OperandType::cpu_immediate.disassemble(&instruction, None), + ))); + } + args.push(ObjInsArg::PlainText("(".to_string())); + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque( + OperandType::cpu_rs.disassemble(&instruction, None), + ))); + args.push(ObjInsArg::PlainText(")".to_string())); + } + _ => { + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque( + op.disassemble(&instruction, None), + ))); + } + } + } + let line = line_info + .as_ref() + .and_then(|map| map.range(..=cur_addr as u64).last().map(|(_, &b)| b)); + insts.push(ObjIns { + address: cur_addr as u64, + size: 4, + op, + mnemonic, + args, + reloc: reloc.cloned(), + branch_dest, + line, + orig: None, + }); + cur_addr += 4; + } + Ok(ProcessCodeResult { ops, insts }) + } + + fn implcit_addend( + &self, + section: &ObjSection, + address: u64, + reloc: &Relocation, + ) -> Result { + let data = section.data[address as usize..address as usize + 4].try_into()?; + let addend = self.endianness.read_u32_bytes(data); + Ok(match reloc.flags() { + RelocationFlags::Elf { r_type: elf::R_MIPS_32 } => addend as i64, + RelocationFlags::Elf { r_type: elf::R_MIPS_HI16 } => { + ((addend & 0x0000FFFF) << 16) as i32 as i64 + } + RelocationFlags::Elf { + r_type: + elf::R_MIPS_LO16 | elf::R_MIPS_GOT16 | elf::R_MIPS_CALL16 | elf::R_MIPS_GPREL16, + } => (addend & 0x0000FFFF) as i16 as i64, + RelocationFlags::Elf { r_type: elf::R_MIPS_26 } => ((addend & 0x03FFFFFF) << 2) as i64, + flags => bail!("Unsupported MIPS implicit relocation {flags:?}"), + }) + } + + fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> { + match flags { + RelocationFlags::Elf { r_type } => match r_type { + elf::R_MIPS_HI16 => Cow::Borrowed("R_MIPS_HI16"), + elf::R_MIPS_LO16 => Cow::Borrowed("R_MIPS_LO16"), + elf::R_MIPS_GOT16 => Cow::Borrowed("R_MIPS_GOT16"), + elf::R_MIPS_CALL16 => Cow::Borrowed("R_MIPS_CALL16"), + elf::R_MIPS_GPREL16 => Cow::Borrowed("R_MIPS_GPREL16"), + elf::R_MIPS_32 => Cow::Borrowed("R_MIPS_32"), + elf::R_MIPS_26 => Cow::Borrowed("R_MIPS_26"), + _ => Cow::Owned(format!("")), + }, + flags => Cow::Owned(format!("<{flags:?}>")), + } + } +} + +fn push_reloc(args: &mut Vec, reloc: &ObjReloc) -> Result<()> { + match reloc.flags { + RelocationFlags::Elf { r_type } => match r_type { + elf::R_MIPS_HI16 => { + args.push(ObjInsArg::PlainText("%hi(".to_string())); + args.push(ObjInsArg::Reloc); + args.push(ObjInsArg::PlainText(")".to_string())); + } + elf::R_MIPS_LO16 => { + args.push(ObjInsArg::PlainText("%lo(".to_string())); + args.push(ObjInsArg::Reloc); + args.push(ObjInsArg::PlainText(")".to_string())); + } + elf::R_MIPS_GOT16 => { + args.push(ObjInsArg::PlainText("%got(".to_string())); + args.push(ObjInsArg::Reloc); + args.push(ObjInsArg::PlainText(")".to_string())); + } + elf::R_MIPS_CALL16 => { + args.push(ObjInsArg::PlainText("%call16(".to_string())); + args.push(ObjInsArg::Reloc); + args.push(ObjInsArg::PlainText(")".to_string())); + } + elf::R_MIPS_GPREL16 => { + args.push(ObjInsArg::PlainText("%gp_rel(".to_string())); + args.push(ObjInsArg::Reloc); + args.push(ObjInsArg::PlainText(")".to_string())); + } + elf::R_MIPS_32 | elf::R_MIPS_26 => { + args.push(ObjInsArg::Reloc); + } + _ => bail!("Unsupported ELF MIPS relocation type {r_type}"), + }, + flags => panic!("Unsupported MIPS relocation flags {flags:?}"), + } + Ok(()) +} diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs new file mode 100644 index 0000000..69ef11f --- /dev/null +++ b/objdiff-core/src/arch/mod.rs @@ -0,0 +1,47 @@ +use std::borrow::Cow; +use std::collections::BTreeMap; + +use anyhow::{bail, Result}; +use object::{Architecture, Object, Relocation, RelocationFlags}; + +use crate::{ + diff::{DiffObjConfig, ProcessCodeResult}, + obj::{ObjReloc, ObjSection}, +}; + +#[cfg(feature = "mips")] +mod mips; +#[cfg(feature = "ppc")] +mod ppc; +#[cfg(feature = "x86")] +mod x86; + +pub trait ObjArch: Send + Sync { + fn process_code( + &self, + config: &DiffObjConfig, + data: &[u8], + address: u64, + relocs: &[ObjReloc], + line_info: &Option>, + ) -> Result; + + fn implcit_addend(&self, section: &ObjSection, address: u64, reloc: &Relocation) + -> Result; + + fn demangle(&self, _name: &str) -> Option { None } + + fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str>; +} + +pub fn new_arch(object: &object::File) -> Result> { + Ok(match object.architecture() { + #[cfg(feature = "ppc")] + Architecture::PowerPc => Box::new(ppc::ObjArchPpc::new(object)?), + #[cfg(feature = "mips")] + Architecture::Mips => Box::new(mips::ObjArchMips::new(object)?), + #[cfg(feature = "x86")] + Architecture::I386 | Architecture::X86_64 => Box::new(x86::ObjArchX86::new(object)?), + arch => bail!("Unsupported architecture: {arch:?}"), + }) +} diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs new file mode 100644 index 0000000..9062a20 --- /dev/null +++ b/objdiff-core/src/arch/ppc.rs @@ -0,0 +1,210 @@ +use std::borrow::Cow; +use std::collections::BTreeMap; + +use anyhow::{bail, Result}; +use object::{elf, File, Relocation, RelocationFlags}; +use ppc750cl::{disasm_iter, Argument, SimplifiedIns, GPR}; + +use crate::{ + arch::ObjArch, + diff::{DiffObjConfig, ProcessCodeResult}, + obj::{ObjIns, ObjInsArg, ObjInsArgValue, ObjReloc, ObjSection}, +}; + +// Relative relocation, can be Simm, Offset or BranchDest +fn is_relative_arg(arg: &Argument) -> bool { + matches!(arg, Argument::Simm(_) | Argument::Offset(_) | Argument::BranchDest(_)) +} + +// Relative or absolute relocation, can be Uimm, Simm or Offset +fn is_rel_abs_arg(arg: &Argument) -> bool { + matches!(arg, Argument::Uimm(_) | Argument::Simm(_) | Argument::Offset(_)) +} + +fn is_offset_arg(arg: &Argument) -> bool { matches!(arg, Argument::Offset(_)) } + +pub struct ObjArchPpc {} + +impl ObjArchPpc { + pub fn new(_file: &File) -> Result { Ok(Self {}) } +} + +impl ObjArch for ObjArchPpc { + fn process_code( + &self, + config: &DiffObjConfig, + data: &[u8], + address: u64, + relocs: &[ObjReloc], + line_info: &Option>, + ) -> Result { + let ins_count = data.len() / 4; + let mut ops = Vec::::with_capacity(ins_count); + let mut insts = Vec::::with_capacity(ins_count); + for mut ins in disasm_iter(data, address as u32) { + let reloc = relocs.iter().find(|r| (r.address as u32 & !3) == ins.addr); + if let Some(reloc) = reloc { + // Zero out relocations + ins.code = match reloc.flags { + RelocationFlags::Elf { r_type: elf::R_PPC_EMB_SDA21 } => ins.code & !0x1FFFFF, + RelocationFlags::Elf { r_type: elf::R_PPC_REL24 } => ins.code & !0x3FFFFFC, + RelocationFlags::Elf { r_type: elf::R_PPC_REL14 } => ins.code & !0xFFFC, + RelocationFlags::Elf { + r_type: elf::R_PPC_ADDR16_HI | elf::R_PPC_ADDR16_HA | elf::R_PPC_ADDR16_LO, + } => ins.code & !0xFFFF, + _ => ins.code, + }; + } + let simplified = ins.clone().simplified(); + + let mut reloc_arg = None; + if let Some(reloc) = reloc { + match reloc.flags { + RelocationFlags::Elf { r_type: elf::R_PPC_EMB_SDA21 } => { + reloc_arg = Some(1); + } + RelocationFlags::Elf { r_type: elf::R_PPC_REL24 | elf::R_PPC_REL14 } => { + reloc_arg = simplified.args.iter().rposition(is_relative_arg); + } + RelocationFlags::Elf { + r_type: elf::R_PPC_ADDR16_HI | elf::R_PPC_ADDR16_HA | elf::R_PPC_ADDR16_LO, + } => { + reloc_arg = simplified.args.iter().rposition(is_rel_abs_arg); + } + _ => {} + } + } + + let mut args = vec![]; + let mut branch_dest = None; + let mut writing_offset = false; + for (idx, arg) in simplified.args.iter().enumerate() { + if idx > 0 && !writing_offset { + if config.space_between_args { + args.push(ObjInsArg::PlainText(", ".to_string())); + } else { + args.push(ObjInsArg::PlainText(",".to_string())); + } + } + + if reloc_arg == Some(idx) { + let reloc = reloc.unwrap(); + push_reloc(&mut args, reloc)?; + // For @sda21, we can omit the register argument + if matches!(reloc.flags, RelocationFlags::Elf { r_type: elf::R_PPC_EMB_SDA21 }) + // Sanity check: the next argument should be r0 + && matches!(simplified.args.get(idx + 1), Some(Argument::GPR(GPR(0)))) + { + break; + } + } else { + match arg { + Argument::Simm(simm) => { + args.push(ObjInsArg::Arg(ObjInsArgValue::Signed(simm.0 as i64))); + } + Argument::Uimm(uimm) => { + args.push(ObjInsArg::Arg(ObjInsArgValue::Unsigned(uimm.0 as u64))); + } + Argument::Offset(offset) => { + args.push(ObjInsArg::Arg(ObjInsArgValue::Signed(offset.0 as i64))); + } + Argument::BranchDest(dest) => { + let dest = ins.addr.wrapping_add_signed(dest.0) as u64; + args.push(ObjInsArg::BranchDest(dest)); + branch_dest = Some(dest); + } + _ => { + args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(arg.to_string()))); + } + }; + } + + if writing_offset { + args.push(ObjInsArg::PlainText(")".to_string())); + writing_offset = false; + } + if is_offset_arg(arg) { + args.push(ObjInsArg::PlainText("(".to_string())); + writing_offset = true; + } + } + + ops.push(simplified.ins.op as u16); + let line = line_info + .as_ref() + .and_then(|map| map.range(..=simplified.ins.addr as u64).last().map(|(_, &b)| b)); + insts.push(ObjIns { + address: simplified.ins.addr as u64, + size: 4, + mnemonic: format!("{}{}", simplified.mnemonic, simplified.suffix), + args, + reloc: reloc.cloned(), + op: ins.op as u16, + branch_dest, + line, + orig: Some(format!("{}", SimplifiedIns::basic_form(ins))), + }); + } + Ok(ProcessCodeResult { ops, insts }) + } + + fn implcit_addend( + &self, + _section: &ObjSection, + address: u64, + reloc: &Relocation, + ) -> Result { + bail!("Unsupported PPC implicit relocation {:#x}:{:?}", address, reloc.flags()) + } + + fn demangle(&self, name: &str) -> Option { + cwdemangle::demangle(name, &Default::default()) + } + + + fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> { + match flags { + RelocationFlags::Elf { r_type } => match r_type { + elf::R_PPC_ADDR16_LO => Cow::Borrowed("R_PPC_ADDR16_LO"), + elf::R_PPC_ADDR16_HI => Cow::Borrowed("R_PPC_ADDR16_HI"), + elf::R_PPC_ADDR16_HA => Cow::Borrowed("R_PPC_ADDR16_HA"), + elf::R_PPC_EMB_SDA21 => Cow::Borrowed("R_PPC_EMB_SDA21"), + elf::R_PPC_ADDR32 => Cow::Borrowed("R_PPC_ADDR32"), + elf::R_PPC_UADDR32 => Cow::Borrowed("R_PPC_UADDR32"), + elf::R_PPC_REL24 => Cow::Borrowed("R_PPC_REL24"), + elf::R_PPC_REL14 => Cow::Borrowed("R_PPC_REL14"), + _ => Cow::Owned(format!("")), + }, + flags => Cow::Owned(format!("<{flags:?}>")), + } + } +} + +fn push_reloc(args: &mut Vec, reloc: &ObjReloc) -> Result<()> { + match reloc.flags { + RelocationFlags::Elf { r_type } => match r_type { + elf::R_PPC_ADDR16_LO => { + args.push(ObjInsArg::Reloc); + args.push(ObjInsArg::PlainText("@l".to_string())); + } + elf::R_PPC_ADDR16_HI => { + args.push(ObjInsArg::Reloc); + args.push(ObjInsArg::PlainText("@h".to_string())); + } + elf::R_PPC_ADDR16_HA => { + args.push(ObjInsArg::Reloc); + args.push(ObjInsArg::PlainText("@ha".to_string())); + } + elf::R_PPC_EMB_SDA21 => { + args.push(ObjInsArg::Reloc); + args.push(ObjInsArg::PlainText("@sda21".to_string())); + } + elf::R_PPC_ADDR32 | elf::R_PPC_UADDR32 | elf::R_PPC_REL24 | elf::R_PPC_REL14 => { + args.push(ObjInsArg::Reloc); + } + _ => bail!("Unsupported ELF PPC relocation type {r_type}"), + }, + flags => bail!("Unsupported PPC relocation kind: {flags:?}"), + }; + Ok(()) +} diff --git a/objdiff-core/src/obj/x86.rs b/objdiff-core/src/arch/x86.rs similarity index 51% rename from objdiff-core/src/obj/x86.rs rename to objdiff-core/src/arch/x86.rs index 991e848..17bae42 100644 --- a/objdiff-core/src/obj/x86.rs +++ b/objdiff-core/src/arch/x86.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeMap; +use std::{borrow::Cow, collections::BTreeMap}; use anyhow::{anyhow, bail, ensure, Result}; use iced_x86::{ @@ -6,138 +6,178 @@ use iced_x86::{ GasFormatter, Instruction, IntelFormatter, MasmFormatter, NasmFormatter, NumberKind, OpKind, PrefixKind, Register, SymbolResult, }; +use object::{pe, Endian, Endianness, File, Object, Relocation, RelocationFlags}; use crate::{ - diff::{DiffObjConfig, ProcessCodeResult}, - obj::{ObjIns, ObjInsArg, ObjInsArgValue, ObjReloc, ObjRelocKind}, + arch::ObjArch, + diff::{DiffObjConfig, ProcessCodeResult, X86Formatter}, + obj::{ObjIns, ObjInsArg, ObjInsArgValue, ObjReloc, ObjSection}, }; -#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, serde::Deserialize, serde::Serialize)] -pub enum X86Formatter { - #[default] - Intel, - Gas, - Nasm, - Masm, +pub struct ObjArchX86 { + bits: u32, + endianness: Endianness, } -pub fn process_code( - config: &DiffObjConfig, - data: &[u8], - bitness: u32, - start_address: u64, - relocs: &[ObjReloc], - line_info: &Option>, -) -> Result { - let mut result = ProcessCodeResult { ops: Vec::new(), insts: Vec::new() }; - let mut decoder = Decoder::with_ip(bitness, data, start_address, DecoderOptions::NONE); - let mut formatter: Box = match config.x86_formatter { - X86Formatter::Intel => Box::new(IntelFormatter::new()), - X86Formatter::Gas => Box::new(GasFormatter::new()), - X86Formatter::Nasm => Box::new(NasmFormatter::new()), - X86Formatter::Masm => Box::new(MasmFormatter::new()), - }; - formatter.options_mut().set_space_after_operand_separator(config.space_between_args); +impl ObjArchX86 { + pub fn new(object: &File) -> Result { + Ok(Self { bits: if object.is_64() { 64 } else { 32 }, endianness: object.endianness() }) + } +} - let mut output = InstructionFormatterOutput { - formatted: String::new(), - ins: ObjIns { - address: 0, - size: 0, - op: 0, - mnemonic: "".to_string(), - args: vec![], - reloc: None, - branch_dest: None, - line: None, - orig: None, - }, - error: None, - ins_operands: vec![], - }; - let mut instruction = Instruction::default(); - while decoder.can_decode() { - decoder.decode_out(&mut instruction); - - let address = instruction.ip(); - let op = instruction.mnemonic() as u16; - let reloc = relocs - .iter() - .find(|r| r.address >= address && r.address < address + instruction.len() as u64); - output.ins = ObjIns { - address, - size: instruction.len() as u8, - op, - mnemonic: "".to_string(), - args: vec![], - reloc: reloc.cloned(), - branch_dest: None, - line: line_info.as_ref().and_then(|m| m.get(&address).cloned()), - orig: None, +impl ObjArch for ObjArchX86 { + fn process_code( + &self, + config: &DiffObjConfig, + data: &[u8], + start_address: u64, + relocs: &[ObjReloc], + line_info: &Option>, + ) -> Result { + let mut result = ProcessCodeResult { ops: Vec::new(), insts: Vec::new() }; + let mut decoder = Decoder::with_ip(self.bits, data, start_address, DecoderOptions::NONE); + let mut formatter: Box = match config.x86_formatter { + X86Formatter::Intel => Box::new(IntelFormatter::new()), + X86Formatter::Gas => Box::new(GasFormatter::new()), + X86Formatter::Nasm => Box::new(NasmFormatter::new()), + X86Formatter::Masm => Box::new(MasmFormatter::new()), }; - // Run the formatter, which will populate output.ins - formatter.format(&instruction, &mut output); - if let Some(error) = output.error.take() { - return Err(error); - } - ensure!(output.ins_operands.len() == output.ins.args.len()); - output.ins.orig = Some(output.formatted.clone()); + formatter.options_mut().set_space_after_operand_separator(config.space_between_args); - // print!("{:016X} ", instruction.ip()); - // let start_index = (instruction.ip() - address) as usize; - // let instr_bytes = &data[start_index..start_index + instruction.len()]; - // for b in instr_bytes.iter() { - // print!("{:02X}", b); - // } - // if instr_bytes.len() < 32 { - // for _ in 0..32 - instr_bytes.len() { - // print!(" "); - // } - // } - // println!(" {}", output.formatted); - // - // if let Some(reloc) = reloc { - // println!("\tReloc: {:?}", reloc); - // } - // - // for i in 0..instruction.op_count() { - // let kind = instruction.op_kind(i); - // print!("{:?} ", kind); - // } - // println!(); + let mut output = InstructionFormatterOutput { + formatted: String::new(), + ins: ObjIns { + address: 0, + size: 0, + op: 0, + mnemonic: String::new(), + args: vec![], + reloc: None, + branch_dest: None, + line: None, + orig: None, + }, + error: None, + ins_operands: vec![], + }; + let mut instruction = Instruction::default(); + while decoder.can_decode() { + decoder.decode_out(&mut instruction); - // Make sure we've put the relocation somewhere in the instruction - if reloc.is_some() && !output.ins.args.iter().any(|a| matches!(a, ObjInsArg::Reloc)) { - let mut found = replace_arg( - OpKind::Memory, - ObjInsArg::Reloc, - &mut output.ins.args, - &instruction, - &output.ins_operands, - )?; - if !found { - found = replace_arg( - OpKind::Immediate32, + let address = instruction.ip(); + let op = instruction.mnemonic() as u16; + let reloc = relocs + .iter() + .find(|r| r.address >= address && r.address < address + instruction.len() as u64); + output.ins = ObjIns { + address, + size: instruction.len() as u8, + op, + mnemonic: String::new(), + args: vec![], + reloc: reloc.cloned(), + branch_dest: None, + line: line_info.as_ref().and_then(|m| m.get(&address).cloned()), + orig: None, + }; + // Run the formatter, which will populate output.ins + formatter.format(&instruction, &mut output); + if let Some(error) = output.error.take() { + return Err(error); + } + ensure!(output.ins_operands.len() == output.ins.args.len()); + output.ins.orig = Some(output.formatted.clone()); + + // print!("{:016X} ", instruction.ip()); + // let start_index = (instruction.ip() - address) as usize; + // let instr_bytes = &data[start_index..start_index + instruction.len()]; + // for b in instr_bytes.iter() { + // print!("{:02X}", b); + // } + // if instr_bytes.len() < 32 { + // for _ in 0..32 - instr_bytes.len() { + // print!(" "); + // } + // } + // println!(" {}", output.formatted); + // + // if let Some(reloc) = reloc { + // println!("\tReloc: {:?}", reloc); + // } + // + // for i in 0..instruction.op_count() { + // let kind = instruction.op_kind(i); + // print!("{:?} ", kind); + // } + // println!(); + + // Make sure we've put the relocation somewhere in the instruction + if reloc.is_some() && !output.ins.args.iter().any(|a| matches!(a, ObjInsArg::Reloc)) { + let mut found = replace_arg( + OpKind::Memory, ObjInsArg::Reloc, &mut output.ins.args, &instruction, &output.ins_operands, )?; + if !found { + found = replace_arg( + OpKind::Immediate32, + ObjInsArg::Reloc, + &mut output.ins.args, + &instruction, + &output.ins_operands, + )?; + } + ensure!(found, "x86: Failed to find operand for Absolute relocation"); + } + if reloc.is_some() && !output.ins.args.iter().any(|a| matches!(a, ObjInsArg::Reloc)) { + bail!("Failed to find relocation in instruction"); } - ensure!(found, "x86: Failed to find operand for Absolute relocation"); - } - if reloc.is_some() && !output.ins.args.iter().any(|a| matches!(a, ObjInsArg::Reloc)) { - bail!("Failed to find relocation in instruction"); - } - result.ops.push(op); - result.insts.push(output.ins.clone()); + result.ops.push(op); + result.insts.push(output.ins.clone()); - // Clear for next iteration - output.formatted.clear(); - output.ins_operands.clear(); + // Clear for next iteration + output.formatted.clear(); + output.ins_operands.clear(); + } + Ok(result) + } + + fn implcit_addend( + &self, + section: &ObjSection, + address: u64, + reloc: &Relocation, + ) -> Result { + match reloc.flags() { + RelocationFlags::Coff { typ: pe::IMAGE_REL_I386_DIR32 | pe::IMAGE_REL_I386_REL32 } => { + let data = section.data[address as usize..address as usize + 4].try_into()?; + Ok(self.endianness.read_i32_bytes(data) as i64) + } + flags => bail!("Unsupported x86 implicit relocation {flags:?}"), + } + } + + fn demangle(&self, name: &str) -> Option { + if name.starts_with('?') { + msvc_demangler::demangle(name, msvc_demangler::DemangleFlags::llvm()).ok() + } else { + None + } + } + + fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> { + match flags { + RelocationFlags::Coff { typ } => match typ { + pe::IMAGE_REL_I386_DIR32 => Cow::Borrowed("IMAGE_REL_I386_DIR32"), + pe::IMAGE_REL_I386_REL32 => Cow::Borrowed("IMAGE_REL_I386_REL32"), + _ => Cow::Owned(format!("")), + }, + flags => Cow::Owned(format!("<{flags:?}>")), + } } - Ok(result) } fn replace_arg( @@ -242,13 +282,15 @@ impl FormatterOutput for InstructionFormatterOutput { match kind { FormatterTextKind::LabelAddress => { if let Some(reloc) = self.ins.reloc.as_ref() { - if reloc.kind == ObjRelocKind::Absolute { + if matches!(reloc.flags, RelocationFlags::Coff { + typ: pe::IMAGE_REL_I386_DIR32 + }) { self.ins.args.push(ObjInsArg::Reloc); return; } else if self.error.is_none() { self.error = Some(anyhow!( - "x86: Unsupported LabelAddress relocation kind {:?}", - reloc.kind + "x86: Unsupported LabelAddress relocation flags {:?}", + reloc.flags )); } } @@ -258,13 +300,15 @@ impl FormatterOutput for InstructionFormatterOutput { } FormatterTextKind::FunctionAddress => { if let Some(reloc) = self.ins.reloc.as_ref() { - if reloc.kind == ObjRelocKind::X86PcRel32 { + if matches!(reloc.flags, RelocationFlags::Coff { + typ: pe::IMAGE_REL_I386_REL32 + }) { self.ins.args.push(ObjInsArg::Reloc); return; } else if self.error.is_none() { self.error = Some(anyhow!( - "x86: Unsupported FunctionAddress relocation kind {:?}", - reloc.kind + "x86: Unsupported FunctionAddress relocation flags {:?}", + reloc.flags )); } } diff --git a/objdiff-core/src/diff/code.rs b/objdiff-core/src/diff/code.rs index 1267d72..3323e32 100644 --- a/objdiff-core/src/diff/code.rs +++ b/objdiff-core/src/diff/code.rs @@ -8,17 +8,17 @@ use anyhow::Result; use similar::{capture_diff_slices_deadline, Algorithm}; use crate::{ + arch::ObjArch, diff::{DiffObjConfig, ProcessCodeResult}, - obj, obj::{ - ObjArchitecture, ObjInfo, ObjInsArg, ObjInsArgDiff, ObjInsBranchFrom, ObjInsBranchTo, - ObjInsDiff, ObjInsDiffKind, ObjReloc, ObjSymbol, ObjSymbolFlags, + ObjInfo, ObjInsArg, ObjInsArgDiff, ObjInsBranchFrom, ObjInsBranchTo, ObjInsDiff, + ObjInsDiffKind, ObjReloc, ObjSymbol, ObjSymbolFlags, }, }; pub fn no_diff_code( + arch: &dyn ObjArch, config: &DiffObjConfig, - arch: ObjArchitecture, data: &[u8], symbol: &mut ObjSymbol, relocs: &[ObjReloc], @@ -26,29 +26,7 @@ pub fn no_diff_code( ) -> Result<()> { let code = &data[symbol.section_address as usize..(symbol.section_address + symbol.size) as usize]; - let out: ProcessCodeResult = match arch { - #[cfg(feature = "ppc")] - ObjArchitecture::PowerPc => { - obj::ppc::process_code(config, code, symbol.address, relocs, line_info)? - } - #[cfg(feature = "mips")] - ObjArchitecture::Mips => obj::mips::process_code( - config, - code, - symbol.address, - symbol.address + symbol.size, - relocs, - line_info, - )?, - #[cfg(feature = "x86")] - ObjArchitecture::X86_32 => { - obj::x86::process_code(config, code, 32, symbol.address, relocs, line_info)? - } - #[cfg(feature = "x86")] - ObjArchitecture::X86_64 => { - obj::x86::process_code(config, code, 64, symbol.address, relocs, line_info)? - } - }; + let out = arch.process_code(config, code, symbol.address, relocs, line_info)?; let mut diff = Vec::::new(); for i in out.insts { @@ -61,8 +39,8 @@ pub fn no_diff_code( #[allow(clippy::too_many_arguments)] pub fn diff_code( + arch: &dyn ObjArch, config: &DiffObjConfig, - arch: ObjArchitecture, left_data: &[u8], right_data: &[u8], left_symbol: &mut ObjSymbol, @@ -76,82 +54,10 @@ pub fn diff_code( ..(left_symbol.section_address + left_symbol.size) as usize]; let right_code = &right_data[right_symbol.section_address as usize ..(right_symbol.section_address + right_symbol.size) as usize]; - let (left_out, right_out) = match arch { - #[cfg(feature = "ppc")] - ObjArchitecture::PowerPc => ( - obj::ppc::process_code( - config, - left_code, - left_symbol.address, - left_relocs, - left_line_info, - )?, - obj::ppc::process_code( - config, - right_code, - right_symbol.address, - right_relocs, - right_line_info, - )?, - ), - #[cfg(feature = "mips")] - ObjArchitecture::Mips => ( - obj::mips::process_code( - config, - left_code, - left_symbol.address, - left_symbol.address + left_symbol.size, - left_relocs, - left_line_info, - )?, - obj::mips::process_code( - config, - right_code, - right_symbol.address, - left_symbol.address + left_symbol.size, - right_relocs, - right_line_info, - )?, - ), - #[cfg(feature = "x86")] - ObjArchitecture::X86_32 => ( - obj::x86::process_code( - config, - left_code, - 32, - left_symbol.address, - left_relocs, - left_line_info, - )?, - obj::x86::process_code( - config, - right_code, - 32, - right_symbol.address, - right_relocs, - right_line_info, - )?, - ), - #[cfg(feature = "x86")] - ObjArchitecture::X86_64 => ( - obj::x86::process_code( - config, - left_code, - 64, - left_symbol.address, - left_relocs, - left_line_info, - )?, - obj::x86::process_code( - config, - right_code, - 64, - right_symbol.address, - right_relocs, - right_line_info, - )?, - ), - }; + let left_out = + arch.process_code(config, left_code, left_symbol.address, left_relocs, left_line_info)?; + let right_out = + arch.process_code(config, right_code, right_symbol.address, right_relocs, right_line_info)?; let mut left_diff = Vec::::new(); let mut right_diff = Vec::::new(); @@ -281,7 +187,7 @@ fn reloc_eq( let (Some(left), Some(right)) = (left_reloc, right_reloc) else { return false; }; - if left.kind != right.kind { + if left.flags != right.flags { return false; } if config.relax_reloc_diffs { diff --git a/objdiff-core/src/diff/mod.rs b/objdiff-core/src/diff/mod.rs index 268bd07..f8e32a6 100644 --- a/objdiff-core/src/diff/mod.rs +++ b/objdiff-core/src/diff/mod.rs @@ -9,9 +9,18 @@ use crate::{ code::{diff_code, find_section_and_symbol, no_diff_code}, data::{diff_bss_symbols, diff_data, no_diff_data}, }, - obj::{x86::X86Formatter, ObjInfo, ObjIns, ObjSectionKind}, + obj::{ObjInfo, ObjIns, ObjSectionKind}, }; +#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, serde::Deserialize, serde::Serialize)] +pub enum X86Formatter { + #[default] + Intel, + Gas, + Nasm, + Masm, +} + #[derive(Debug, Clone, Default, Eq, PartialEq, serde::Deserialize, serde::Serialize)] #[serde(default)] pub struct DiffObjConfig { @@ -44,8 +53,8 @@ pub fn diff_objs( left_symbol.diff_symbol = Some(right_symbol.name.clone()); right_symbol.diff_symbol = Some(left_symbol.name.clone()); diff_code( + left.arch.as_ref(), config, - left.architecture, &left_section.data, &right_section.data, left_symbol, @@ -57,8 +66,8 @@ pub fn diff_objs( )?; } else { no_diff_code( + left.arch.as_ref(), config, - left.architecture, &left_section.data, left_symbol, &left_section.relocations, @@ -86,8 +95,8 @@ pub fn diff_objs( for right_symbol in &mut right_section.symbols { if right_symbol.instructions.is_empty() { no_diff_code( + right.arch.as_ref(), config, - right.architecture, &right_section.data, right_symbol, &right_section.relocations, diff --git a/objdiff-core/src/lib.rs b/objdiff-core/src/lib.rs index 1374b74..7d8e77d 100644 --- a/objdiff-core/src/lib.rs +++ b/objdiff-core/src/lib.rs @@ -1,3 +1,4 @@ +pub mod arch; #[cfg(feature = "config")] pub mod config; pub mod diff; diff --git a/objdiff-core/src/obj/mips.rs b/objdiff-core/src/obj/mips.rs deleted file mode 100644 index 8133e74..0000000 --- a/objdiff-core/src/obj/mips.rs +++ /dev/null @@ -1,155 +0,0 @@ -use std::collections::BTreeMap; - -use anyhow::Result; -use rabbitizer::{config, Abi, InstrCategory, Instruction, OperandType}; - -use crate::{ - diff::{DiffObjConfig, ProcessCodeResult}, - obj::{ObjIns, ObjInsArg, ObjInsArgValue, ObjReloc, ObjRelocKind}, -}; - -fn configure_rabbitizer() { - unsafe { - config::RabbitizerConfig_Cfg.reg_names.fpr_abi_names = Abi::O32; - } -} - -pub fn process_code( - config: &DiffObjConfig, - data: &[u8], - start_address: u64, - end_address: u64, - relocs: &[ObjReloc], - line_info: &Option>, -) -> Result { - configure_rabbitizer(); - - let ins_count = data.len() / 4; - let mut ops = Vec::::with_capacity(ins_count); - let mut insts = Vec::::with_capacity(ins_count); - let mut cur_addr = start_address as u32; - for chunk in data.chunks_exact(4) { - let reloc = relocs.iter().find(|r| (r.address as u32 & !3) == cur_addr); - let code = u32::from_be_bytes(chunk.try_into()?); - let instruction = Instruction::new(code, cur_addr, InstrCategory::CPU); - - let op = instruction.unique_id as u16; - ops.push(op); - - let mnemonic = instruction.opcode_name().to_string(); - let is_branch = instruction.is_branch(); - let branch_offset = instruction.branch_offset(); - let branch_dest = if is_branch { - cur_addr.checked_add_signed(branch_offset).map(|a| a as u64) - } else { - None - }; - - let operands = instruction.get_operands_slice(); - let mut args = Vec::with_capacity(operands.len() + 1); - for (idx, op) in operands.iter().enumerate() { - if idx > 0 { - if config.space_between_args { - args.push(ObjInsArg::PlainText(", ".to_string())); - } else { - args.push(ObjInsArg::PlainText(",".to_string())); - } - } - - match op { - OperandType::cpu_immediate - | OperandType::cpu_label - | OperandType::cpu_branch_target_label => { - if let Some(branch_dest) = branch_dest { - args.push(ObjInsArg::BranchDest(branch_dest)); - } else if let Some(reloc) = reloc { - if matches!(&reloc.target_section, Some(s) if s == ".text") - && reloc.target.address > start_address - && reloc.target.address < end_address - { - args.push(ObjInsArg::BranchDest(reloc.target.address)); - } else { - push_reloc(&mut args, reloc); - } - } else { - args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque( - op.disassemble(&instruction, None), - ))); - } - } - OperandType::cpu_immediate_base => { - if let Some(reloc) = reloc { - push_reloc(&mut args, reloc); - } else { - args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque( - OperandType::cpu_immediate.disassemble(&instruction, None), - ))); - } - args.push(ObjInsArg::PlainText("(".to_string())); - args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque( - OperandType::cpu_rs.disassemble(&instruction, None), - ))); - args.push(ObjInsArg::PlainText(")".to_string())); - } - _ => { - args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque( - op.disassemble(&instruction, None), - ))); - } - } - } - let line = line_info - .as_ref() - .and_then(|map| map.range(..=cur_addr as u64).last().map(|(_, &b)| b)); - insts.push(ObjIns { - address: cur_addr as u64, - size: 4, - op, - mnemonic, - args, - reloc: reloc.cloned(), - branch_dest, - line, - orig: None, - }); - cur_addr += 4; - } - Ok(ProcessCodeResult { ops, insts }) -} - -fn push_reloc(args: &mut Vec, reloc: &ObjReloc) { - match reloc.kind { - ObjRelocKind::MipsHi16 => { - args.push(ObjInsArg::PlainText("%hi(".to_string())); - args.push(ObjInsArg::Reloc); - args.push(ObjInsArg::PlainText(")".to_string())); - } - ObjRelocKind::MipsLo16 => { - args.push(ObjInsArg::PlainText("%lo(".to_string())); - args.push(ObjInsArg::Reloc); - args.push(ObjInsArg::PlainText(")".to_string())); - } - ObjRelocKind::MipsGot16 => { - args.push(ObjInsArg::PlainText("%got(".to_string())); - args.push(ObjInsArg::Reloc); - args.push(ObjInsArg::PlainText(")".to_string())); - } - ObjRelocKind::MipsCall16 => { - args.push(ObjInsArg::PlainText("%call16(".to_string())); - args.push(ObjInsArg::Reloc); - args.push(ObjInsArg::PlainText(")".to_string())); - } - ObjRelocKind::MipsGpRel16 => { - args.push(ObjInsArg::PlainText("%gp_rel(".to_string())); - args.push(ObjInsArg::Reloc); - args.push(ObjInsArg::PlainText(")".to_string())); - } - ObjRelocKind::Mips26 => { - args.push(ObjInsArg::Reloc); - } - ObjRelocKind::MipsGpRel32 => { - todo!("unimplemented: mips gp_rel32"); - } - kind => panic!("Unsupported MIPS relocation kind: {:?}", kind), - } -} diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index a7c1525..8ada8a7 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -1,19 +1,14 @@ -#[cfg(feature = "mips")] -pub mod mips; -#[cfg(feature = "ppc")] -pub mod ppc; pub mod read; pub mod split_meta; -#[cfg(feature = "x86")] -pub mod x86; use std::{collections::BTreeMap, fmt, path::PathBuf}; use filetime::FileTime; use flagset::{flags, FlagSet}; +use object::RelocationFlags; use split_meta::SplitMeta; -use crate::util::ReallySigned; +use crate::{arch::ObjArch, util::ReallySigned}; #[derive(Debug, Eq, PartialEq, Copy, Clone)] pub enum ObjSectionKind { @@ -196,21 +191,8 @@ pub struct ObjSymbol { pub match_percent: Option, } -#[derive(Debug, Copy, Clone)] -pub enum ObjArchitecture { - #[cfg(feature = "ppc")] - PowerPc, - #[cfg(feature = "mips")] - Mips, - #[cfg(feature = "x86")] - X86_32, - #[cfg(feature = "x86")] - X86_64, -} - -#[derive(Debug, Clone)] pub struct ObjInfo { - pub architecture: ObjArchitecture, + pub arch: Box, pub path: PathBuf, pub timestamp: FileTime, pub sections: Vec, @@ -222,50 +204,9 @@ pub struct ObjInfo { pub split_meta: Option, } -#[derive(Debug, Eq, PartialEq, Copy, Clone)] -pub enum ObjRelocKind { - Absolute, - #[cfg(feature = "ppc")] - PpcAddr16Hi, - #[cfg(feature = "ppc")] - PpcAddr16Ha, - #[cfg(feature = "ppc")] - PpcAddr16Lo, - // #[cfg(feature = "ppc")] - // PpcAddr32, - // #[cfg(feature = "ppc")] - // PpcRel32, - // #[cfg(feature = "ppc")] - // PpcAddr24, - #[cfg(feature = "ppc")] - PpcRel24, - // #[cfg(feature = "ppc")] - // PpcAddr14, - #[cfg(feature = "ppc")] - PpcRel14, - #[cfg(feature = "ppc")] - PpcEmbSda21, - #[cfg(feature = "mips")] - Mips26, - #[cfg(feature = "mips")] - MipsHi16, - #[cfg(feature = "mips")] - MipsLo16, - #[cfg(feature = "mips")] - MipsGot16, - #[cfg(feature = "mips")] - MipsCall16, - #[cfg(feature = "mips")] - MipsGpRel16, - #[cfg(feature = "mips")] - MipsGpRel32, - #[cfg(feature = "x86")] - X86PcRel32, -} - #[derive(Debug, Clone)] pub struct ObjReloc { - pub kind: ObjRelocKind, + pub flags: RelocationFlags, pub address: u64, pub target: ObjSymbol, pub target_section: Option, diff --git a/objdiff-core/src/obj/ppc.rs b/objdiff-core/src/obj/ppc.rs deleted file mode 100644 index 08d473c..0000000 --- a/objdiff-core/src/obj/ppc.rs +++ /dev/null @@ -1,164 +0,0 @@ -use std::collections::BTreeMap; - -use anyhow::{bail, Result}; -use ppc750cl::{disasm_iter, Argument, SimplifiedIns, GPR}; - -use crate::{ - diff::{DiffObjConfig, ProcessCodeResult}, - obj::{ObjIns, ObjInsArg, ObjInsArgValue, ObjReloc, ObjRelocKind}, -}; - -// Relative relocation, can be Simm, Offset or BranchDest -fn is_relative_arg(arg: &Argument) -> bool { - matches!(arg, Argument::Simm(_) | Argument::Offset(_) | Argument::BranchDest(_)) -} - -// Relative or absolute relocation, can be Uimm, Simm or Offset -fn is_rel_abs_arg(arg: &Argument) -> bool { - matches!(arg, Argument::Uimm(_) | Argument::Simm(_) | Argument::Offset(_)) -} - -fn is_offset_arg(arg: &Argument) -> bool { matches!(arg, Argument::Offset(_)) } - -pub fn process_code( - config: &DiffObjConfig, - data: &[u8], - address: u64, - relocs: &[ObjReloc], - line_info: &Option>, -) -> Result { - let ins_count = data.len() / 4; - let mut ops = Vec::::with_capacity(ins_count); - let mut insts = Vec::::with_capacity(ins_count); - for mut ins in disasm_iter(data, address as u32) { - let reloc = relocs.iter().find(|r| (r.address as u32 & !3) == ins.addr); - if let Some(reloc) = reloc { - // Zero out relocations - ins.code = match reloc.kind { - ObjRelocKind::PpcEmbSda21 => ins.code & !0x1FFFFF, - ObjRelocKind::PpcRel24 => ins.code & !0x3FFFFFC, - ObjRelocKind::PpcRel14 => ins.code & !0xFFFC, - ObjRelocKind::PpcAddr16Hi - | ObjRelocKind::PpcAddr16Ha - | ObjRelocKind::PpcAddr16Lo => ins.code & !0xFFFF, - _ => ins.code, - }; - } - let simplified = ins.clone().simplified(); - - let mut reloc_arg = None; - if let Some(reloc) = reloc { - match reloc.kind { - ObjRelocKind::PpcEmbSda21 => { - reloc_arg = Some(1); - } - ObjRelocKind::PpcRel24 | ObjRelocKind::PpcRel14 => { - reloc_arg = simplified.args.iter().rposition(is_relative_arg); - } - ObjRelocKind::PpcAddr16Hi - | ObjRelocKind::PpcAddr16Ha - | ObjRelocKind::PpcAddr16Lo => { - reloc_arg = simplified.args.iter().rposition(is_rel_abs_arg); - } - _ => {} - } - } - - let mut args = vec![]; - let mut branch_dest = None; - let mut writing_offset = false; - for (idx, arg) in simplified.args.iter().enumerate() { - if idx > 0 && !writing_offset { - if config.space_between_args { - args.push(ObjInsArg::PlainText(", ".to_string())); - } else { - args.push(ObjInsArg::PlainText(",".to_string())); - } - } - - if reloc_arg == Some(idx) { - let reloc = reloc.unwrap(); - push_reloc(&mut args, reloc)?; - // For @sda21, we can omit the register argument - if reloc.kind == ObjRelocKind::PpcEmbSda21 - // Sanity check: the next argument should be r0 - && matches!(simplified.args.get(idx + 1), Some(Argument::GPR(GPR(0)))) - { - break; - } - } else { - match arg { - Argument::Simm(simm) => { - args.push(ObjInsArg::Arg(ObjInsArgValue::Signed(simm.0 as i64))); - } - Argument::Uimm(uimm) => { - args.push(ObjInsArg::Arg(ObjInsArgValue::Unsigned(uimm.0 as u64))); - } - Argument::Offset(offset) => { - args.push(ObjInsArg::Arg(ObjInsArgValue::Signed(offset.0 as i64))); - } - Argument::BranchDest(dest) => { - let dest = ins.addr.wrapping_add_signed(dest.0) as u64; - args.push(ObjInsArg::BranchDest(dest)); - branch_dest = Some(dest); - } - _ => { - args.push(ObjInsArg::Arg(ObjInsArgValue::Opaque(arg.to_string()))); - } - }; - } - - if writing_offset { - args.push(ObjInsArg::PlainText(")".to_string())); - writing_offset = false; - } - if is_offset_arg(arg) { - args.push(ObjInsArg::PlainText("(".to_string())); - writing_offset = true; - } - } - - ops.push(simplified.ins.op as u16); - let line = line_info - .as_ref() - .and_then(|map| map.range(..=simplified.ins.addr as u64).last().map(|(_, &b)| b)); - insts.push(ObjIns { - address: simplified.ins.addr as u64, - size: 4, - mnemonic: format!("{}{}", simplified.mnemonic, simplified.suffix), - args, - reloc: reloc.cloned(), - op: ins.op as u16, - branch_dest, - line, - orig: Some(format!("{}", SimplifiedIns::basic_form(ins))), - }); - } - Ok(ProcessCodeResult { ops, insts }) -} - -fn push_reloc(args: &mut Vec, reloc: &ObjReloc) -> Result<()> { - match reloc.kind { - ObjRelocKind::PpcAddr16Lo => { - args.push(ObjInsArg::Reloc); - args.push(ObjInsArg::PlainText("@l".to_string())); - } - ObjRelocKind::PpcAddr16Hi => { - args.push(ObjInsArg::Reloc); - args.push(ObjInsArg::PlainText("@h".to_string())); - } - ObjRelocKind::PpcAddr16Ha => { - args.push(ObjInsArg::Reloc); - args.push(ObjInsArg::PlainText("@ha".to_string())); - } - ObjRelocKind::PpcEmbSda21 => { - args.push(ObjInsArg::Reloc); - args.push(ObjInsArg::PlainText("@sda21".to_string())); - } - ObjRelocKind::PpcRel24 | ObjRelocKind::PpcRel14 => { - args.push(ObjInsArg::Reloc); - } - kind => bail!("Unsupported PPC relocation kind: {:?}", kind), - }; - Ok(()) -} diff --git a/objdiff-core/src/obj/read.rs b/objdiff-core/src/obj/read.rs index 6ed5fbd..190c994 100644 --- a/objdiff-core/src/obj/read.rs +++ b/objdiff-core/src/obj/read.rs @@ -5,15 +5,16 @@ use byteorder::{BigEndian, ReadBytesExt}; use filetime::FileTime; use flagset::Flags; use object::{ - elf, pe, Architecture, BinaryFormat, Endian, File, Object, ObjectSection, ObjectSymbol, - RelocationFlags, RelocationTarget, SectionIndex, SectionKind, Symbol, SymbolKind, SymbolScope, - SymbolSection, + BinaryFormat, File, Object, ObjectSection, ObjectSymbol, RelocationTarget, SectionIndex, + SectionKind, Symbol, SymbolKind, SymbolScope, SymbolSection, }; -use crate::obj::{ - split_meta::{SplitMeta, SPLITMETA_SECTION}, - ObjArchitecture, ObjInfo, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol, - ObjSymbolFlagSet, ObjSymbolFlags, +use crate::{ + arch::{new_arch, ObjArch}, + obj::{ + split_meta::{SplitMeta, SPLITMETA_SECTION}, + ObjInfo, ObjReloc, ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, + }, }; fn to_obj_section_kind(kind: SectionKind) -> Option { @@ -26,6 +27,7 @@ fn to_obj_section_kind(kind: SectionKind) -> Option { } fn to_obj_symbol( + arch: &dyn ObjArch, obj_file: &File<'_>, symbol: &Symbol<'_, '_>, addend: i64, @@ -59,15 +61,7 @@ fn to_obj_symbol( } else { symbol.address() }; - let mut demangled_name = None; - #[cfg(feature = "ppc")] - if obj_file.architecture() == Architecture::PowerPc { - demangled_name = cwdemangle::demangle(name, &Default::default()); - } - #[cfg(feature = "x86")] - if matches!(obj_file.format(), BinaryFormat::Coff | BinaryFormat::Pe) && name.starts_with('?') { - demangled_name = msvc_demangler::demangle(name, msvc_demangler::DemangleFlags::llvm()).ok(); - } + let demangled_name = arch.demangle(name); // Find the virtual address for the symbol if available let virtual_address = split_meta .and_then(|m| m.virtual_addresses.as_ref()) @@ -129,6 +123,7 @@ fn filter_sections(obj_file: &File<'_>, split_meta: Option<&SplitMeta>) -> Resul } fn symbols_by_section( + arch: &dyn ObjArch, obj_file: &File<'_>, section: &ObjSection, split_meta: Option<&SplitMeta>, @@ -147,7 +142,7 @@ fn symbols_by_section( continue; } } - result.push(to_obj_symbol(obj_file, &symbol, 0, split_meta)?); + result.push(to_obj_symbol(arch, obj_file, &symbol, 0, split_meta)?); } } } @@ -165,15 +160,20 @@ fn symbols_by_section( Ok(result) } -fn common_symbols(obj_file: &File<'_>, split_meta: Option<&SplitMeta>) -> Result> { +fn common_symbols( + arch: &dyn ObjArch, + obj_file: &File<'_>, + split_meta: Option<&SplitMeta>, +) -> Result> { obj_file .symbols() .filter(Symbol::is_common) - .map(|symbol| to_obj_symbol(obj_file, &symbol, 0, split_meta)) + .map(|symbol| to_obj_symbol(arch, obj_file, &symbol, 0, split_meta)) .collect::>>() } fn find_section_symbol( + arch: &dyn ObjArch, obj_file: &File<'_>, target: &Symbol<'_, '_>, address: u64, @@ -197,7 +197,7 @@ fn find_section_symbol( } continue; } - return to_obj_symbol(obj_file, &symbol, 0, split_meta); + return to_obj_symbol(arch, obj_file, &symbol, 0, split_meta); } let (name, offset) = closest_symbol .and_then(|s| s.name().map(|n| (n, s.address())).ok()) @@ -221,7 +221,7 @@ fn find_section_symbol( } fn relocations_by_section( - arch: ObjArchitecture, + arch: &dyn ObjArch, obj_file: &File<'_>, section: &ObjSection, split_meta: Option<&SplitMeta>, @@ -231,6 +231,10 @@ fn relocations_by_section( for (address, reloc) in obj_section.relocations() { let symbol = match reloc.target() { RelocationTarget::Symbol(idx) => { + if idx.0 == u32::MAX as usize { + // ??? + continue; + } let Ok(symbol) = obj_file.symbol_by_index(idx) else { log::warn!( "Failed to locate relocation {:#x} target symbol {}", @@ -243,60 +247,7 @@ fn relocations_by_section( } _ => bail!("Unhandled relocation target: {:?}", reloc.target()), }; - let kind = match reloc.flags() { - RelocationFlags::Elf { r_type } => match arch { - #[cfg(feature = "ppc")] - ObjArchitecture::PowerPc => match r_type { - elf::R_PPC_ADDR32 | elf::R_PPC_UADDR32 => ObjRelocKind::Absolute, - elf::R_PPC_ADDR16_LO => ObjRelocKind::PpcAddr16Lo, - elf::R_PPC_ADDR16_HI => ObjRelocKind::PpcAddr16Hi, - elf::R_PPC_ADDR16_HA => ObjRelocKind::PpcAddr16Ha, - elf::R_PPC_REL24 => ObjRelocKind::PpcRel24, - elf::R_PPC_REL14 => ObjRelocKind::PpcRel14, - elf::R_PPC_EMB_SDA21 => ObjRelocKind::PpcEmbSda21, - _ => bail!("Unhandled ELF PPC relocation type: {r_type}"), - }, - #[cfg(feature = "mips")] - ObjArchitecture::Mips => match r_type { - elf::R_MIPS_32 => ObjRelocKind::Absolute, - elf::R_MIPS_26 => ObjRelocKind::Mips26, - elf::R_MIPS_HI16 => ObjRelocKind::MipsHi16, - elf::R_MIPS_LO16 => ObjRelocKind::MipsLo16, - elf::R_MIPS_GOT16 => ObjRelocKind::MipsGot16, - elf::R_MIPS_CALL16 => ObjRelocKind::MipsCall16, - elf::R_MIPS_GPREL16 => ObjRelocKind::MipsGpRel16, - elf::R_MIPS_GPREL32 => ObjRelocKind::MipsGpRel32, - _ => bail!("Unhandled ELF MIPS relocation type: {r_type}"), - }, - #[cfg(feature = "x86")] - ObjArchitecture::X86_32 => match r_type { - elf::R_386_32 => ObjRelocKind::Absolute, - elf::R_386_PC32 => ObjRelocKind::X86PcRel32, - _ => bail!("Unhandled ELF x86_32 relocation type: {r_type}"), - }, - #[cfg(feature = "x86")] - ObjArchitecture::X86_64 => match r_type { - elf::R_X86_64_32 => ObjRelocKind::Absolute, - elf::R_X86_64_PC32 => ObjRelocKind::X86PcRel32, - _ => bail!("Unhandled ELF x86_64 relocation type: {r_type}"), - }, - }, - RelocationFlags::Coff { typ } => match arch { - #[cfg(feature = "ppc")] - ObjArchitecture::PowerPc => bail!("Unhandled PE/COFF PPC relocation type: {typ}"), - #[cfg(feature = "mips")] - ObjArchitecture::Mips => bail!("Unhandled PE/COFF MIPS relocation type: {typ}"), - #[cfg(feature = "x86")] - ObjArchitecture::X86_32 => match typ { - pe::IMAGE_REL_I386_DIR32 => ObjRelocKind::Absolute, - pe::IMAGE_REL_I386_REL32 => ObjRelocKind::X86PcRel32, - _ => bail!("Unhandled PE/COFF x86 relocation type: {typ}"), - }, - #[cfg(feature = "x86")] - ObjArchitecture::X86_64 => bail!("Unhandled PE/COFF x86_64 relocation type: {typ}"), - }, - flags => bail!("Unhandled relocation flags: {:?}", flags), - }; + let flags = reloc.flags(); // TODO validate reloc here? let target_section = match symbol.section() { SymbolSection::Common => Some(".comm".to_string()), SymbolSection::Section(idx) => { @@ -305,40 +256,22 @@ fn relocations_by_section( _ => None, }; let addend = if reloc.has_implicit_addend() { - let data = section.data[address as usize..address as usize + 4].try_into()?; - let addend = obj_file.endianness().read_u32_bytes(data); - match kind { - ObjRelocKind::Absolute => addend as i64, - #[cfg(feature = "mips")] - ObjRelocKind::MipsHi16 => ((addend & 0x0000FFFF) << 16) as i32 as i64, - #[cfg(feature = "mips")] - ObjRelocKind::MipsLo16 - | ObjRelocKind::MipsGot16 - | ObjRelocKind::MipsCall16 - | ObjRelocKind::MipsGpRel16 => (addend & 0x0000FFFF) as i16 as i64, - #[cfg(feature = "mips")] - ObjRelocKind::MipsGpRel32 => addend as i32 as i64, - #[cfg(feature = "mips")] - ObjRelocKind::Mips26 => ((addend & 0x03FFFFFF) << 2) as i64, - #[cfg(feature = "x86")] - ObjRelocKind::X86PcRel32 => addend as i32 as i64, - _ => bail!("Unsupported implicit relocation {kind:?}"), - } + arch.implcit_addend(section, address, &reloc)? } else { reloc.addend() }; // println!("Reloc: {reloc:?}, symbol: {symbol:?}, addend: {addend:#X}"); let target = match symbol.kind() { SymbolKind::Text | SymbolKind::Data | SymbolKind::Label | SymbolKind::Unknown => { - to_obj_symbol(obj_file, &symbol, addend, split_meta) + to_obj_symbol(arch, obj_file, &symbol, addend, split_meta) } SymbolKind::Section => { ensure!(addend >= 0, "Negative addend in reloc: {addend}"); - find_section_symbol(obj_file, &symbol, addend as u64, split_meta) + find_section_symbol(arch, obj_file, &symbol, addend as u64, split_meta) } kind => Err(anyhow!("Unhandled relocation symbol type {kind:?}")), }?; - relocations.push(ObjReloc { kind, address, target, target_section }); + relocations.push(ObjReloc { flags, address, target, target_section }); } Ok(relocations) } @@ -408,34 +341,25 @@ pub fn read(obj_path: &Path) -> Result { (unsafe { memmap2::Mmap::map(&file) }?, timestamp) }; let obj_file = File::parse(&*data)?; - let architecture = match obj_file.architecture() { - #[cfg(feature = "ppc")] - Architecture::PowerPc => ObjArchitecture::PowerPc, - #[cfg(feature = "mips")] - Architecture::Mips => ObjArchitecture::Mips, - #[cfg(feature = "x86")] - Architecture::I386 => ObjArchitecture::X86_32, - #[cfg(feature = "x86")] - Architecture::X86_64 => ObjArchitecture::X86_64, - _ => bail!("Unsupported architecture: {:?}", obj_file.architecture()), - }; + let arch = new_arch(&obj_file)?; let split_meta = split_meta(&obj_file)?; - let mut result = ObjInfo { - architecture, + let mut sections = filter_sections(&obj_file, split_meta.as_ref())?; + for section in &mut sections { + section.symbols = + symbols_by_section(arch.as_ref(), &obj_file, section, split_meta.as_ref())?; + section.relocations = + relocations_by_section(arch.as_ref(), &obj_file, section, split_meta.as_ref())?; + } + let common = common_symbols(arch.as_ref(), &obj_file, split_meta.as_ref())?; + Ok(ObjInfo { + arch, path: obj_path.to_owned(), timestamp, - sections: filter_sections(&obj_file, split_meta.as_ref())?, - common: common_symbols(&obj_file, split_meta.as_ref())?, + sections, + common, line_info: line_info(&obj_file)?, - split_meta: None, - }; - for section in &mut result.sections { - section.symbols = symbols_by_section(&obj_file, section, split_meta.as_ref())?; - section.relocations = - relocations_by_section(architecture, &obj_file, section, split_meta.as_ref())?; - } - result.split_meta = split_meta; - Ok(result) + split_meta, + }) } pub fn has_function(obj_path: &Path, symbol_name: &str) -> Result { diff --git a/objdiff-gui/src/app.rs b/objdiff-gui/src/app.rs index 887c085..fdbe954 100644 --- a/objdiff-gui/src/app.rs +++ b/objdiff-gui/src/app.rs @@ -290,7 +290,7 @@ impl App { title: "Error".to_string(), progress_percent: 0.0, progress_items: None, - status: "".to_string(), + status: String::new(), error: Some(err), })); } diff --git a/objdiff-gui/src/views/config.rs b/objdiff-gui/src/views/config.rs index 59aeb46..de4f541 100644 --- a/objdiff-gui/src/views/config.rs +++ b/objdiff-gui/src/views/config.rs @@ -16,9 +16,9 @@ use egui::{ use globset::Glob; use objdiff_core::{ config::{ProjectObject, DEFAULT_WATCH_PATTERNS}, - obj::x86::X86Formatter, }; use self_update::cargo_crate_version; +use objdiff_core::diff::X86Formatter; use crate::{ app::{AppConfig, AppConfigRef, ObjectConfig}, diff --git a/objdiff-gui/src/views/function_diff.rs b/objdiff-gui/src/views/function_diff.rs index 085ade1..880b2a7 100644 --- a/objdiff-gui/src/views/function_diff.rs +++ b/objdiff-gui/src/views/function_diff.rs @@ -3,6 +3,7 @@ use std::default::Default; use egui::{text::LayoutJob, Align, Label, Layout, Sense, Vec2, Widget}; use egui_extras::{Column, TableBuilder, TableRow}; use objdiff_core::{ + arch::ObjArch, diff::display::{display_diff, DiffText, HighlightKind}, obj::{ ObjInfo, ObjIns, ObjInsArg, ObjInsArgValue, ObjInsDiff, ObjInsDiffKind, ObjSection, @@ -21,7 +22,13 @@ pub struct FunctionViewState { pub highlight: HighlightKind, } -fn ins_hover_ui(ui: &mut egui::Ui, section: &ObjSection, ins: &ObjIns, appearance: &Appearance) { +fn ins_hover_ui( + ui: &mut egui::Ui, + arch: &dyn ObjArch, + section: &ObjSection, + ins: &ObjIns, + appearance: &Appearance, +) { ui.scope(|ui| { ui.style_mut().override_text_style = Some(egui::TextStyle::Monospace); ui.style_mut().wrap = Some(false); @@ -51,7 +58,7 @@ fn ins_hover_ui(ui: &mut egui::Ui, section: &ObjSection, ins: &ObjIns, appearanc } if let Some(reloc) = &ins.reloc { - ui.label(format!("Relocation type: {:?}", reloc.kind)); + ui.label(format!("Relocation type: {}", arch.display_reloc(reloc.flags))); ui.colored_label(appearance.highlight_color, format!("Name: {}", reloc.target.name)); if let Some(section) = &reloc.target_section { ui.colored_label(appearance.highlight_color, format!("Section: {section}")); @@ -122,10 +129,14 @@ fn ins_context_menu(ui: &mut egui::Ui, ins: &ObjIns) { fn find_symbol<'a>( obj: &'a ObjInfo, selected_symbol: &SymbolReference, -) -> Option<(&'a ObjSection, &'a ObjSymbol)> { +) -> Option<(&'a dyn ObjArch, &'a ObjSection, &'a ObjSymbol)> { obj.sections.iter().find_map(|section| { section.symbols.iter().find_map(|symbol| { - (symbol.name == selected_symbol.symbol_name).then_some((section, symbol)) + (symbol.name == selected_symbol.symbol_name).then_some(( + obj.arch.as_ref(), + section, + symbol, + )) }) }) } @@ -238,6 +249,7 @@ fn asm_row_ui( fn asm_col_ui( row: &mut TableRow<'_, '_>, ins_diff: &ObjInsDiff, + arch: &dyn ObjArch, section: &ObjSection, symbol: &ObjSymbol, appearance: &Appearance, @@ -247,7 +259,7 @@ fn asm_col_ui( asm_row_ui(ui, ins_diff, symbol, appearance, ins_view_state); }); if let Some(ins) = &ins_diff.ins { - response.on_hover_ui_at_pointer(|ui| ins_hover_ui(ui, section, ins, appearance)); + response.on_hover_ui_at_pointer(|ui| ins_hover_ui(ui, arch, section, ins, appearance)); } } @@ -267,14 +279,15 @@ fn asm_table_ui( ) -> Option<()> { let left_symbol = left_obj.and_then(|obj| find_symbol(obj, selected_symbol)); let right_symbol = right_obj.and_then(|obj| find_symbol(obj, selected_symbol)); - let instructions_len = left_symbol.or(right_symbol).map(|(_, s)| s.instructions.len())?; + let instructions_len = left_symbol.or(right_symbol).map(|(_, _, s)| s.instructions.len())?; table.body(|body| { body.rows(appearance.code_font.size, instructions_len, |mut row| { let row_index = row.index(); - if let Some((section, symbol)) = left_symbol { + if let Some((arch, section, symbol)) = left_symbol { asm_col_ui( &mut row, &symbol.instructions[row_index], + arch, section, symbol, appearance, @@ -283,10 +296,11 @@ fn asm_table_ui( } else { empty_col_ui(&mut row); } - if let Some((section, symbol)) = right_symbol { + if let Some((arch, section, symbol)) = right_symbol { asm_col_ui( &mut row, &symbol.instructions[row_index], + arch, section, symbol, appearance, @@ -399,7 +413,7 @@ pub fn function_diff_ui(ui: &mut egui::Ui, state: &mut DiffViewState, appearance .second_obj .as_ref() .and_then(|obj| find_symbol(obj, selected_symbol)) - .and_then(|(_, symbol)| symbol.match_percent) + .and_then(|(_, _, symbol)| symbol.match_percent) { ui.colored_label( match_color_for_symbol(match_percent, appearance),