Files
ultrasm64-2/lib/hackerlibultra/tools/disassemble_elf.py
bicyclesoda 13313f3e31 [WIP] GCC Building + libhackerultra (#17)
Co-authored-by: a <a>
2025-06-19 21:25:47 -04:00

313 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
#
# ELF disassembler that attempts to produce matching output
#
import argparse, struct, sys
from libelf import *
from mdebug import *
from mips_isa import *
from util import *
def debug_log(msg):
    """Print *msg* on standard error so it stays out of the disassembly on stdout."""
    stream = sys.stderr
    print(msg, file=stream)
class MipsDisasm:
    """
    Prints an assembler-source rendering of the sections of an ELF object.

    The disassembly is written to standard output. State kept on the instance:

    * ``elf_file`` - the parsed ELF object handed to the constructor.
    * ``has_mdebug`` / ``mdebug`` - whether a ``SHT_MIPS_DEBUG`` section was
      found and, if so, that section (used for file, procedure and line info).
    * ``cur_file`` - the ``ST_FILE`` symbol most recently selected by
      ``advance_file``, or ``None`` before the first one.
    * ``comment_section_pos`` - read cursor into the ``.comment`` section.
    * ``section_local_labels`` - section name -> set of offsets that need a
      local label because a relocation in executable code refers to them.
    """

    # Sections that are never rendered as assembler source.
    _SKIPPED_SECTIONS = frozenset([
        '', '.strtab', '.shstrtab', '.symtab', '.reginfo', '.comment',
        '.note', '.options', '.mdebug', '.gptab.data', '.gptab.bss',
    ])

    def __init__(self, elf_file) -> None:
        """Remember *elf_file* and look up its optional mdebug section."""
        self.elf_file = elf_file

        mdebug_section = elf_file.find_section_by_type(SHT_MIPS_DEBUG)
        self.has_mdebug = mdebug_section is not None
        if self.has_mdebug:
            self.mdebug = mdebug_section

        self.cur_file = None
        # Starts at 1; presumably byte 0 of .comment is a leading NUL - TODO confirm.
        self.comment_section_pos = 1
        self.section_local_labels = {}

    def add_section_local_label(self, section, offset):
        """Record that *offset* inside the section named *section* needs a label."""
        self.section_local_labels.setdefault(section, set()).add(offset)

    def advance_file(self):
        """
        Move ``cur_file`` to the next ``ST_FILE`` symbol of the symbol table.

        The first call selects the first file symbol. When there is no later
        file symbol, ``cur_file`` is left unchanged.

        Returns True when ``cur_file`` is set after the call.
        """
        seen_cur_file = False
        for sym in self.elf_file.symtab.symbol_entries:
            if sym.type != ST_FILE:
                continue
            if seen_cur_file or self.cur_file is None:
                self.cur_file = sym
                break
            if self.cur_file == sym:
                seen_cur_file = True
        return self.cur_file is not None

    def disassemble_all_sections(self):
        """
        Print the prelude followed by every non-empty, non-skipped section.

        Executable sections are disassembled, other PROGBITS sections are
        dumped as ``.byte`` lists (split at recorded local labels) and NOBITS
        sections become a single ``.skip``.

        Raises AssertionError for a section of any other kind.
        """
        print(MipsDisasm.asm_prelude())

        for section in self.elf_file.sections:
            if section.name in self._SKIPPED_SECTIONS or section.sh_type in (SHT_REL, SHT_RELA):
                continue
            if section.sh_size == 0:
                continue

            print("")
            print(MipsDisasm.begin_section(section))

            if section.is_executable():
                self.disassemble_exec(section)
            elif section.sh_type == SHT_PROGBITS:
                # TODO kmc as doesn't support incbin, byte array this
                # print(f".incbin \"libultra.a\", 0x{section.sh_offset:08X}, 0x{section.sh_size:X}")
                labels = self.section_local_labels.get(section.name)
                at_line_start = True
                for offset, byte in enumerate(section.data):
                    if labels is not None and offset in labels:
                        # Close the running .byte line before opening a labelled one.
                        if not at_line_start:
                            print("")
                        print(f".{section.name[1].upper()}_{offset:08X}:")
                        print(" .byte ", end='')
                        at_line_start = True
                    elif at_line_start:
                        print(" .byte ", end='')
                    if not at_line_start:
                        print(", ", end='')
                    print(f"0x{int(byte):02X}", end='')
                    at_line_start = False
                print("")
            elif section.sh_type == SHT_NOBITS:
                print(f".skip 0x{section.sh_size:X}")
            else:
                # Explicit raise: a bare assert would vanish under ``python -O``.
                raise AssertionError(f"Unhandled section: {section.name}")

    def pass_section(self, section):
        """Do nothing for *section*."""
        pass

    @staticmethod
    def asm_prelude():
        """Return the fixed text printed once before any section."""
        return "\n".join((
            '.include "macro.inc"',
            '#include "regdef.h"',
            "// assembler directives",
            ".set noat // allow manual use of $at",
            ".set noreorder // don't insert nops after branches",
        ))

    @staticmethod
    def begin_section(section):
        """
        Return the ``.section`` / ``.balign`` header text for *section*.

        The flag string is only emitted when it is non-empty, or as ``""``
        when a type (``@progbits`` / ``@nobits``) follows it.
        """
        section_flags = section.flags_str().lower().replace(' ', '')
        if section_flags != "":
            section_flags = f", \"{section_flags}\""

        section_type = ""
        if section.sh_type == SHT_PROGBITS:
            section_type = ", @progbits"
        elif section.sh_type == SHT_NOBITS:
            section_type = ", @nobits"

        # A type needs a (possibly empty) flags operand in front of it.
        if section_type != "" and section_flags == "":
            section_flags = ", \"\""

        return (
            f".section {section.name}{section_flags}{section_type}\n"
            f".balign {section.sh_addralign}\n"
        )

    def get_label_name(self, addr, pdr=None, optional=False):
        """
        Return the label to use for *addr*.

        A label symbol found through *pdr* wins. Otherwise the generated name
        ``.L<addr>`` is returned, or ``None`` when *optional* is true.
        """
        if pdr is not None:
            sym = pdr.lookup_sym(addr, EcoffSt.LABEL)
            if sym is not None:
                return sym.name
        if optional:
            return None
        return f".L{addr:08X}"

    def get_comment_string(self, start):
        """
        Read the NUL-terminated string at offset *start* of ``.comment``.

        Returns ``(text, next_start)``, or ``(None, None)`` when the section
        is missing or no terminator is found at or after *start*.
        """
        comment_section = self.elf_file.find_section_by_name(".comment")
        if comment_section is None:
            return None, None
        end = comment_section.data.find(b'\0', start)
        if end == -1:
            return None, None
        comment = comment_section.data[start:end].decode("ASCII")
        return comment, end + 1

    def print_end(self, vaddr, eof):
        """
        Print the closing directives of every symbol recorded as ending at *vaddr*.

        *eof* maps an end address to an iterable of symbols.
        """
        ends = eof.get(vaddr, None)
        if ends is None:
            return
        for sym in ends:
            print(f" .type {sym.name}, @{'function' if sym.type == ST_FUNC else 'object'}")
            if sym.st_size != 0:
                print(f" .size {sym.name}, . - {sym.name}")
            print(f" .end {sym.name}\n")

    def _print_compiler_header(self):
        """Print the per-file directives emitted at a ``gcc2_compiled.`` symbol."""
        print("// compiler generated")
        if self.cur_file is None:
            print('.version "01.01"')
        if self.advance_file():
            print(f'.file 1 "{self.cur_file.name}"')

        # NOTE(review): the sentinel below contains literal quote characters;
        # confirm that strings in .comment actually carry them.
        comment_string = None
        while comment_string != '"GCC: (GNU) 2.7.2"':
            comment_string, next_pos = self.get_comment_string(self.comment_section_pos)
            if comment_string is None:
                # Keep the cursor where it is: storing None here would make the
                # next search start again from offset 0 and repeat every .ident.
                break
            self.comment_section_pos = next_pos
            print(f'.ident "{comment_string}"')

    def _apply_relocation(self, insn, rel, op_str, cur_fdr):
        """
        Return the operand string of *insn* rewritten for relocation *rel*.

        *op_str* is the operand string decoded from the raw instruction and is
        returned unchanged for a relocated ``j``. Raises AssertionError for a
        relocation / instruction combination that is not handled.
        """
        target_sym = rel.relocated_symbol

        if rel.rel_type == R_MIPS_26:
            if insn.id == MIPS_INS_JAL:
                op_str = target_sym.name
                # A call into .text itself: try to name the callee via mdebug.
                if op_str == ".text" and cur_fdr is not None:
                    callee = cur_fdr.pdr_foraddr(insn.target)
                    if callee is not None:
                        op_str = callee.name
            elif insn.id != MIPS_INS_J:
                # Branch labels for j instructions are also R_MIPS_26 relocations
                raise AssertionError(f"Got unexpected R_MIPS_26 relocation {insn.id}")
            return op_str

        if rel.rel_type == R_MIPS_HI16:
            if insn.id != MIPS_INS_LUI:
                raise AssertionError(f"Got unexpected R_MIPS_HI16 relocation {insn.id}")
            rel_name = target_sym.name
            if target_sym.type == ST_SECTION:
                # The operand names the label at offset 0 of the section, so
                # make sure that label gets defined when the section is dumped.
                self.add_section_local_label(target_sym.name, 0)
                rel_name = f".{rel_name[1].upper()}_00000000"
            return f"{insn.abi.gpr_names[insn.rt]}, %hi({rel_name})"

        if rel.rel_type == R_MIPS_LO16:
            # Ideally this should be in the elf code so the relocations don't look identical
            addend = insn.imm
            rel_name = target_sym.name
            if target_sym.type == ST_SECTION:
                # Fold the addend into a section-local label name.
                rel_name = f".{rel_name[1].upper()}_{addend:08X}"
                self.add_section_local_label(target_sym.name, addend)
                addend = 0
            addend_str = f" + 0x{addend:X}" if addend != 0 else ""

            if insn.id == MIPS_INS_ADDIU:
                return f"{insn.abi.gpr_names[insn.rt]}, {insn.abi.gpr_names[insn.rs]}, %lo({rel_name}{addend_str})"
            if insn.id in MIPS_LOAD_STORE_INSNS:
                if insn.id in MIPS_FP_LOAD_STORE_INSNS:
                    reg = insn.abi.cop1_names[insn.ft]
                else:
                    reg = insn.abi.gpr_names[insn.rt]
                return f"{reg}, %lo({rel_name}{addend_str})({insn.abi.gpr_names[insn.base]})"
            raise AssertionError(f"Got unexpected R_MIPS_LO16 relocation {insn.id}")

        raise AssertionError(f"Unhandled relocation type {rel.rel_type}")

    def disassemble_exec(self, section):
        """
        Print the disassembly of the executable *section*.

        Each instruction line carries the file offset, address and raw word in
        a comment, plus a source line number when mdebug information is
        available. Symbol headers, branch labels and symbol end directives are
        printed in front of the instruction they belong to.

        Raises AssertionError when an address has more than one relocation or
        a relocation cannot be handled.
        """
        insns = [
            decode_insn(raw, section.sh_addr + index * 4)
            for index, raw in enumerate(as_word_list(section.data))
        ]

        # Every branch / jump destination needs a label.
        branch_labels = {
            insn.target if insn.id == MIPS_INS_J else insn.offset
            for insn in insns
            if insn.id in MIPS_BRANCH_INSNS or insn.id == MIPS_INS_J
        }

        # End address -> symbols ending there. A dict is used as an ordered
        # set so the closing directives come out in a stable order.
        eof = {}

        cur_fdr = None
        cur_pdr = None

        for i, insn in enumerate(insns):
            section_offset = i * 4
            op_str = insn.op_str

            # Update mdebug info
            src_inf = ""
            if self.has_mdebug:
                # Get new fdr if there is one
                fdr = self.mdebug.fdr_foraddr(section_offset, extensions=('.c', '.s'))
                if fdr is not None:
                    cur_fdr = fdr
                # Get new pdr if there is one
                if cur_fdr is not None:
                    pdr = cur_fdr.pdr_foraddr(section_offset)
                    if pdr is not None:
                        cur_pdr = pdr
                # Line numbers
                if cur_pdr is not None:
                    asm_line = i - cur_pdr.addr // 4
                    if asm_line < len(cur_pdr.lines):
                        src_inf = f" {cur_pdr.lines[asm_line]:4}"
                    else:
                        src_inf = " PADDING"

            # Symbols for this address
            syms = section.get_sym(section_offset)

            # Close symbols that end here before opening new ones.
            self.print_end(insn.vaddr, eof)

            # Print symbol
            for sym in syms:
                if sym.name == "gcc2_compiled.":
                    self._print_compiler_header()

                if sym.bind == SB_GLOBAL:
                    print(f"glabel {sym.name}")
                else:
                    print(f"{sym.name}:")

                if sym.st_size != 0:
                    print(f" .ent {sym.name}")
                    eof.setdefault(insn.vaddr + sym.st_size, {})[sym] = None
                else:
                    print(f" .type {sym.name}, @{'function' if sym.type == ST_FUNC else 'object'}\n")

            # Print branch labels
            lbl = self.get_label_name(insn.vaddr, pdr=cur_pdr, optional=insn.vaddr not in branch_labels)
            if lbl is not None:
                print(f"{lbl}:")

            # Relocations for this address
            rels = section.get_rel(section_offset)
            if len(rels) >= 2:
                # There should never be more than 1 relocation for a single address
                raise AssertionError(f"Multiple relocations at section offset 0x{section_offset:X}")
            if len(rels) != 0:
                op_str = self._apply_relocation(insn, rels[0], op_str, cur_fdr)

            # Apply branch labels
            if insn.id in MIPS_BRANCH_INSNS:
                op_str = ", ".join(
                    self.get_label_name(insn.offset, cur_pdr) if field == 'offset'
                    else insn.format_field(field)
                    for field in insn.fields
                )
            elif insn.id == MIPS_INS_J:
                op_str = self.get_label_name(insn.target, cur_pdr)

            print(f"/* {section.sh_offset + section_offset:06X} {insn.vaddr:08X} {insn.raw:08X}{src_inf} */ {insn.mnemonic:12}{op_str:35}".rstrip())

        # Symbols that end exactly at the end of the section.
        self.print_end(section.sh_addr + section.sh_size, eof)
def main():
    """Parse the command line, load the ELF file and print its disassembly."""
    arg_parser = argparse.ArgumentParser(description="Disassemble relocatable ELF object.")
    arg_parser.add_argument("filepath", help="path to the ELF file")
    # TODO unimplemented optionals
    arg_parser.add_argument(
        "--compiler",
        default="IDO",
        help="original compiler that produced the ELF (IDO or GCC, IDO default)",
    )
    arg_parser.add_argument(
        "--strenc",
        help="string encoding, default is EUC-JP for IDO and SJIS for GCC",
    )
    options = arg_parser.parse_args()

    # Read the whole object up front; the parser gets a mutable copy.
    with open(options.filepath, "rb") as stream:
        image = bytearray(stream.read())

    MipsDisasm(ElfFile(image)).disassemble_all_sections()
# Run the disassembler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()