You've already forked ultrasm64-2
mirror of
https://github.com/HackerN64/ultrasm64-2.git
synced 2026-01-21 10:38:08 -08:00
313 lines
12 KiB
Python
Executable File
313 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# ELF disassembler that attempts to be matching
|
|
#
|
|
|
|
import argparse, struct, sys
|
|
|
|
from libelf import *
|
|
from mdebug import *
|
|
from mips_isa import *
|
|
from util import *
|
|
|
|
def debug_log(msg):
    """Write *msg* followed by a newline to standard error."""
    sys.stderr.write(f"{msg}\n")
|
|
|
|
class MipsDisasm:
    """Print an assembly listing for the sections of a parsed ELF object.

    The listing is written to standard output.  State that has to survive
    between sections (the current ``ST_FILE`` symbol, the read cursor into the
    ``.comment`` section and the data labels referenced from code) is kept on
    the instance.
    """

    # Sections that are never emitted in the listing.
    _SKIPPED_SECTION_NAMES = (
        '', '.strtab', '.shstrtab', '.symtab', '.reginfo', '.comment',
        '.note', '.options', '.mdebug', '.gptab.data', '.gptab.bss',
    )

    def __init__(self, elf_file) -> None:
        """Remember *elf_file* and look up its optional mdebug section."""
        self.elf_file = elf_file
        # Always define the attribute so that it can be inspected even when
        # the object carries no mdebug section.
        self.mdebug = elf_file.find_section_by_type(SHT_MIPS_DEBUG)
        self.has_mdebug = self.mdebug is not None
        # Most recently announced ST_FILE symbol, None before the first one.
        self.cur_file = None
        # Offset of the next unread string in .comment; reading starts at 1.
        self.comment_section_pos = 1
        # section name -> set of offsets that need a local label
        self.section_local_labels = {}

    def add_section_local_label(self, section, offset):
        """Record that *offset* inside the section named *section* needs a label."""
        self.section_local_labels.setdefault(section, set()).add(offset)

    def advance_file(self):
        """Move ``cur_file`` to the next ``ST_FILE`` symbol of the symbol table.

        When ``cur_file`` is None the first file symbol is selected; otherwise
        the first file symbol following the current one.  If there is no
        later file symbol ``cur_file`` is left unchanged.

        Returns True when ``cur_file`` is set after the call.
        """
        seen_cur_file = False
        for sym in self.elf_file.symtab.symbol_entries:
            if sym.type != ST_FILE:
                continue
            if seen_cur_file or self.cur_file is None:
                self.cur_file = sym
                break
            if self.cur_file == sym:
                seen_cur_file = True
        return self.cur_file is not None

    def disassemble_all_sections(self):
        """Print the prelude followed by every non-empty, non-metadata section.

        Executable sections are disassembled, other ``SHT_PROGBITS`` sections
        are dumped as ``.byte`` lists and ``SHT_NOBITS`` sections become a
        ``.skip`` directive.

        Raises:
            AssertionError: for a section of any other type.
        """
        print(MipsDisasm.asm_prelude())

        for section in self.elf_file.sections:
            # NOTE(review): labels are only recorded while an executable
            # section is disassembled, so a data section listed before the
            # code that references it is dumped without labels.
            local_labels = self.section_local_labels.get(section.name, None)

            if section.name in self._SKIPPED_SECTION_NAMES or \
                    section.sh_type in (SHT_REL, SHT_RELA):
                continue
            if section.sh_size == 0:
                continue

            print("")
            print(MipsDisasm.begin_section(section))
            if section.is_executable():
                self.disassemble_exec(section)
            elif section.sh_type == SHT_PROGBITS:
                # TODO kmc as doesn't support incbin, byte array this
                first = True
                for i, b in enumerate(section.data):
                    if local_labels is not None and i in local_labels:
                        # Close the running .byte line before the new label.
                        if not first:
                            print("")
                        print(f".{section.name[1].upper()}_{i:08X}:")
                        print(" .byte ", end='')
                        first = True
                    elif first:
                        print(" .byte ", end='')
                    if not first:
                        print(", ", end='')
                    print(f"0x{int(b):02X}", end='')
                    first = False
                print("")
            elif section.sh_type == SHT_NOBITS:
                print(f".skip 0x{section.sh_size:X}")
            else:
                # Raised explicitly so the check also runs under ``python -O``.
                raise AssertionError(f"Unhandled section: {section.name}")

    def pass_section(self, section):
        """Do nothing (placeholder)."""
        pass

    @staticmethod
    def asm_prelude():
        """Return the fixed header printed at the top of the listing."""
        return """.include "macro.inc"
#include "regdef.h"

// assembler directives
.set noat // allow manual use of $at
.set noreorder // don't insert nops after branches"""

    @staticmethod
    def begin_section(section):
        """Return the ``.section`` / ``.balign`` directives opening *section*.

        The flags string is derived from ``section.flags_str()``; a section
        type is appended for ``SHT_PROGBITS`` and ``SHT_NOBITS`` sections.
        """
        section_flags = section.flags_str().lower().replace(' ', '')
        if section_flags != "":
            section_flags = f", \"{section_flags}\""

        section_type = ""
        if section.sh_type == SHT_PROGBITS:
            section_type = ", @progbits"
        elif section.sh_type == SHT_NOBITS:
            section_type = ", @nobits"

        # A type is always preceded by a flags string, even an empty one.
        if section_type != "" and section_flags == "":
            section_flags = ", \"\""

        return f""".section {section.name}{section_flags}{section_type}
.balign {section.sh_addralign}
"""

    def get_label_name(self, addr, pdr=None, optional=False):
        """Return the label to use for *addr*.

        A ``LABEL`` symbol found through ``pdr.lookup_sym`` takes precedence.
        Without one, a generated ``.L<addr>`` name is returned, or None when
        *optional* is true.
        """
        if pdr is not None:
            sym = pdr.lookup_sym(addr, EcoffSt.LABEL)
            if sym is not None:
                return sym.name
        if optional:
            return None
        return f".L{addr:08X}"

    def get_comment_string(self, start):
        """Read the NUL-terminated string at offset *start* of ``.comment``.

        Returns ``(text, next_offset)``, or ``(None, None)`` when no NUL
        terminator follows *start* or no ``.comment`` section was found.

        Raises:
            UnicodeDecodeError: if the string is not ASCII.
        """
        comment_section = self.elf_file.find_section_by_name(".comment")
        # presumably the lookup yields None for a missing section, like
        # find_section_by_type does; verify against libelf
        if comment_section is None:
            return None, None
        end = comment_section.data.find(b'\0', start)
        if end == -1:
            return None, None
        comment = comment_section.data[start:end].decode("ASCII")
        return comment, end + 1

    def _print_comment_idents(self):
        """Print ``.ident`` lines for the unread ``.comment`` strings.

        The read cursor only advances past strings that were actually read,
        so an exhausted section stays exhausted on later calls instead of
        being re-read from its beginning.
        """
        comment_string = None
        # NOTE(review): the strings are compared in their raw form, so this
        # quoted literal only matches a comment that itself contains the
        # quotes - confirm that this is the intended stop condition.
        while comment_string != "\"GCC: (GNU) 2.7.2\"":
            comment_string, next_pos = self.get_comment_string(self.comment_section_pos)
            if comment_string is None:
                break
            self.comment_section_pos = next_pos
            print(f".ident \"{comment_string}\"")

    def print_end(self, vaddr, eof):
        """Print the closing directives of every symbol that ends at *vaddr*.

        *eof* maps an end address to the symbols ending there.
        """
        ends = eof.get(vaddr, None)
        if ends is None:
            return
        for sym in ends:
            print(f" .type {sym.name}, @{'function' if sym.type == ST_FUNC else 'object'}")
            if sym.st_size != 0:
                print(f" .size {sym.name}, . - {sym.name}")
            print(f" .end {sym.name}\n")

    def disassemble_exec(self, section):
        """Disassemble the executable *section* and print one line per word.

        Besides the instructions this prints symbol and branch labels,
        substitutes relocated operands by symbolic expressions and, when
        mdebug data is present, annotates each line with its source line.

        Raises:
            AssertionError: on more than one relocation for an address or on
                a relocation that is not handled.
        """
        raw_insns = as_word_list(section.data)
        insns = [decode_insn(raw, section.sh_addr + j * 4) for j, raw in enumerate(raw_insns)]

        # enumerate branch labels
        branch_labels = set()
        for insn in insns:
            if insn.id in MIPS_BRANCH_INSNS or insn.id == MIPS_INS_J:
                branch_labels.add(insn.target if insn.id == MIPS_INS_J else insn.offset)

        # end vaddr -> symbols ending there; a list keeps them in the order
        # in which they were opened so that the output order is stable
        eof = {}

        def add_end(vaddr, sym):
            ends = eof.setdefault(vaddr, [])
            if sym not in ends:
                ends.append(sym)

        cur_fdr = None
        cur_pdr = None
        for i, insn in enumerate(insns):
            mnemonic = insn.mnemonic
            op_str = insn.op_str

            # Update mdebug info
            src_inf = ""
            if self.has_mdebug:
                # Get new fdr if there is one
                fdr = self.mdebug.fdr_foraddr(i * 4, extensions=('.c', '.s'))
                if fdr is not None:
                    cur_fdr = fdr

                # Get new pdr if there is one
                if cur_fdr is not None:
                    pdr = cur_fdr.pdr_foraddr(i * 4)
                    if pdr is not None:
                        cur_pdr = pdr

                # Line numbers
                if cur_pdr is not None:
                    asm_line = i - cur_pdr.addr // 4
                    if asm_line < len(cur_pdr.lines):
                        src_inf = f" {cur_pdr.lines[asm_line]:4}"
                    else:
                        src_inf = " PADDING"

            # Symbols for this address
            syms = section.get_sym(i * 4)

            # Print end
            self.print_end(insn.vaddr, eof)

            # Print symbol
            for sym in syms:
                if sym.name == "gcc2_compiled.":
                    print("// compiler generated")
                    if self.cur_file is None:
                        print(".version \"01.01\"")
                    if self.advance_file():
                        print(f".file 1 \"{self.cur_file.name}\"")
                    self._print_comment_idents()

                if sym.bind == SB_GLOBAL:
                    print(f"glabel {sym.name}")
                else:
                    print(f"{sym.name}:")

                if sym.st_size != 0:
                    print(f" .ent {sym.name}")
                    add_end(insn.vaddr + sym.st_size, sym)
                else:
                    print(f" .type {sym.name}, @{'function' if sym.type == ST_FUNC else 'object'}\n")

            # Print branch labels
            lbl = self.get_label_name(insn.vaddr, pdr=cur_pdr,
                                      optional=insn.vaddr not in branch_labels)
            if lbl is not None:
                print(f"{lbl}:")

            # Relocations for this address
            rels = section.get_rel(i * 4)
            # There should never be more than 1 relocation for a single address, right?
            if len(rels) >= 2:
                raise AssertionError(f"Multiple relocations at offset 0x{i * 4:X}")

            # Apply relocation
            if len(rels) != 0:
                rel = rels[0]
                if rel.rel_type == R_MIPS_26:
                    if insn.id == MIPS_INS_JAL:
                        op_str = rel.relocated_symbol.name
                        # A call relative to the section symbol is resolved
                        # to the procedure name when mdebug knows it.
                        if op_str == ".text" and cur_fdr is not None:
                            pdr = cur_fdr.pdr_foraddr(insn.target)
                            if pdr is not None:
                                op_str = pdr.name
                    elif insn.id != MIPS_INS_J:  # Branch labels for j instructions are also R_MIPS_26 relocations
                        raise AssertionError(f"Got unexpected R_MIPS_26 relocation {insn.id}")
                elif rel.rel_type == R_MIPS_HI16:
                    if insn.id not in [MIPS_INS_LUI]:
                        raise AssertionError(f"Got unexpected R_MIPS_HI16 relocation {insn.id}")
                    rel_name = rel.relocated_symbol.name
                    if rel.relocated_symbol.type == ST_SECTION:
                        rel_name = f".{rel_name[1].upper()}_00000000"

                    op_str = f"{insn.abi.gpr_names[insn.rt]}, %hi({rel_name})"
                elif rel.rel_type == R_MIPS_LO16:
                    # Ideally this should be in the elf code so the relocations don't look identical
                    addend = insn.imm
                    rel_name = rel.relocated_symbol.name
                    if rel.relocated_symbol.type == ST_SECTION:
                        # The addend becomes part of a section-local label
                        # that is emitted when the target section is dumped.
                        rel_name = f".{rel_name[1].upper()}_{addend:08X}"
                        self.add_section_local_label(rel.relocated_symbol.name, addend)
                        addend = 0
                    addend_str = f" + 0x{addend:X}" if addend != 0 else ""

                    if insn.id == MIPS_INS_ADDIU:
                        op_str = f"{insn.abi.gpr_names[insn.rt]}, {insn.abi.gpr_names[insn.rs]}, %lo({rel_name}{addend_str})"
                    elif insn.id in MIPS_LOAD_STORE_INSNS:
                        if insn.id in MIPS_FP_LOAD_STORE_INSNS:
                            op_str = f"{insn.abi.cop1_names[insn.ft]}, "
                        else:
                            op_str = f"{insn.abi.gpr_names[insn.rt]}, "
                        op_str += f"%lo({rel_name}{addend_str})({insn.abi.gpr_names[insn.base]})"
                    else:
                        raise AssertionError(f"Got unexpected R_MIPS_LO16 relocation {insn.id}")
                else:
                    raise AssertionError(f"Unhandled relocation type {rel.rel_type}")

            # Apply branch labels
            if insn.id in MIPS_BRANCH_INSNS:
                op_str_parts = []
                for field in insn.fields:
                    if field == 'offset':
                        op_str_parts.append(self.get_label_name(insn.offset, cur_pdr))
                    else:
                        op_str_parts.append(insn.format_field(field))
                op_str = ", ".join(op_str_parts)
            elif insn.id == MIPS_INS_J:
                op_str = self.get_label_name(insn.target, cur_pdr)

            print(f"/* {section.sh_offset + i * 4:06X} {insn.vaddr:08X} {insn.raw:08X}{src_inf} */ {mnemonic:12}{op_str:35}".rstrip())

        # Close the symbols that end exactly at the end of the section.
        self.print_end(section.sh_addr + section.sh_size, eof)
|
|
|
|
def main():
    """Parse the command line, load the ELF file and print its disassembly."""
    arg_parser = argparse.ArgumentParser(description="Disassemble relocatable ELF object.")
    arg_parser.add_argument("filepath", help="path to the ELF file")
    # TODO unimplemented optionals
    arg_parser.add_argument(
        "--compiler",
        default="IDO",
        help="original compiler that produced the ELF (IDO or GCC, IDO default)",
    )
    arg_parser.add_argument(
        "--strenc",
        help="string encoding, default is EUC-JP for IDO and SJIS for GCC",
    )
    options = arg_parser.parse_args()

    # Read the whole object up front; the parser works on a mutable buffer.
    with open(options.filepath, "rb") as stream:
        contents = bytearray(stream.read())

    MipsDisasm(ElfFile(contents)).disassemble_all_sections()


if __name__ == '__main__':
    main()
|