graph.py - parse pokecrystal into a function graph for d3.js

This commit is contained in:
Bryan Bishop 2012-06-21 03:37:13 -05:00
parent e471fbeb32
commit 3359121ba7
2 changed files with 163 additions and 5 deletions

143
extras/graph.py Normal file
View File

@ -0,0 +1,143 @@
#!/usr/bin/python
# author: Bryan Bishop <kanzure@gmail.com>
# date: 2012-06-20
import networkx as nx
from romstr import RomStr, DisAsm, \
relative_jumps, call_commands, \
relative_unconditional_jumps
class RomGraph(nx.DiGraph):
    """ Graphs various functions pointing to each other.

    Nodes are addresses. An edge ``a -> b`` is added when the function
    disassembled at address ``a`` reports ``b`` in its ``used_addresses()``.

    TODO: Bank switches are nasty. They should be detected. Otherwise,
    functions will point to non-functions within the same bank. Another way
    to detect bankswitches is retroactively. By disassembling one function
    after another within the function banks, it can be roughly assumed that
    anything pointing to something else (within the same bank) is really
    actually a bankswitch. An even better method to handle bankswitches
    would be to just detect those situations in the asm (but I presently
    forget how bankswitches are performed in pokecrystal).
    """

    # some areas shouldn't be parsed as asm
    # NOTE: nothing in this class consults this list yet
    exclusions = []

    # where is the first function located?
    start_address = 0x150

    # and where is a good place to stop?
    end_address = 0x4000 * 0x01  # only do the first bank? sure..

    # where is the rom stored?
    rompath = "../baserom.gbc"

    # parsing stops once more than this many functions have been disassembled
    parse_limit = 100

    def __init__(self, rom=None, **kwargs):
        """ Loads and parses the ROM into a function graph.

        rom -- an already-loaded rom object (must provide to_asm); when
               None the rom is read from self.rompath instead
        kwargs -- passed through to nx.DiGraph.__init__
        """
        # continue the initialization
        nx.DiGraph.__init__(self, **kwargs)

        # load the graph
        if rom is None:
            self.load_rom()
        else:
            self.rom = rom

        # start parsing the ROM
        self.parse()

    def load_rom(self):
        """ Creates a RomStr from rompath and stores it on self.rom.
        """
        # a ROM is binary data: "rb" avoids newline translation, and the
        # with-block closes the handle even if reading fails
        with open(self.rompath, "rb") as file_handler:
            self.rom = RomStr(file_handler.read())

    def parse(self):
        """ Parses the ROM starting with the first function address. Each
        function is disassembled and parsed to find where else it leads to.

        The disassembled functions are stored on self.functions, keyed by
        address. At most parse_limit + 1 functions are disassembled.
        """
        functions = {}
        address = self.start_address
        other_addresses = set()
        count = 0

        while count <= self.parse_limit:
            if address < self.end_address and address not in functions:
                # address is okay to parse at, keep going
                pass
            elif other_addresses:
                # parse some other address possibly in a remote bank
                address = other_addresses.pop()
            else:
                # no more addresses detected- exit loop
                break

            # parse the asm
            func = self.rom.to_asm(address)

            # store this parsed function
            functions[address] = func

            # this address may have been queued earlier by another function;
            # drop it so the budget isn't spent disassembling it again
            other_addresses.discard(address)

            # where does this function jump to?
            for used_address in set(func.used_addresses()):
                # only queue this remote address if it's not yet parsed
                if used_address not in functions:
                    other_addresses.add(used_address)

                    # add this other address to the graph
                    self.add_node(used_address)

                # add this as an edge between the two nodes
                self.add_edge(address, used_address)

            # setup the next function to be parsed
            address = func.last_address
            count += 1

        self.functions = functions

    def pretty_printer(self):
        """ Shows some text output describing which nodes point to which other
        nodes.
        """
        # parenthesized form prints the same thing on python 2 and parses on 3
        print(self.edges())

    def to_d3(self):
        """ Exports the graph as json (for d3.js) to "graphs.json" in the
        current working directory.
        """
        import networkx.readwrite.json_graph as json_graph

        # NOTE(review): json_graph.dumps is what the original code called;
        # confirm it exists in the installed networkx release
        content = json_graph.dumps(self)

        with open("graphs.json", "w") as fh:
            fh.write(content)
class RedGraph(RomGraph):
    """ Placeholder graph pointed at the pokered base ROM. Per the original
    author this variant is not implemented.
    """

    # where is the rom stored?
    rompath = "../pokered-baserom.gbc"
class CryGraph(RomGraph):
    """ RomGraph configured for the ROM stored at "../baserom.gbc".
    """

    # where is the rom stored?
    rompath = "../baserom.gbc"

    # areas that shouldn't be parsed as asm
    # (presumably [first, last] address pairs -- TODO confirm)
    exclusions = [[0x000, 0x149]]
if __name__ == "__main__":
    # build the graph from the default ROM, dump its edges to stdout and
    # then write the json export
    graph = CryGraph()
    graph.pretty_printer()
    graph.to_d3()

View File

@ -13,7 +13,7 @@ end_08_scripts_with = [
] # possibly also:
# 0xc3, # jp
# 0x18, # jr
# 0xda, 0xe9, 0xd2, 0xc2, 0xca, 0xc3, 0x38, 0x30, 0x20, 0x28, 0x18, 0xd8,
# 0xda, 0xe9, 0xd2, 0xc2, 0xca, 0x38, 0x30, 0x20, 0x28, 0x18, 0xd8,
# 0xd0, 0xc0, 0xc8, 0xc9
spacing = "\t"
@ -124,7 +124,7 @@ class RomStr(str):
that will be parsed, so that large patches of data aren't parsed as
code.
"""
if "0x" in address:
if type(address) == str and "0x" in address:
address = int(address, 16)
start_address = address
@ -302,6 +302,7 @@ class DisAsm:
opstr2 = base_opstr[:base_opstr.find("x")].lower() + insertion + base_opstr[base_opstr.find("x")+1:].lower()
asm_command["formatted_with_labels"] = opstr2
asm_command["target_address"] = target_address
current_byte_number += 1
offset += 1
@ -331,6 +332,7 @@ class DisAsm:
opstr2 = base_opstr[:base_opstr.find("?")].lower() + insertion + base_opstr[base_opstr.find("?")+1:].lower()
asm_command["formatted_with_labels"] = opstr2
asm_command["target_address"] = target_address
current_byte_number += 2
offset += 2
@ -423,19 +425,32 @@ class DisAsm:
offset += 1
# also save the last command if necessary
if asm_commands[asm_commands.keys()[-1]] is not asm_command:
if len(asm_commands.keys()) > 0 and asm_commands[asm_commands.keys()[-1]] is not asm_command:
asm_commands[asm_command["address"]] = asm_command
# store the set of commands on this object
self.asm_commands = asm_commands
self.end_address = offset + 1
self.last_address = self.end_address
def has_outstanding_labels(self, asm_commands, offset):
    """ Reports whether any labels still need to be created.

    Currently a stub: both arguments are ignored and the answer is always
    False. (The original author questioned whether this is necessary.)
    """
    outstanding = False
    return outstanding
def used_addresses(self):
    """ Returns the set of unique addresses that this function will probably
    call.

    Collects the "target_address" entry of every command stored in
    self.asm_commands that has one; commands without that key are skipped.
    """
    # "in" replaces dict.has_key (gone in python 3); iterating values()
    # avoids binding an unused key to a name that shadows the builtin id
    return set(
        command["target_address"]
        for command in self.asm_commands.values()
        if "target_address" in command
    )
def __str__(self):
""" ASM pretty printer.
"""