You've already forked Diddy-Kong-Racing
mirror of
https://github.com/izzy2lost/Diddy-Kong-Racing.git
synced 2026-03-10 11:36:09 -07:00
The data these sections are derived from is less developed and much noisier than that of the .data section, so in its current state the output for them will contain comparatively more mistakes.
142 lines
5.3 KiB
Python
142 lines
5.3 KiB
Python
import os
|
|
import re
|
|
from bisect import bisect
|
|
from collections import OrderedDict
|
|
|
|
from file_util import FileUtil
|
|
|
|
# Path of the assembly file that is scanned for glabel definitions.
DATA_FILE_PATH = 'data/dkr.data.s'

# A data label: "D_" followed by exactly eight upper-case hex digits.
GLABEL_REGEX = r'D_[0-9A-F]{8}'
# A label definition line; the single capture group is the label name.
GLABEL_DEF_REGEX = r'glabel (%s)' % (GLABEL_REGEX,)

# Labels at which the next section begins.
RODATA_START = 'D_800E49DC'  # i.e. the end of .data
BSS_START = 'D_800E98D0'  # i.e. the end of .rodata

# Labels that are never referenced from the file that defines them.
# The splitter algorithm is thrown off by these, so for now each
# troublesome label has to be blacklisted individually.
IGNORE_GLABELS = [
    'D_800E0001',
    'D_800E63E0',
    'D_800E94D0',
]
|
|
|
|
def _rom_offset(vaddr):
|
|
"""
|
|
Returns the ROM offset of the corresponding virtual address given.
|
|
Parameters:
|
|
vaddr: can be a string or integer. If string, it is assumed to be in
|
|
hex.
|
|
"""
|
|
if type(vaddr) == str:
|
|
vaddr = int(vaddr, 16)
|
|
return vaddr - 0x7FFFF400
|
|
|
|
def _get_glabels():
    """
    Reads the data file and collects every glabel definition found in it,
    in file order, skipping the blacklisted labels in IGNORE_GLABELS.

    Returns:
        A (data, rodata, bss) tuple of label-name lists. The .rodata list
        starts at RODATA_START and the .bss list starts at BSS_START.
    """
    text = FileUtil.get_text_from_file(DATA_FILE_PATH)

    kept = []
    for name in re.findall(GLABEL_DEF_REGEX, text):
        if name in IGNORE_GLABELS:
            continue
        kept.append(name)

    # The section boundaries are located by the position of their first label.
    rodata_at = kept.index(RODATA_START)
    bss_at = kept.index(BSS_START)

    data_part = kept[:rodata_at]
    rodata_part = kept[rodata_at:bss_at]
    bss_part = kept[bss_at:]
    return data_part, rodata_part, bss_part
|
|
|
|
def _get_file_offset(file, contents):
|
|
"""
|
|
Returns the ROM offset of the given file. Throws exception upon error.
|
|
Parameters:
|
|
file: filename. Must be a .c or .s file.
|
|
contents: the contents of file.
|
|
"""
|
|
if file.endswith('.c'):
|
|
return _rom_offset(re.search('/\* RAM_POS: 0x([0-9A-F]{8}) \*/', contents)[1])
|
|
elif file.endswith('.s'):
|
|
return int(re.search('/\* ([0-9A-F]{6}) [0-9A-F]{8} [0-9A-F]{8} \*/', contents)[1], 16)
|
|
else:
|
|
raise exception('cannot find offset for file ' + file)
|
|
|
|
def _log_glabel_usage(glabels):
    """
    Scans every .c and .s file under the current directory and records, for
    each given glabel, the ROM offsets of the files that mention it.

    Returns:
        usage: A map (in the order of glabels) from glabel names to a sorted
               list of all the ROM addresses it is accessed from.
        c_file_offsets: A list of (filename, ROM offset) tuples from all the
                        c files used, sorted by ROM offset.

    Parameters:
        glabels: output from _get_glabels.
    """
    usage = OrderedDict((glabel, set()) for glabel in glabels)
    files = FileUtil.get_filenames_from_directory_recursive('.', ('.c', '.s'))
    glabel_pattern = re.compile(GLABEL_REGEX)
    c_file_offsets = []

    for file in files:
        contents = FileUtil.get_text_from_file(file)
        try:
            offset = _get_file_offset(file, contents)
        except Exception:
            # Best effort: a file whose offset cannot be determined is
            # skipped. Catching Exception instead of using a bare except
            # keeps Ctrl-C and interpreter exit working, and keeping the
            # try body to this one call stops it from hiding unrelated bugs.
            continue

        if file.endswith('.c'):
            c_file_offsets.append((file, offset))

        # Every mention of a known glabel is attributed to the file's offset.
        for glabel in glabel_pattern.findall(contents):
            if glabel in usage:
                usage[glabel].add(offset)

    # Only values are replaced here, so iterating the keys is safe.
    for glabel in usage:
        usage[glabel] = sorted(usage[glabel])

    c_file_offsets.sort(key=lambda f: f[1])
    return usage, c_file_offsets
|
|
|
|
def _filter_glabel_usage(glabel_usage):
|
|
"""
|
|
Returns a sorted (by ROM offset) list of (glabel name, ROM offset), where
|
|
the ROM offset is the estimated location the glabel is defined at. Note
|
|
that this is an estimate; the algorithm used is greedy and may
|
|
overpredict.
|
|
Parameters:
|
|
glabel_usage: output from _log_glabel_usage.
|
|
"""
|
|
filtered_usage = []
|
|
cur_offset = min(glabel_usage[next(iter(glabel_usage))])
|
|
for glabel in glabel_usage:
|
|
usage = glabel_usage[glabel]
|
|
valid_offsets = usage[bisect(usage, cur_offset):]
|
|
if len(valid_offsets) > 0:
|
|
cur_offset = valid_offsets[0]
|
|
filtered_usage.append((glabel, cur_offset))
|
|
return filtered_usage
|
|
|
|
def _split_glabel_files(glabel_usage, c_file_offsets):
|
|
"""
|
|
Returns a sorted (by file offset) list of (file name, file offset, glabel name)
|
|
for every file, where glabel name is the name of the first glabel that
|
|
lives within the ROM address domain of the corresponding file.
|
|
Parameters:
|
|
glabel_usage: output from _filter_glabel_usage.
|
|
c_file_offsets: output from _log_glabel_usage.
|
|
"""
|
|
file_splits = []
|
|
glabel_idx = 0
|
|
for i in range(len(c_file_offsets)):
|
|
file = c_file_offsets[i]
|
|
while glabel_idx < len(glabel_usage) and glabel_usage[glabel_idx][1] < file[1]:
|
|
glabel_idx += 1
|
|
if glabel_idx < len(glabel_usage) and i < len(c_file_offsets) - 1:
|
|
glabel = glabel_usage[glabel_idx]
|
|
glabel_name = glabel[0] if glabel[1] < c_file_offsets[i + 1][1] else None
|
|
else:
|
|
glabel_name = None
|
|
file_splits.append((file[0], file[1], glabel_name))
|
|
return file_splits
|
|
|
|
def main():
    """
    Prints, for each of the .data, .rodata and .bss sections, one line per
    c file giving the file name, its ROM offset, and the first glabel
    estimated to belong to it (None when there is no such glabel).
    """
    FileUtil.set_working_dir_to_project_base()

    section_names = ('.data', '.rodata', '.bss')
    # _get_glabels returns the three label lists in this same order.
    for section_name, glabels in zip(section_names, _get_glabels()):
        usage, c_file_offsets = _log_glabel_usage(glabels)
        estimated = _filter_glabel_usage(usage)
        file_splits = _split_glabel_files(estimated, c_file_offsets)

        print('File splits for %s:' % section_name)
        for split in file_splits:
            print('%s (%06X): %s' % split)
        print()


if __name__ == '__main__':
    main()
|