Files
Diddy-Kong-Racing/tools/python/split_data_regions.py
Antonio Castelli 72fe8f27bf Added .rodata and .bss sections to data region splitter tool.
The data available for these sections is less developed and much noisier
than for the .data section, so in its current state the output for them
will contain comparatively more mistakes.
2021-05-26 00:05:17 -07:00

142 lines
5.3 KiB
Python

import os
import re
from bisect import bisect
from collections import OrderedDict
from file_util import FileUtil
# Assembly file that _get_glabels scans for glabel definitions.
DATA_FILE_PATH = 'data/dkr.data.s'
# A data label name: "D_" followed by exactly eight uppercase hex digits.
GLABEL_REGEX = r'D_[0-9A-F]{8}'
# A glabel definition line; the single capture group is the label name.
GLABEL_DEF_REGEX = r'glabel (%s)' % GLABEL_REGEX
# Labels at which _get_glabels cuts the list of definitions into sections.
RODATA_START = 'D_800E49DC' # i.e. the end of .data
BSS_START = 'D_800E98D0' # i.e. the end of .rodata
# Labels that are not referenced from the file they are defined in.
# Such labels throw off the splitter algorithm, so the troublesome ones
# must be individually blacklisted for now; _get_glabels drops them
# before the sections are split.
IGNORE_GLABELS = ['D_800E0001', 'D_800E63E0', 'D_800E94D0']
def _rom_offset(vaddr):
"""
Returns the ROM offset of the corresponding virtual address given.
Parameters:
vaddr: can be a string or integer. If string, it is assumed to be in
hex.
"""
if type(vaddr) == str:
vaddr = int(vaddr, 16)
return vaddr - 0x7FFFF400
def _get_glabels():
    """
    Reads the data file and returns three lists of glabel names, in
    definition order: those belonging to .data, to .rodata, and to .bss.

    Labels listed in IGNORE_GLABELS are dropped before splitting. The
    section boundaries are the positions of RODATA_START and BSS_START in
    the remaining list; list.index raises ValueError if either is missing.
    """
    source_text = FileUtil.get_text_from_file(DATA_FILE_PATH)
    defined = []
    for name in re.findall(GLABEL_DEF_REGEX, source_text):
        if name in IGNORE_GLABELS:
            continue
        defined.append(name)
    rodata_begin = defined.index(RODATA_START)
    bss_begin = defined.index(BSS_START)
    data_section = defined[:rodata_begin]
    rodata_section = defined[rodata_begin:bss_begin]
    bss_section = defined[bss_begin:]
    return data_section, rodata_section, bss_section
def _get_file_offset(file, contents):
"""
Returns the ROM offset of the given file. Throws exception upon error.
Parameters:
file: filename. Must be a .c or .s file.
contents: the contents of file.
"""
if file.endswith('.c'):
return _rom_offset(re.search('/\* RAM_POS: 0x([0-9A-F]{8}) \*/', contents)[1])
elif file.endswith('.s'):
return int(re.search('/\* ([0-9A-F]{6}) [0-9A-F]{8} [0-9A-F]{8} \*/', contents)[1], 16)
else:
raise exception('cannot find offset for file ' + file)
def _log_glabel_usage(glabels):
    """
    Scans every .c and .s file under the current directory and records
    which files mention each glabel.

    Returns:
        usage: An ordered map (same order as glabels) from glabel name to a
            sorted list of the offsets of the files that mention it. Note
            that the recorded value is the offset of the whole file, as
            returned by _get_file_offset, not of the individual access.
        c_file_offsets: A list of (filename, ROM offset) tuples for all the
            .c files whose offset could be determined, sorted by offset.

    Parameters:
        glabels: one of the lists returned by _get_glabels.
    """
    usage = OrderedDict((glabel, set()) for glabel in glabels)
    files = FileUtil.get_filenames_from_directory_recursive('.', ('.c', '.s'))
    c_file_offsets = []
    for file in files:
        contents = FileUtil.get_text_from_file(file)
        try:
            offset = _get_file_offset(file, contents)
        except Exception:
            # Best effort: a file whose offset cannot be determined is
            # skipped. Catching Exception (not a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            continue
        if file.endswith('.c'):
            c_file_offsets.append((file, offset))
        for glabel in re.findall(GLABEL_REGEX, contents):
            # Only labels of the requested section are tracked.
            if glabel in usage:
                usage[glabel].add(offset)
    # Replace each set with a sorted list; only values change, so mutating
    # while iterating over the keys is safe.
    for glabel in usage:
        usage[glabel] = sorted(usage[glabel])
    c_file_offsets.sort(key=lambda f: f[1])
    return usage, c_file_offsets
def _filter_glabel_usage(glabel_usage):
"""
Returns a sorted (by ROM offset) list of (glabel name, ROM offset), where
the ROM offset is the estimated location the glabel is defined at. Note
that this is an estimate; the algorithm used is greedy and may
overpredict.
Parameters:
glabel_usage: output from _log_glabel_usage.
"""
filtered_usage = []
cur_offset = min(glabel_usage[next(iter(glabel_usage))])
for glabel in glabel_usage:
usage = glabel_usage[glabel]
valid_offsets = usage[bisect(usage, cur_offset):]
if len(valid_offsets) > 0:
cur_offset = valid_offsets[0]
filtered_usage.append((glabel, cur_offset))
return filtered_usage
def _split_glabel_files(glabel_usage, c_file_offsets):
"""
Returns a sorted (by file offset) list of (file name, file offset, glabel name)
for every file, where glabel name is the name of the first glabel that
lives within the ROM address domain of the corresponding file.
Parameters:
glabel_usage: output from _filter_glabel_usage.
c_file_offsets: output from _log_glabel_usage.
"""
file_splits = []
glabel_idx = 0
for i in range(len(c_file_offsets)):
file = c_file_offsets[i]
while glabel_idx < len(glabel_usage) and glabel_usage[glabel_idx][1] < file[1]:
glabel_idx += 1
if glabel_idx < len(glabel_usage) and i < len(c_file_offsets) - 1:
glabel = glabel_usage[glabel_idx]
glabel_name = glabel[0] if glabel[1] < c_file_offsets[i + 1][1] else None
else:
glabel_name = None
file_splits.append((file[0], file[1], glabel_name))
return file_splits
def main():
    """
    Prints, for each of the .data, .rodata and .bss sections, every .c file
    with its ROM offset and the first glabel estimated to belong to it.
    """
    FileUtil.set_working_dir_to_project_base()
    data_glabels, rodata_glabels, bss_glabels = _get_glabels()
    sections = (
        ('.data', data_glabels),
        ('.rodata', rodata_glabels),
        ('.bss', bss_glabels),
    )
    for section_name, section_glabels in sections:
        # The source tree is rescanned for every section, tracking only
        # that section's labels.
        usage, c_file_offsets = _log_glabel_usage(section_glabels)
        estimated = _filter_glabel_usage(usage)
        file_splits = _split_glabel_files(estimated, c_file_offsets)
        print('File splits for %s:' % section_name)
        for split in file_splits:
            print('%s (%06X): %s' % split)
        print()


if __name__ == '__main__':
    main()