Files
Diddy-Kong-Racing/tools/python/m2ctx.py
Ryan Myers 34508976cd Match audioline_init (#497)
* Fix some AVOID_UB stuff, and some small documenting.

* Fix m2ctx.py and add NON_EQUIV func_80045C48

* Minor issue cleanups

* Clean up audioline_ambient_create match

* Match func_80008040 as audioline_init, fix the generate ctx script, and some small warning stuff.

* Minor work on func_80008174 so it can be NON_EQUIVALENT again

* redefine asm abs

* Hackishly fix the ctx file when generated.

* Use some more constants and got rid of some start: auto stuff which is not recommended in splat anymore for BSS

* Do some documenting, and slightly better matches using JFG source.

* Formatting

* Clean up some more audio stuff

* Further cleanup and naming of things
2025-04-05 16:00:13 -04:00

527 lines
21 KiB
Python

#!/usr/bin/env python3
import os, subprocess, sys, re, time
from datetime import date
from pathlib import Path

# Directory layout: this script lives two levels below the repository root
# (tools/python/), so root_dir walks up twice.
script_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = script_dir + "/../../"
src_dir = root_dir + "src/"
lib_dir = root_dir + "libultra/"
# ROM version suffix; used to locate the linker map at build/dkr.<VERSION>.map.
VERSION = "us.v77"
# Files that are skipped entirely when collecting context.
ignoreFiles = ["include/sys/regdef.h", "include/regdef.h", "src/hasm/collision.c", "src/hasm/math_util.c", "src/stacks.c"]
# Folders (relative to the repo root) that are scanned for .c/.h files.
search_folders = ["include/", "src/"]
# Needed for StereoPanMode
includeFiles = ['libultra/src/audio/synstartvoiceparam.h']
# Multi-line #define names that should still be treated as "single-line"
# directives when splitting directives into categories.
hack_directives_into_singleline = ['DRAW_TABLE_ENTRY', 'DRAW_TABLE_GROUP']
# From: https://stackoverflow.com/a/18381470
# Removes all single line & multi-line comments from a C file.
def remove_comments(string):
    # Group 1 matches quoted strings (kept verbatim so that "//" inside a
    # string is not mistaken for a comment); group 2 matches real comments.
    pattern = r"(\".*?\"|\'.*?\')|(/\*.*?\*/|//[^\r\n]*$)"
    regex = re.compile(pattern, re.MULTILINE | re.DOTALL)

    def _strip(match):
        # A hit on group 2 is a genuine comment: drop it entirely.
        if match.group(2) is None:
            return match.group(1)  # Quoted string: keep as-is.
        return ""

    return regex.sub(_strip, string)
def remove_text_from_inside_braces(string):
    """Return `string` with every brace-enclosed region (and the braces
    themselves) removed; only text outside all {...} blocks survives."""
    depth = 0
    kept = []
    for ch in string:
        if ch == '{':
            depth += 1
        elif ch == '}':
            # An unmatched '}' at depth zero is dropped, matching the
            # original stack-based behaviour.
            if depth > 0:
                depth -= 1
        elif depth == 0:
            kept.append(ch)
    return ''.join(kept)
def regex_get_matches(text, regex):
    """Iterate over every match of `regex` in `text` (multiline mode)."""
    return re.finditer(regex, text, re.MULTILINE)
# Cleans output to reduce filesize.
cleanupUselessDefinesRegex = r"#if.*\n#(?:else|define).*\n#endif.*\n"
def cleanup(out):
    """Strip useless #if/#define-or-#else/#endif trios, tabs, blank lines,
    and repeated spaces from the generated text."""
    for useless in re.finditer(cleanupUselessDefinesRegex, out, re.MULTILINE):
        out = out.replace(useless.group(0), '')
    out = out.replace('\t', ' ')  # Replace tabs with spaces.
    out = re.sub(r"\n\n+", "\n", out)
    return re.sub(r" +", " ", out)
# Removes trailing commas from enums, which create warnings on decomp.me
fixEnumsRegex = r"enum.*{((?:[^}]*\n)*.*)}"
def fix_enums(text):
    """Drop the trailing comma just before the closing brace of each enum."""
    try:
        for match in re.finditer(fixEnumsRegex, text, re.MULTILINE):
            body = match.group(1)
            if body is not None and body.endswith(",\n"):
                text = text.replace(body, body[:-2] + "\n")
    except TypeError:
        # Defensive: leave the text untouched on a malformed match.
        pass
    return text
# Fixes up a string to make it look nice. Also removes some unnecessary stuff.
def cleanup_string(text):
    no_keywords = (text.strip()
                   .replace('UNUSED', '')
                   .replace('extern', '')
                   .replace('__stdcall', '')
                   .replace('\t', ' ')
                   .replace('\n', ' '))
    # Collapse any run of whitespace down to single spaces.
    return ' '.join(no_keywords.split())
# Returns the arguments for a function in a nice clean way.
def get_cleaned_args(argsString):
    """Normalise a raw C argument list down to just the argument types."""
    cleanedArgs = []
    for rawArg in cleanup_string(argsString).split(','):
        arg = cleanup_string(rawArg)
        if '*' in arg:
            # Truncate at the last '*', discarding the parameter name...
            arg = arg[:arg.rfind('*') + 1]
            # ...then make sure a space separates the type from the stars.
            star = arg.find('*')
            cleanedArgs.append(cleanup_string(arg[:star] + ' ' + arg[star:]))
        elif ' ' in arg:
            # "type name" -> keep only the type words.
            cleanedArgs.append(' '.join(arg.split()[:-1]))
        else:
            # Bare token (e.g. "void"): keep as-is.
            cleanedArgs.append(arg)
    return ', '.join(cleanedArgs)
# Collects functions in a file from a given regex
def collect_func_from_regex(filename, filetext, regex, data):
    """Record every function the regex finds into `data`, keyed by name.
    A duplicate with a conflicting signature is reported but not overwritten."""
    for match in regex_get_matches(filetext, regex):
        typeAndName = cleanup_string(match.group(1)).split()
        funcType = cleanup_string(' '.join(typeAndName[:-1]))
        # Skip false positives such as `return foo(...)` / `else if (...)`.
        if funcType in ('return', 'else') or funcType.startswith('INCONSISTENT'):
            continue
        funcName = cleanup_string(typeAndName[-1])
        args = get_cleaned_args(match.group(2))
        if funcName not in data:
            data[funcName] = { "type": funcType, "args": args, "filename": cleanup_string(filename) }
        elif data[funcName]["type"] != funcType or data[funcName]["args"] != args:
            print("Function doesn't match for", funcName)
            print("[Current] type:", data[funcName]["type"], "| args:", data[funcName]["args"], "| filename:", data[funcName]["filename"])
            print("[This] type:", funcType, "| args:", args, "| filename:", filename)
# Matches a function definition: return type + name, an argument list, then
# an opening '{'. The leading negative lookahead rejects `if (...) {` lines.
regex_func_def = r"^(?!\s*if\s+)([ \t]*(?:[A-Za-z0-9_*])+[ \t]+(?:[A-Za-z0-9_* ])+)[(]((?:[^)]|\n)*?)[)]\s*[{]"
# Same shape as above, but terminated by ';' — i.e. a prototype.
regex_func_proto = r"^([ \t]*(?:[A-Za-z0-9_*])+[ \t]+(?:[A-Za-z0-9_* ])+)[(]((?:[^)]|\n)*?)[)]\s*[;]"
# Collects both prototypes & definitions in a file.
def collect_function_prototypes(filename, filetext, data):
    # Prototypes are searched with function bodies stripped, so declarations
    # of locals inside braces cannot be mistaken for prototypes.
    collect_func_from_regex(filename, remove_text_from_inside_braces(filetext), regex_func_proto, data) # First get Prototypes
    collect_func_from_regex(filename, filetext, regex_func_def, data) # Then get definitions.
# Only used for single line typedef (not structs or enums)
typedefRegex_1 = r"typedef[\t ]+(([A-Za-z_0-9 ]+)[ \t])+[ *\t]*([A-Za-z_0-9]+)(?:[\[][^\]]*[\]])*[ \t]*;"
# Used for typedefs for function pointers.
typedefRegex_2 = r"typedef[\t ]+(([A-Za-z_0-9 ]+)[ \t])+[ *\t]*(?:[(][* ]*([^)]+)[)][ \t]*[(][* ]*([^)]*)[)][^;]*)[ \t]*;"
# Get argument types for function pointers.
def get_typedef_funcpointer_arg_types(text):
    """Return just the type name of each comma-separated argument."""
    typeNames = []
    for rawArg in cleanup_string(text).split(","):
        arg = cleanup_string(rawArg.replace('*', ''))  # Don't need pointers, just the name.
        # "type name" -> keep the type; a bare token already is the type.
        typeNames.append(arg.split()[0] if ' ' in arg else arg)
    return typeNames
# Get all the typedefs in the file (Not including structs, unions, or enums)
def collect_typedefs(filename, filetext, data):
    """Record plain and function-pointer typedefs into `data` keyed by the
    new type name; `checkType` (and `checkArgs`) list dependency types."""
    for match in regex_get_matches(filetext, typedefRegex_1):
        newType = match.group(3)
        if newType is None:
            newType = match.group(2)
        checkType = cleanup_string(match.group(1).replace('*', ''))  # Don't need pointers, just the name.
        data[newType] = {
            "kind": "typedef",
            "checkType": checkType,
            "value": match.group(0)
        }
    for match in regex_get_matches(filetext, typedefRegex_2):
        # Function-pointer typedefs also track their argument types, so
        # dependencies can be emitted before the typedef itself.
        data[match.group(3)] = {
            "kind": "typedef",
            "checkType": cleanup_string(match.group(1).replace('*', '')),
            "checkArgs": get_typedef_funcpointer_arg_types(match.group(4)),
            "value": match.group(0)
        }
# This regex extracts the type from a member.
structMemTypeRegex = r"[ \t]*(?:/[*][^*]*?[*]/)?[ \t]*?((?:[A-Za-z0-9_]+[ *\t]+(?=[^{ ]))+)[ \t*]*(?:(?:(?:[A-Za-z0-9_*]+)(?:[ \t*]*[\[][^]]*[\]])?[ \t]*)|(?:[^\n;]*));"
# Get all the types that are in the struct
def get_struct_types(structText):
    """List the type of every member declaration found in `structText`."""
    memberTypes = []
    for member in re.finditer(structMemTypeRegex, structText, re.MULTILINE):
        typeName = member.group(1).replace('*', '').strip()  # Don't need pointers, just the name.
        memberTypes.append(typeName)
        if typeName.startswith("struct "):
            # Might as well include both as a precaution.
            memberTypes.append(typeName[len("struct "):])
    return memberTypes
# Gets a struct/union from a file.
def collect_struct(filetext, data, structName, startIndex, index):
    # `index` enters just past the opening '{', so we start one level deep.
    nest = 1
    while(True):
        nextOpen = filetext.find('{', index)
        nextClose = filetext.find('}', index)
        if nextOpen == -1 and nextClose != -1:
            # No more opening braces ahead; unwind the remaining closers.
            nest -= 1
            index = nextClose + 1
            if nest == 0:
                break
            continue
        if nextOpen < nextClose:
            nest += 1
            index = nextOpen + 1
        else:
            nest -= 1
            index = nextClose + 1
            if nest == 0:
                break
    # `index` is now just past the closing '}' of the outermost block.
    endIndex = filetext.find('\n', index)
    while(filetext[index] == ' ' or filetext[index] == '\t'):
        index += 1
    structTypeName = ''
    # Collect the typedef'd name between '}' and ';' (empty if not a typedef).
    # NOTE(review): the `or` clauses after `!= ';'` are redundant — the loop
    # effectively runs until a ';' is reached.
    while(filetext[index] != ';' or filetext[index] == ' ' or filetext[index] == '\t'):
        structTypeName += filetext[index]
        index += 1
    structText = filetext[startIndex:endIndex].replace('\t', ' ')
    if len(structTypeName) < 1:
        # Not a typedef: key under "struct <tag>".
        #print('Structure', structName, 'is not typedef!')
        data['struct ' + structName] = {
            'kind': "struct",
            'value': structText,
            'types': get_struct_types(structText)
        }
    else:
        data[structTypeName] = {
            'kind': "struct",
            'value': structText,
            'types': get_struct_types(structText)
        }
structRegex = r"^(?:typedef\s*)?(?:struct|union)\s*([^\s]*)?\s*{"
# Get all the structs & unions from a file
def collect_structs(filename, filetext, data):
    # Each header match is handed to collect_struct, which walks the body
    # starting from the opening brace (match end).
    for st in re.finditer(structRegex, filetext, re.MULTILINE):
        collect_struct(filetext, data, st.group(1), st.start(0), st.end(0))
enumsRegex = r"^(?:typedef\s*)?(?:enum)\s*([^\s]*)?\s*{(?:[^}]|\n)*}\s*([^\s;]*)\s*;"
# Get all the enums from a file
def collect_enums(filename, filetext, data):
    """Record every enum into `data`, keyed by its typedef name, or by
    "enum <tag>" when it is not typedef'd."""
    for enum in re.finditer(enumsRegex, filetext, re.MULTILINE):
        try:
            enumName = enum.group(2)  # typedef'd name after the closing brace.
            if len(enumName) < 1:
                enumName = "enum " + enum.group(1)  # Fall back to the tag.
        except IndexError:
            enumName = "enum " + enum.group(1)
        if filename == "src/video.h":
            # Debugging aid for a file that previously produced nameless enums.
            if len(enumName) < 1:
                print("Could not find an enum name for: ", enum.group(0))
                exit()
        data[enumName] = {
            "kind": "enum",
            "value": enum.group(0)
        }
def should_directive_be_in_singleline_category(direct):
    """True when the matched directive belongs in the 'singleline' list:
    either a whitelisted multi-line #define (hack_directives_into_singleline),
    or a genuinely single-line directive (group 1 unmatched)."""
    directText = direct.group(0)
    if any(directText.startswith('#define ' + name) for name in hack_directives_into_singleline):
        return True
    return direct.group(1) is None
definesRegex = r"(^#\s*define.*\\(?:\n.*\\)*\n.*$)|(^#(?!include)(?!undef).*$)"
# Get all the preprocessor directives in a file, except for #include and #undef
def collect_directives(filename, filetext, data):
    for direct in re.finditer(definesRegex, filetext, re.MULTILINE):
        text = direct.group(0)
        # Everything lands in 'all'; only one-line-ish directives also land
        # in 'singleline'.
        if should_directive_be_in_singleline_category(direct):
            data['singleline'].append(text)
        data['all'].append(text)
# Return the end position of a string
def find_end(text, findStr):
    """Index just past the first occurrence of `findStr` in `text`."""
    return text.find(findStr) + len(findStr)
# Returns the lines that contain the symbols of a section
def get_map_section_text(mapText, sectionSearch):
    """Slice out the block of lines following `sectionSearch` in the linker
    map, up to (but not including) the next object-file marker '('."""
    start = mapText.find(sectionSearch) + len(sectionSearch)
    nextNewline = mapText.find("\n", start + 1)
    nextParen = mapText.find("(", start + 1)
    # If the next '(' comes before any newline, the section has no symbols.
    if nextNewline > nextParen:
        return ""
    start = nextNewline
    end = mapText.find("(", start)
    end = mapText.rfind("\n", start, end)
    return mapText[start:end]
symbolsSectionTextRegex = r"\s*[^\s]+\s*([A-Za-z0-9_]+)"
# Returns the symbols of a section in the map file
def get_symbols_from_section_text(sectionText):
    """Pull the symbol name out of each 'address symbol' line."""
    return [var.group(1)
            for var in re.finditer(symbolsSectionTextRegex, sectionText, re.MULTILINE)]
# Get all the variable symbols of a filename from the dkr.map file.
def get_variable_symbols_from_map(filename, mapText):
    # "src/foo.c" -> "build/src/foo.c.o" (the object file named in the map).
    objectFile = "build/" + filename[:-2] + ".c.o"
    return {
        "bss": get_symbols_from_section_text(get_map_section_text(mapText, objectFile + "(.bss)")),
        "data": get_symbols_from_section_text(get_map_section_text(mapText, objectFile + "(.data)")),
        "rodata": get_symbols_from_section_text(get_map_section_text(mapText, objectFile + "(.rodata)"))
    }
# Gets the variable and it's value from the file.
def get_variable_text(filename, filetext, symbol, isRodata):
    # Doing regex for every variable is slow, but is needed.
    match = re.search(r"[ *]" + symbol + "[^A-Za-z_0-9]", filetext)
    if match is None:
        # Rodata symbols may legitimately have no source declaration
        # (e.g. compiler-generated literals), so don't warn for those.
        if not isRodata:
            print("Warning: \"" + symbol + "\" is not defined in \"" + filename + "\"")
        return ""
    lineStart = filetext.rfind("\n", 0, match.start()) + 1
    declEnd = filetext.find(";", lineStart) + 1
    return filetext[lineStart:declEnd]
# Gets all the variables in a file.
def collect_variables(filename, filetext, mapText, data):
    if not filename.endswith('.c'):  # Only want .c files.
        return
    variables = get_variable_symbols_from_map(filename, mapText)
    # Only rodata lookups suppress the "not defined" warning.
    for section, isRodata in (("bss", False), ("data", False), ("rodata", True)):
        for var in variables[section]:
            data[var] = get_variable_text(filename, filetext, var, isRodata)
# Loads the dkr.map file from the build folder.
def load_map():
    """Return the contents of build/dkr.<VERSION>.map, or exit with an
    error message if the ROM has not been built yet.

    Fix: the original opened the file without ever closing it and used the
    site builtin `exit()`; now uses a context manager and `sys.exit(1)`.
    """
    map_path = root_dir + "/build/dkr." + VERSION + ".map"
    try:
        with open(map_path, "r") as map_file:
            return map_file.read()
    except FileNotFoundError:
        print("Error: dkr.map could not be found. Please build the rom first, then run this script.")
        sys.exit(1)
# Get all the relevant data from the files.
def collect(filenames):
    """Parse every file listed in `filenames` (a newline-separated string)
    plus the extra `includeFiles`, gathering functions, variables, types,
    and preprocessor directives into one dictionary.

    Fix: files are now read via a context manager; the original leaked one
    open file handle per scanned source file.
    """
    data = {
        'functions': {},
        'variables': {},
        'types': {}, # typedefs, structs, unions, enums
        'directives': {
            'singleline': [],
            'all': []
        }
    }
    mapText = load_map()
    filepaths = filenames.split('\n') + includeFiles
    for filename in filepaths:
        if (len(filename) < 1) or (not os.path.isfile(filename)) or (filename in ignoreFiles):
            continue
        with open(filename, 'r') as srcFile:
            filetext = srcFile.read()
        filetext = remove_comments(filetext)
        collect_directives(filename, filetext, data['directives']) # Gets directives (Except #include & #undef)
        collect_typedefs(filename, filetext, data['types']) # Gets typedefs
        collect_structs(filename, filetext, data['types']) # Gets structs/unions
        collect_enums(filename, filetext, data['types']) # Gets enums
        collect_variables(filename, filetext, mapText, data['variables']) # Get variables
        collect_function_prototypes(filename, filetext, data['functions']) # Get function prototypes
    return data
# Writes a typedef/struct/union/enum to the output file
def write_single_type(types, name, addedTypes, nameStack):
    """Emit the declaration for `name`, recursively emitting the types it
    depends on first. `addedTypes` tracks what has already been written;
    `nameStack` is the current recursion path, used to detect cycles."""
    if name in nameStack:
        print("Circular reference! value:", name, "| nameStack:", nameStack)
        return ''
    out = ''
    nameStack.append(name)
    if name not in addedTypes:
        entry = types[name]
        kind = entry['kind']
        if kind == "typedef":
            baseType = entry['checkType']
            if baseType in types:
                out += write_single_type(types, baseType, addedTypes, nameStack)
            # Function-pointer typedefs also depend on their argument types.
            for argType in entry.get('checkArgs', []):
                if argType in types:
                    out += write_single_type(types, argType, addedTypes, nameStack)
            out += entry['value'] + "\n"
        elif kind == "struct":
            for memType in entry['types']:
                # Object/Particle are skipped: a hack, due to circular
                # referencing making things difficult. :/
                if (memType in types and memType not in nameStack
                        and memType != "Object" and memType != "Particle"):
                    out += write_single_type(types, memType, addedTypes, nameStack)
            out += entry['value'] + "\n"
        elif kind == "enum":
            out += entry['value'] + "\n"
        addedTypes[name] = 1
    nameStack.pop()
    return out
# Writes all the typedef/struct/union/enum to the output file
def write_output_types(types):
    # Forward-declare Object up front: a hack, due to circular referencing
    # making things difficult. :/
    out = 'struct Object;\n'
    addedTypes = {}
    for name in types.keys():
        out += write_single_type(types, name, addedTypes, [])
    return out + '\n'
# Writes all the function prototypes to the output file
def write_output_functions(funcs):
    prototypes = [info['type'] + ' ' + name + '(' + info['args'] + ');\n'
                  for name, info in funcs.items()]
    return ''.join(prototypes) + '\n'
# Writes all the preprocessor directives to the output file
def write_output_directives(directives):
    # Hack, since cleanup() will remove the original NULL definition.
    directives['singleline'].append('#define NULL 0\n')
    singleline_text = '\n'.join(directives['singleline']) + '\n'
    all_text = '\n'.join(directives['all']) + '\n'
    return [singleline_text, all_text]
# Writes a variable to the output file
def write_output_variable(key, variables, varKeys, doneKeys):
    """Emit `key`'s declaration, first emitting any other variable whose
    name appears inside this declaration (a crude dependency ordering)."""
    if key in doneKeys:
        return ""
    out = ""
    text = variables[key]
    for other in varKeys:
        if len(variables[other]) < 1 or other == key:
            continue
        if other in text:
            out += write_output_variable(other, variables, varKeys, doneKeys)
    doneKeys[key] = 1
    return out + text.replace('UNUSED', '') + '\n'
# Writes all the variables to the output file
def write_output_variables(variables):
    doneKeys = {}
    varKeys = variables.keys()
    pieces = []
    for key in varKeys:
        if len(variables[key]) < 1:
            continue  # Symbol had no source declaration; nothing to emit.
        pieces.append(write_output_variable(key, variables, varKeys, doneKeys))
    return ''.join(pieces) + '\n'
# Preprocess everything, since mips2c doesn't like the preprocessor.
def preprocess_all(directivesText):
    """Run gcc's preprocessor over `directivesText`, expanding and removing
    every directive; returns the preprocessed text.

    Fixes: the temp file handle is now closed before gcc reads the file, and
    the temp file is removed even if gcc fails (the original leaked both).
    NOTE(review): the temp file is written to the current working directory
    while gcc runs with cwd=root_dir — this assumes the script is invoked
    from the repository root; confirm against the build setup.
    """
    with open("__temp.c", "w") as tmp:
        tmp.write(directivesText)
    try:
        cpp_command = ["gcc", "-E", "-P", "-undef", "-DBUILD_VERSION=4", "-DVERSION_us_v77", "-D_LANGUAGE_C", "-D__sgi", "-DNON_MATCHING", "-D_Static_assert(x, y)=", "-D__attribute__(x)=", "__temp.c"]
        out = subprocess.check_output(cpp_command, cwd=root_dir, encoding="utf-8")
    finally:
        os.remove("__temp.c")
    return out
# Preprocess everything but the #defines.
def preprocess_directives(directivesText):
    """Run gcc's preprocessor with -fdirectives-only, so conditionals are
    resolved but #define bodies are kept; returns the preprocessed text.

    Fixes: the temp file handle is now closed before gcc reads the file, and
    the temp file is removed even if gcc fails (the original leaked both).
    """
    with open("__temp.c", "w") as tmp:
        tmp.write(directivesText)
    try:
        cpp_command = ["gcc", "-E", "-P", "-fdirectives-only", "-undef", "-DBUILD_VERSION=4", "-DVERSION_us_v77", "-D_LANGUAGE_C", "-D__sgi", "-DNON_MATCHING", "-D_Static_assert(x, y)=", "-D__attribute__(x)=", "__temp.c"]
        out = subprocess.check_output(cpp_command, cwd=root_dir, encoding="utf-8")
    finally:
        os.remove("__temp.c")
    return out
# Creates the output file text.
def write_output(data):
    """Assemble the final ctx.c text: header comment, preprocessed defines,
    types, variables, and function prototypes."""
    today = date.today().strftime("%Y/%m/%d")
    header = '/*** DKR decomp context file (Automatically generated by m2ctx.py [' + today + ']) ***/\n\n'
    directives = write_output_directives(data['directives'])
    body = directives[0] # Only add single-line directives at first for preprocessing.
    body += write_output_types(data['types'])
    body += 's32 osTvType;\n' # Manually add this, since It doesn't seem to get added automatically.
    body += write_output_variables(data['variables'])
    body += write_output_functions(data['functions'])
    body = fix_enums(body) # Fixes trailing commas in enums, which gets rid of a warning.
    body = cleanup(body) # Removes useless stuff
    body = preprocess_all(body) # Preprocesses everything, removing all the directives.
    body = preprocess_directives(directives[1]) + body # Add the defines back in.
    body = body.replace('\nOSPifRam ;', '') # Manual hack to remove this if it exists.
    return header + body
# Uses the `find` program to return the filenames in the specified folders.
def find_files():
    """Return the newline-separated list of .c/.h files under
    `search_folders`, relative to the repository root.

    Fix: the except handler referenced an undefined `cpp_command` (copied
    from another script) and concatenated a list to a string — any `find`
    failure raised NameError/TypeError instead of reporting the real error.
    """
    find_command = ["find"] + search_folders + ["-type", "f", "-name", "*.[ch]"]
    try:
        return subprocess.check_output(find_command, cwd=root_dir, encoding="utf-8")
    except subprocess.CalledProcessError:
        print(
            "Failed to list source files, when running command:\n"
            + " ".join(find_command),
            file=sys.stderr,
        )
        sys.exit(1)
def main():
    """Generate ctx.c at the repository root from all scanned sources."""
    print("Generating context file...")
    data = collect(find_files())
    with open(os.path.join(root_dir, "ctx.c"), "w", encoding="UTF-8") as ctx_file:
        ctx_file.write(write_output(data))
    print("Done! Generated as ctx.c")

if __name__ == "__main__":
    main()