#!/usr/bin/env python3

import os
import re
import subprocess
import sys
import time
from datetime import date
from pathlib import Path

script_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = script_dir + "/../../"
src_dir = root_dir + "src/"
lib_dir = root_dir + "libultra/"

VERSION = "us.v77"

ignoreFiles = ["include/sys/regdef.h", "include/regdef.h", "src/hasm/collision.c", "src/hasm/math_util.c", "src/stacks.c"]
search_folders = ["include/", "src/"]

# Needed for StereoPanMode
includeFiles = ['libultra/src/audio/synstartvoiceparam.h']

hack_directives_into_singleline = ['DRAW_TABLE_ENTRY', 'DRAW_TABLE_GROUP']

# From: https://stackoverflow.com/a/18381470
# First group captures quoted strings (double or single).
# Second group captures comments (//single-line or /* multi-line */).
# Compiled once at import time instead of on every remove_comments() call.
_COMMENT_OR_STRING_RE = re.compile(
    r"(\".*?\"|\'.*?\')|(/\*.*?\*/|//[^\r\n]*$)",
    re.MULTILINE | re.DOTALL,
)


# Removes all single line & multi-line comments from a C file.
def remove_comments(string):
    """Return `string` with C `//` and `/* */` comments removed.

    Quoted strings are matched first and returned untouched, so comment
    markers that appear inside a string literal are preserved.
    """
    def _replacer(match):
        # If the 2nd group (capturing comments) is not None, we have captured
        # a non-quoted (real) comment: replace it with nothing.
        if match.group(2) is not None:
            return ""
        # Otherwise we captured a quoted string: keep it as is.
        return match.group(1)

    return _COMMENT_OR_STRING_RE.sub(_replacer, string)


def remove_text_from_inside_braces(string):
    """Return `string` without any `{ ... }` blocks.

    The braces themselves and everything between them (including nested
    blocks) are dropped. A stray `}` outside of any block is dropped as well.
    """
    result = []
    depth = 0  # Number of currently open curly braces.
    for char in string:
        if char == '{':
            depth += 1
        elif char == '}':
            if depth > 0:
                depth -= 1
        elif depth == 0:
            result.append(char)  # Only keep characters outside of braces.
    return ''.join(result)


def regex_get_matches(text, regex):
    """Return an iterator over all matches of `regex` in `text` (multiline mode)."""
    return re.finditer(regex, text, re.MULTILINE)


# Cleans output to reduce filesize.
cleanupUselessDefinesRegex = r"#if.*\n#(?:else|define).*\n#endif.*\n"


def cleanup(out):
    """Shrink the generated text.

    Removes three-line `#if... / #else|#define ... / #endif` groups, turns
    tabs into spaces, collapses runs of blank lines into a single newline and
    runs of spaces into a single space.
    """
    out = re.sub(cleanupUselessDefinesRegex, '', out)
    out = out.replace('\t', ' ')  # Replace tabs with spaces.
    out = re.sub(r"\n\n+", "\n", out)
    out = re.sub(r" +", " ", out)
    return out


# Removes trailing commas from enums, which create warnings on decomp.me
fixEnumsRegex = r"enum.*{((?:[^}]*\n)*.*)}"


def fix_enums(text):
    """Return `text` with the trailing `,` before an enum's closing brace removed.

    Only bodies that end in ",\\n" are changed; each match is edited in place
    so identical text elsewhere in `text` is left alone.
    """
    def _strip_trailing_comma(match):
        body = match.group(1)
        if not body.endswith(",\n"):
            return match.group(0)
        whole = match.group(0)
        bodyStart = match.start(1) - match.start(0)
        bodyEnd = bodyStart + len(body)
        return whole[:bodyStart] + body[:-2] + "\n" + whole[bodyEnd:]

    return re.sub(fixEnumsRegex, _strip_trailing_comma, text, flags=re.MULTILINE)


# Whole-word match, so identifiers that merely contain one of these words
# (e.g. `external_flag`) are not mangled.
_STRIPPED_KEYWORDS_RE = re.compile(r"\b(?:UNUSED|extern|__stdcall)\b")


# Fixes up a string to make it look nice. Also removes some unnecessary stuff.
def cleanup_string(text):
    """Drop `UNUSED`, `extern` and `__stdcall`, then normalise all whitespace.

    Every run of whitespace (spaces, tabs, newlines) becomes one space and
    leading/trailing whitespace is removed.
    """
    return ' '.join(_STRIPPED_KEYWORDS_RE.sub('', text).split())


# Returns the arguments for a function in a nice clean way.
def get_cleaned_args(argsString):
    """Reduce a C parameter list to its types.

    For pointer parameters everything after the last `*` is dropped; for
    other parameters with more than one word the last word is dropped.
    Single-word parameters (e.g. `void`) are kept as they are.
    """
    cleanedArgs = []
    for arg in cleanup_string(argsString).split(','):
        arg = cleanup_string(arg)
        if '*' in arg:
            arg = arg[0:arg.rfind('*') + 1]
            firstAsterisk = arg.find('*')
            # Put exactly one space between the base type and the asterisks.
            cleanedArgs.append(cleanup_string(arg[0:firstAsterisk] + ' ' + arg[firstAsterisk:]))
        elif ' ' in arg:
            cleanedArgs.append(' '.join(arg.split()[0:-1]))
        else:
            cleanedArgs.append(arg)
    return ', '.join(cleanedArgs)


# Collects functions in a file from a given regex
def collect_func_from_regex(filename, filetext, regex, data):
    """Record every function matched by `regex` in `filetext` into `data`.

    `data` maps a function name to its "type", "args" and "filename". A
    function that is already recorded is kept; a message is printed when the
    new type or args differ from the recorded ones.
    """
    for match in regex_get_matches(filetext, regex):
        typeAndName = cleanup_string(match.group(1)).split()
        if not typeAndName:
            # Nothing is left once the stripped keywords are removed
            # (e.g. `extern UNUSED(x);`), so there is no name to record.
            continue
        funcType = cleanup_string(' '.join(typeAndName[0:-1]))
        if funcType == 'return' or funcType == "else" or funcType.startswith('INCONSISTENT'):
            continue  # Skip false matches
        funcName = cleanup_string(typeAndName[-1])
        args = get_cleaned_args(match.group(2))
        if funcName not in data:
            data[funcName] = {
                "type": funcType,
                "args": args,
                "filename": cleanup_string(filename)
            }
            continue
        known = data[funcName]
        if known["type"] != funcType or known["args"] != args:
            print("Function doesn't match for", funcName)
            print("[Current] type:", known["type"], "| args:", known["args"], "| filename:", known["filename"])
            print("[This] type:", funcType, "| args:", args, "| filename:", filename)


regex_func_def = r"^(?!\s*if\s+)([ \t]*(?:[A-Za-z0-9_*])+[ \t]+(?:[A-Za-z0-9_* ])+)[(]((?:[^)]|\n)*?)[)]\s*[{]"
regex_func_proto = r"^([ \t]*(?:[A-Za-z0-9_*])+[ \t]+(?:[A-Za-z0-9_* ])+)[(]((?:[^)]|\n)*?)[)]\s*[;]"


# Collects both prototypes & definitions in a file.
def collect_function_prototypes(filename, filetext, data):
    """Collect prototypes (searched outside of braces), then definitions."""
    collect_func_from_regex(filename, remove_text_from_inside_braces(filetext), regex_func_proto, data)  # First get Prototypes
    collect_func_from_regex(filename, filetext, regex_func_def, data)  # Then get definitions.
# Only used for single line typedef (not structs or enums)
typedefRegex_1 = r"typedef[\t ]+(([A-Za-z_0-9 ]+)[ \t])+[ *\t]*([A-Za-z_0-9]+)(?:[\[][^\]]*[\]])*[ \t]*;"
# Used for typedefs for function pointers.
typedefRegex_2 = r"typedef[\t ]+(([A-Za-z_0-9 ]+)[ \t])+[ *\t]*(?:[(][* ]*([^)]+)[)][ \t]*[(][* ]*([^)]*)[)][^;]*)[ \t]*;"


# Get argument types for function pointers.
def get_typedef_funcpointer_arg_types(text):
    """Return the first word of every comma-separated argument in `text`."""
    out = []
    for arg in cleanup_string(text).split(","):
        arg = cleanup_string(arg.replace('*', ''))  # Don't need pointers, just the name.
        if ' ' in arg:
            out.append(arg.split()[0])
        else:
            out.append(arg)
    return out


# Get all the typedefs in the file (Not including structs, unions, or enums)
def collect_typedefs(filename, filetext, data):
    """Store single-line and function-pointer typedefs of `filetext` in `data`.

    Each entry is keyed by the new type name and records the type it is based
    on ("checkType"), the full typedef text ("value") and, for function
    pointers, the argument types ("checkArgs").
    """
    for match in regex_get_matches(filetext, typedefRegex_1):
        newType = match.group(3)
        if newType is None:
            newType = match.group(2)
        checkType = cleanup_string(match.group(1).replace('*', ''))  # Don't need pointers, just the name.
        data[newType] = {
            "kind": "typedef",
            "checkType": checkType,
            "value": match.group(0)
        }
    for match in regex_get_matches(filetext, typedefRegex_2):
        newType = match.group(3)
        checkType = cleanup_string(match.group(1).replace('*', ''))  # Don't need pointers, just the name.
        data[newType] = {
            "kind": "typedef",
            "checkType": checkType,
            "checkArgs": get_typedef_funcpointer_arg_types(match.group(4)),
            "value": match.group(0)
        }


# This regex extracts the type from a member.
structMemTypeRegex = r"[ \t]*(?:/[*][^*]*?[*]/)?[ \t]*?((?:[A-Za-z0-9_]+[ *\t]+(?=[^{ ]))+)[ \t*]*(?:(?:(?:[A-Za-z0-9_*]+)(?:[ \t*]*[\[][^]]*[\]])?[ \t]*)|(?:[^\n;]*));"


# Get all the types that are in the struct
def get_struct_types(structText):
    """Return the member types found in `structText`, without pointer stars."""
    types = []
    for mem in regex_get_matches(structText, structMemTypeRegex):
        memType = mem.group(1).replace('*', '').strip()  # Don't need pointers, just the name.
        types.append(memType)
        if memType.startswith("struct "):
            types.append(memType[7:])  # Might as well include both as a precaution.
    return types


# Gets a struct/union from a file.
def collect_struct(filetext, data, structName, startIndex, index):
    """Store the struct/union that starts at `startIndex` in `data`.

    `index` must point just past the opening `{`. The entry is keyed by the
    text between the closing `}` and the next `;` when there is any,
    otherwise by "struct " + `structName`. Nothing is stored (a warning is
    printed) when the braces never balance.
    """
    nest = 1
    while nest > 0:
        nextOpen = filetext.find('{', index)
        nextClose = filetext.find('}', index)
        if nextClose == -1:
            # Unbalanced braces: there is no sane end for this struct.
            print("Warning: unbalanced braces in struct/union \"" + str(structName) + "\"")
            return
        if nextOpen != -1 and nextOpen < nextClose:
            nest += 1
            index = nextOpen + 1
        else:
            nest -= 1
            index = nextClose + 1
    # The stored text runs to the end of the line holding the closing brace.
    endIndex = filetext.find('\n', index)
    if endIndex == -1:
        endIndex = len(filetext)
    # Whatever sits between the closing brace and the `;` is the type name.
    nameEnd = filetext.find(';', index)
    if nameEnd == -1:
        nameEnd = len(filetext)
    structTypeName = filetext[index:nameEnd].strip(' \t')
    structText = filetext[startIndex:endIndex].replace('\t', ' ')
    if len(structTypeName) < 1:
        # The structure is not typedef'd, so key it by its tag.
        key = 'struct ' + structName
    else:
        key = structTypeName
    data[key] = {
        'kind': "struct",
        'value': structText,
        'types': get_struct_types(structText)
    }


structRegex = r"^(?:typedef\s*)?(?:struct|union)\s*([^\s]*)?\s*{"


# Get all the structs & unions from a file
def collect_structs(filename, filetext, data):
    """Run collect_struct() for every struct/union header in `filetext`."""
    for st in regex_get_matches(filetext, structRegex):
        collect_struct(filetext, data, st.group(1), st.start(0), st.end(0))


enumsRegex = r"^(?:typedef\s*)?(?:enum)\s*([^\s]*)?\s*{(?:[^}]|\n)*}\s*([^\s;]*)\s*;"


# Get all the enums from a file
def collect_enums(filename, filetext, data):
    """Store every enum of `filetext` in `data`.

    The key is the name after the closing brace when present, otherwise
    "enum " followed by the tag before the opening brace.
    """
    for enum in regex_get_matches(filetext, enumsRegex):
        enumName = enum.group(2)
        if len(enumName) < 1:
            # NOTE(review): an enum with neither a tag nor a trailing name is
            # keyed as "enum ", so several of them overwrite each other in
            # `data` - confirm that this is acceptable.
            enumName = "enum " + enum.group(1)
        data[enumName] = {
            "kind": "enum",
            "value": enum.group(0)
        }


def should_directive_be_in_singleline_category(direct):
    """Return True if the directive match belongs in the 'singleline' list.

    That is the case for the names in `hack_directives_into_singleline`, and
    for every match that is not a backslash-continued `#define` (group 1 of
    `definesRegex` did not participate).
    """
    directText = direct.group(0)
    if any(directText.startswith('#define ' + hackDir) for hackDir in hack_directives_into_singleline):
        return True
    return direct.group(1) is None


definesRegex = r"(^#\s*define.*\\(?:\n.*\\)*\n.*$)|(^#(?!include)(?!undef).*$)"


# Get all the preprocessor directives in a file, except for #include and #undef
def collect_directives(filename, filetext, data):
    """Append every matched directive to data['all'] (and 'singleline' if it qualifies)."""
    for direct in regex_get_matches(filetext, definesRegex):
        if should_directive_be_in_singleline_category(direct):
            data['singleline'].append(direct.group(0))
        data['all'].append(direct.group(0))


# Return the end position of a string
def find_end(text, findStr):
    """Return the index just past the first `findStr` in `text`, or -1 if absent."""
    start = text.find(findStr)
    if start == -1:
        return -1
    return start + len(findStr)


# Returns the lines that contain the symbols of a section
def get_map_section_text(mapText, sectionSearch):
    """Return the symbol lines that follow the `sectionSearch` entry in `mapText`.

    Returns "" when the entry is missing, or when a `(` shows up before the
    next-but-one newline (the section is treated as having no symbols).
    """
    start = find_end(mapText, sectionSearch)
    if start == -1:
        return ""  # This file has no such section in the map.
    if mapText.find("\n", start + 1) > mapText.find("(", start + 1):
        return ""
    start = mapText.find("\n", start + 1)
    end = mapText.find("(", start)
    end = mapText.rfind("\n", start, end)
    return mapText[start:end]


symbolsSectionTextRegex = r"\s*[^\s]+\s*([A-Za-z0-9_]+)"


# Returns the symbols of a section in the map file
def get_symbols_from_section_text(sectionText):
    """Return the symbol names (group 1 of each match) found in `sectionText`."""
    return [var.group(1) for var in regex_get_matches(sectionText, symbolsSectionTextRegex)]


# Get all the variable symbols of a filename from the dkr.map file.
def get_variable_symbols_from_map(filename, mapText):
    """Return the "bss", "data" and "rodata" symbol lists for `filename`.

    The last two characters of `filename` are dropped and ".c.o" appended to
    build the object name that is searched for in `mapText`.
    """
    sectionSearch = "build/" + filename[:-2] + ".c.o"
    return {
        section: get_symbols_from_section_text(
            get_map_section_text(mapText, sectionSearch + "(." + section + ")"))
        for section in ("bss", "data", "rodata")
    }


# Gets the variable and its value from the file.
def get_variable_text(filename, filetext, symbol, isRodata):
    """Return the text from the start of the line naming `symbol` up to the next `;`.

    The symbol must be preceded by a space or `*` and followed by a
    non-identifier character. Returns "" when it is not found; a warning is
    printed in that case unless `isRodata` is true.
    """
    # Doing regex for every variable is slow, but is needed.
    # The symbol is escaped so it is always matched literally.
    match = re.search(r"[ *]" + re.escape(symbol) + "[^A-Za-z_0-9]", filetext)
    if match is None:
        if not isRodata:
            print("Warning: \"" + symbol + "\" is not defined in \"" + filename + "\"")
        return ""
    start = filetext.rfind("\n", 0, match.start()) + 1
    end = filetext.find(";", start) + 1  # 0 (empty result) when there is no ';'.
    return filetext[start:end]


# Gets all the variables in a file.
def collect_variables(filename, filetext, mapText, data):
    """Fill `data` with the text of every map symbol belonging to a .c file."""
    if not filename.endswith('.c'):  # Only want .c files.
        return
    variables = get_variable_symbols_from_map(filename, mapText)
    # Missing rodata symbols are not reported (isRodata=True).
    for section, isRodata in (("bss", False), ("data", False), ("rodata", True)):
        for var in variables[section]:
            data[var] = get_variable_text(filename, filetext, var, isRodata)


# Loads the dkr.map file from the build folder.
def load_map():
    """Return the contents of build/dkr.<VERSION>.map.

    Exits the script with status 1 when the map file does not exist.
    """
    try:
        with open(root_dir + "/build/dkr." + VERSION + ".map", "r") as mapFile:
            return mapFile.read()
    except FileNotFoundError:
        print("Error: dkr.map could not be found. Please build the rom first, then run this script.", file=sys.stderr)
        sys.exit(1)


# Get all the relevant data from the files.
def collect(filenames):
    """Parse every listed file and return the collected context data.

    `filenames` is a newline-separated list of paths relative to `root_dir`;
    the entries of `includeFiles` are added to it. Empty names, ignored files
    and paths that are not files are skipped.
    """
    data = {
        'functions': {},
        'variables': {},
        'types': {},  # typedefs, structs, unions, enums
        'directives': {
            'singleline': [],
            'all': []
        }
    }
    mapText = load_map()
    filepaths = filenames.split('\n') + includeFiles
    for filename in filepaths:
        if len(filename) < 1 or filename in ignoreFiles:
            continue
        # The names are relative to root_dir, so resolve them against it
        # rather than against the current working directory.
        filepath = os.path.join(root_dir, filename)
        if not os.path.isfile(filepath):
            continue
        with open(filepath, 'r') as srcFile:
            filetext = remove_comments(srcFile.read())
        collect_directives(filename, filetext, data['directives'])  # Gets directives (Except #include & #undef)
        collect_typedefs(filename, filetext, data['types'])  # Gets typedefs
        collect_structs(filename, filetext, data['types'])  # Gets structs/unions
        collect_enums(filename, filetext, data['types'])  # Gets enums
        collect_variables(filename, filetext, mapText, data['variables'])  # Get variables
        collect_function_prototypes(filename, filetext, data['functions'])  # Get function prototypes
    return data


# Writes a typedef/struct/union/enum to the output file
def write_single_type(types, name, addedTypes, nameStack):
    """Return the text for type `name`, preceded by the types it depends on.

    `addedTypes` records the names already written; `nameStack` holds the
    names currently being written and is used to stop circular references.
    """
    out = ''
    if name in nameStack:
        print("Circular reference! value:", name, "| nameStack:", nameStack)
        return out
    nameStack.append(name)
    if name not in addedTypes:
        entry = types[name]
        kind = entry['kind']
        if kind == "typedef":
            checkType = entry['checkType']
            if checkType in types:
                out += write_single_type(types, checkType, addedTypes, nameStack)
            if 'checkArgs' in entry:
                for argType in entry['checkArgs']:
                    if argType in types:
                        out += write_single_type(types, argType, addedTypes, nameStack)
            out += entry['value'] + "\n"
        elif kind == "struct":
            for memType in entry['types']:
                if memType in types and memType not in nameStack:
                    if memType != "Object" and memType != "Particle":  # This is a hack, due to circular referencing making things difficult. :/
                        out += write_single_type(types, memType, addedTypes, nameStack)
            out += entry['value'] + "\n"
        elif kind == "enum":
            out += entry['value'] + "\n"
        addedTypes[name] = 1
    nameStack.pop()
    return out


# Writes all the typedef/struct/union/enum to the output file
def write_output_types(types):
    """Return the text of all types, each one after the types it depends on."""
    parts = ['struct Object;\n']  # This is a hack, due to circular referencing making things difficult. :/
    addedTypes = {}
    for name in types:
        parts.append(write_single_type(types, name, addedTypes, []))
    parts.append('\n')
    return ''.join(parts)


# Writes all the function prototypes to the output file
def write_output_functions(funcs):
    """Return one `type name(args);` line per function, plus a blank line."""
    lines = [info['type'] + ' ' + name + '(' + info['args'] + ');\n' for name, info in funcs.items()]
    return ''.join(lines) + '\n'


# Writes all the preprocessor directives to the output file
def write_output_directives(directives):
    """Return [single-line directives text, all directives text].

    A `#define NULL 0` is added to the single-line text. The lists inside
    `directives` are not modified.
    """
    # Hack, since cleanup() will remove the original NULL definition.
    singleline = directives['singleline'] + ['#define NULL 0\n']
    return [
        '\n'.join(singleline) + '\n',
        '\n'.join(directives['all']) + '\n'
    ]


# Writes a variable to the output file
def write_output_variable(key, variables, varKeys, doneKeys):
    """Return the text of variable `key`, preceded by the variables it mentions.

    A variable counts as mentioned when its name occurs anywhere in the text
    of `key`. Names in `doneKeys` are skipped.
    """
    if key in doneKeys:
        return ""
    # Mark the variable before visiting its dependencies, so that two
    # variables whose texts mention each other cannot recurse forever.
    doneKeys[key] = 1
    out = ""
    text = variables[key]
    for key2 in varKeys:
        if len(variables[key2]) < 1 or key == key2:
            continue
        if key2 in text:
            out += write_output_variable(key2, variables, varKeys, doneKeys)
    out += text.replace('UNUSED', '') + '\n'
    return out


# Writes all the variables to the output file
def write_output_variables(variables):
    """Return the text of every non-empty variable, plus a blank line."""
    doneKeys = {}
    varKeys = variables.keys()
    parts = []
    for key in varKeys:
        if len(variables[key]) < 1:
            continue
        parts.append(write_output_variable(key, variables, varKeys, doneKeys))
    return ''.join(parts) + '\n'


# Preprocess everything, since mips2c doesn't like the preprocessor.
def _run_cpp(text, extra_flags=()):
    """Write `text` to a temporary file in root_dir and return gcc's preprocessed output.

    `extra_flags` are inserted right after `gcc -E -P`. The temporary file is
    removed even when gcc fails.
    """
    temp_path = os.path.join(root_dir, "__temp.c")
    with open(temp_path, "w") as tempFile:
        tempFile.write(text)
    cpp_command = ["gcc", "-E", "-P"] + list(extra_flags) + [
        "-undef", "-DBUILD_VERSION=4", "-DVERSION_us_v77", "-D_LANGUAGE_C",
        "-D__sgi", "-DNON_MATCHING", "-D_Static_assert(x, y)=",
        "-D__attribute__(x)=", "__temp.c"]
    try:
        # gcc runs inside root_dir, which is where the temporary file lives.
        return subprocess.check_output(cpp_command, cwd=root_dir, encoding="utf-8")
    finally:
        os.remove(temp_path)


def preprocess_all(directivesText):
    """Return `directivesText` fully preprocessed by gcc."""
    return _run_cpp(directivesText)


# Preprocess everything but the #defines.
def preprocess_directives(directivesText):
    """Return `directivesText` preprocessed by gcc with `-fdirectives-only`."""
    return _run_cpp(directivesText, ["-fdirectives-only"])


# Creates the output file text.
def write_output(data):
    """Build the complete context file text from the collected `data`."""
    header = '/*** DKR decomp context file (Automatically generated by m2ctx.py [' + date.today().strftime("%Y/%m/%d") + ']) ***/\n\n'
    directives = write_output_directives(data['directives'])
    out = ''
    out += directives[0]  # Only add single-line directives at first for preprocessing.
    out += write_output_types(data['types'])
    out += 's32 osTvType;\n'  # Manually add this, since It doesn't seem to get added automatically.
    out += write_output_variables(data['variables'])
    out += write_output_functions(data['functions'])
    out = fix_enums(out)  # Fixes trailing commas in enums, which gets rid of a warning.
    out = cleanup(out)  # Removes useless stuff
    out = preprocess_all(out)  # Preprocesses everything, removing all the directives.
    out = preprocess_directives(directives[1]) + out  # Add the defines back in.
    out = out.replace('\nOSPifRam ;', '')  # Manual hack to remove this if it exists.
    return header + out


# Uses the `find` program to return the filenames in the specified folders.
def find_files():
    """Return the newline-separated .c/.h files found under `search_folders`.

    `find` is run inside `root_dir`, so the names are relative to it. Exits
    the script with status 1 when `find` reports a failure.
    """
    find_command = ["find"] + search_folders + ["-type", "f", "-name", "*.[ch]"]
    try:
        return subprocess.check_output(find_command, cwd=root_dir, encoding="utf-8")
    except subprocess.CalledProcessError:
        print(
            "Failed to list the source files, when running command:\n"
            + ' '.join(find_command),
            file=sys.stderr,
        )
        sys.exit(1)


def main():
    """Generate ctx.c in `root_dir` from the project's sources."""
    print("Generating context file...")
    data = collect(find_files())
    with open(os.path.join(root_dir, "ctx.c"), "w", encoding="UTF-8") as f:
        f.write(write_output(data))
    print("Done! Generated as ctx.c")


if __name__ == "__main__":
    main()