mirror of
https://github.com/encounter/ac-decomp.git
synced 2026-03-30 10:57:04 -07:00
502 lines
21 KiB
Python
502 lines
21 KiB
Python
# This script leaves most of the heavy lifting to pcpp, which does the preprocessing and expansion of files:
|
|
# https://github.com/ned14/pcpp
|
|
# To use it make sure you run 'pip install pcpp'
|
|
#
|
|
# This script also optionally uses pyperclip to conveniently copy the context to the clipboard:
|
|
# https://github.com/asweigart/pyperclip
|
|
# Install via `pip install pyperclip`
|
|
|
|
import os
|
|
import re
|
|
import typing
|
|
import argparse
|
|
import pyperclip
|
|
from glob import glob
|
|
from re import Pattern
|
|
from io import StringIO
|
|
from pcpp import Preprocessor
|
|
from pcpp import CmdPreprocessor
|
|
from contextlib import redirect_stdout
|
|
|
|
#region Context Options
|
|
class ContextGenerationOptions:
    """Switches selecting which clean-up passes run on the preprocessed text.

    Every switch defaults to ``False``. Callers create an instance and enable
    the passes they need by assigning the attributes on it.
    """

    # Passes applied to each preprocessed line individually
    should_strip_declspec: bool = False
    should_strip_attributes: bool = False
    should_strip_at_address: bool = False
    should_convert_binary_literals: bool = False

    # Passes applied to the whole text, since their constructs may span lines
    should_strip_initializer_trailing_commas: bool = False
    should_replace_enums_in_initializers: bool = False
|
|
#endregion
|
|
|
|
#region Regex Patterns
|
|
# Declaration-like "<type> <name>" followed by "AT_ADDRESS" or ":" and either a
# hex literal or an identifier (optionally parenthesized), terminated by ";".
# Group 1 is the whole address suffix, group 2 the address/identifier itself.
at_address_pattern = re.compile(r"(?:.*?)(?:[a-zA-Z_$][\w$]*\s*\*?\s[a-zA-Z_$][\w$\[\]]*)\s*((?:AT_ADDRESS|:)(?:\s*\(?\s*)(0x[0-9a-fA-F]+|[a-zA-Z_$][\w$]*)\)?);")

# The literal keyword "__attribute__"; the parenthesized argument list that
# follows it is located by scanning, not by this pattern.
attribute_pattern = re.compile(r"(__attribute__)")

# The literal keyword "__declspec"; its argument list is likewise found by scanning.
declspec_pattern = re.compile(r"(__declspec)")

# A binary literal: "0b" followed by one or more 0/1 digits, on word boundaries.
binary_literal_pattern = re.compile(r"\b(0b[01]+)\b")

# A brace initializer ("= { ... };"). Group 1 is the optional trailing comma that
# precedes the closing brace (comments may sit between the comma and the brace).
trailing_initializer_pattern = re.compile(r"^.*?=\s*\{(?:.|\s)+?(,)?\s*(?:\/\/.*?|\/\*.*?\*\/)*\s*?\}\s*;", re.MULTILINE)

# An array size given as a single identifier: "[NAME];". Group 1 is the identifier.
enum_array_size_initializer_pattern = re.compile(r"\[\s*([a-zA-Z_$][\w$]*)\s*\]\s*;")

# An enum declaration (optionally typedef'd and/or named). Group 1 is the text
# between the braces, i.e. the comma-separated member list.
enum_declaration_pattern = re.compile(r"^.*(?:typedef\s+)*enum\s(?:[a-zA-Z_$][\w$]*)*\s*\{\s*((?:.|\s)*?)\}\s*(?:[a-zA-Z_$][\w$]*)*\s*;", re.MULTILINE)

# A single enum member: group 1 is the member name, group 2 the expression after
# "=" when an explicit value is assigned.
enum_value_pattern = re.compile(r"([a-zA-Z_$][\w$]*)\s*(?:=\s*(.*))*")

# Any identifier-like word (used to find enum names inside value expressions).
word_pattern = re.compile(r"\b([a-zA-Z_][\w]*)\b")

# One or more whitespace characters.
white_space_pattern = re.compile(r"\s+")

# The literal cast "(int)"; no other cast spelling is matched.
cast_patterns = re.compile(r"\(int\)")
|
|
#endregion
|
|
|
|
#region Defaults
|
|
# Macros that are always defined unless the caller supplies its own value with -D
default_defines: typing.Dict[str, str] = {
    "__MWERKS__": "1",
    "_LANGUAGE_C": "1",
    "F3DEX_GBI_2": "1",
}

# Names of the sub-directories searched for sources and headers
src_dir = "src"
include_dir = "include"

# Base directories: the current working directory, the directory holding this
# script, and the parent of the script directory
cwd_dir = os.getcwd()
script_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = os.path.abspath(os.path.join(script_dir, ".."))

# "src" and "include" beneath each base directory, in root/script/cwd order
default_include_directories: typing.List[str] = [
    os.path.join(base_dir, sub_dir)
    for base_dir in (root_dir, script_dir, cwd_dir)
    for sub_dir in (src_dir, include_dir)
]

# File name used when neither -o nor --relative is given
default_output_filename = "ctx.h"
|
|
#endregion
|
|
|
|
#region N64 SDK
|
|
def get_n64_sdk(sdk_argument: typing.Optional[str]) -> typing.Optional[str]:
    """Resolve the N64 SDK include directory.

    Args:
        sdk_argument: Path given on the command line, or a falsy value when
            none was given.

    Returns:
        ``sdk_argument`` unchanged when it is provided; otherwise
        ``<N64_SDK>/ultra/usr/include`` built from the ``N64_SDK`` environment
        variable; ``None`` when neither is available.
    """
    # NOTE(review): an explicit argument is used verbatim, while the environment
    # variable gets "ultra/usr/include" appended - confirm this is intended.
    if sdk_argument:
        return sdk_argument

    # No sdk path provided. Try to use the default from the environment.
    # .get() is required here: indexing os.environ raises KeyError when the
    # variable is not set, which made the None fallback unreachable.
    sdk_root = os.environ.get("N64_SDK")
    if not sdk_root:
        return None

    # Since we don't want the user to have to type the full path, all they need
    # is to provide the top-level folder for the SDK
    return os.path.join(sdk_root, "ultra/usr/include")
|
|
#endregion
|
|
|
|
#region Attribute Stripping
|
|
def _strip_keyword_invocations(text: str, keyword_pattern: Pattern, required_depth: int) -> str:
    """Remove each keyword matched by *keyword_pattern* plus its parenthesized arguments.

    Args:
        text: Text to clean.
        keyword_pattern: Compiled pattern locating the keyword itself.
        required_depth: Parenthesis nesting depth that must be reached after
            the keyword before a return to depth zero ends the construct
            (2 for ``keyword((...))``, 1 for ``keyword(...)``).

    Returns:
        The text with every complete construct removed. An occurrence whose
        parentheses never reach *required_depth* and close again is left
        untouched instead of swallowing the remainder of the text.
    """
    # Walk the matches back to front so the spans of earlier matches stay valid
    for keyword_match in reversed(list(keyword_pattern.finditer(text))):
        start_index, scan_index = keyword_match.span(0)
        paren_depth = 0
        opened = False
        end_index = None

        while scan_index < len(text):
            character = text[scan_index]
            if character == "(":
                paren_depth += 1
                if paren_depth == required_depth:
                    opened = True
            elif character == ")":
                paren_depth -= 1
                if opened and paren_depth == 0:
                    # One past the parenthesis that closes the construct
                    end_index = scan_index + 1
                    break
            scan_index += 1

        if end_index is None:
            # Unterminated or argument-less keyword: removing up to the end of
            # the text would also delete unrelated code and the line break.
            continue

        text = text[:start_index] + text[end_index:]

    return text


def strip_attributes(text_to_strip: str)->str:
    """Remove every ``__attribute__((...))`` construct from the text.

    Args:
        text_to_strip: Text to clean; falsy input is returned unchanged.

    Returns:
        The text without the complete ``__attribute__((...))`` constructs.
    """
    if not text_to_strip:
        return text_to_strip

    # The construct ends at the parenthesis closing the double-parenthesis pair
    return _strip_keyword_invocations(text_to_strip, attribute_pattern, 2)
#endregion

#region declspec Stripping
def strip_declspec(text_to_strip: str)->str:
    """Remove every ``__declspec(...)`` construct from the text.

    Args:
        text_to_strip: Text to clean; falsy input is returned unchanged.

    Returns:
        The text without the complete ``__declspec(...)`` constructs.
    """
    if not text_to_strip:
        return text_to_strip

    # The construct ends at the parenthesis closing the single argument list
    return _strip_keyword_invocations(text_to_strip, declspec_pattern, 1)
|
|
#endregion
|
|
|
|
#region At Address Stripping
|
|
def strip_at_address(text_to_strip: str) -> str:
    """Remove the address suffix (group 1 of ``at_address_pattern``) from declarations.

    Args:
        text_to_strip: Text to clean; falsy input is returned unchanged.

    Returns:
        The text with every matched ``AT_ADDRESS``/``:`` suffix cut out, leaving
        the rest of each declaration (including its ``;``) in place.
    """
    if not text_to_strip:
        return text_to_strip

    # Collect the stretches of text lying between the removed spans
    kept_pieces = []
    cursor = 0
    for address_match in at_address_pattern.finditer(text_to_strip):
        suffix_start, suffix_end = address_match.span(1)
        kept_pieces.append(text_to_strip[cursor:suffix_start])
        cursor = suffix_end

    kept_pieces.append(text_to_strip[cursor:])
    return "".join(kept_pieces)
|
|
#endregion
|
|
|
|
#region Binary Literal Conversion
|
|
def convert_binary_literals(text_to_strip: str) -> str:
    """Rewrite every binary literal (``0b...``) as its decimal value.

    Args:
        text_to_strip: Text to convert; falsy input is returned unchanged.

    Returns:
        The text with each literal matched by ``binary_literal_pattern``
        replaced by the decimal representation of the same number.
    """
    if not text_to_strip:
        return text_to_strip

    def to_decimal(literal_match) -> str:
        # int() with base 2 accepts the "0b" prefix directly
        return str(int(literal_match.group(1), 2))

    return binary_literal_pattern.sub(to_decimal, text_to_strip)
|
|
#endregion
|
|
|
|
#region Strip Trailing Commas
|
|
def strip_initializer_trailing_commas(text_to_strip: str) -> str:
    """Remove the trailing comma found just before the closing brace of initializers.

    Args:
        text_to_strip: Text to clean; falsy input is returned unchanged.

    Returns:
        The text with the comma captured by ``trailing_initializer_pattern``
        (group 1) removed from every initializer that has one.
    """
    if not text_to_strip:
        return text_to_strip

    kept_pieces = []
    cursor = 0
    for initializer_match in trailing_initializer_pattern.finditer(text_to_strip):
        # Initializers without a trailing comma need no change
        if not initializer_match.group(1):
            continue

        comma_start, comma_end = initializer_match.span(1)
        kept_pieces.append(text_to_strip[cursor:comma_start])
        cursor = comma_end

    kept_pieces.append(text_to_strip[cursor:])
    return "".join(kept_pieces)
|
|
#endregion
|
|
|
|
#region Enums
|
|
def _evaluate_enum_expression(expression: str, known_values: typing.Dict[str, int], preprocessor) -> typing.Optional[int]:
    """Evaluate the explicit value expression of an enum member.

    Args:
        expression: Text following ``=`` in the member, whitespace removed.
        known_values: Enum members whose numeric value is already known.
        preprocessor: pcpp ``Preprocessor`` used to evaluate the expression.

    Returns:
        The integer value, or ``None`` when the expression cannot be evaluated.
    """
    # Remove casts, then replace already known enum names by their values
    resolved = cast_patterns.sub("", expression)
    resolved = word_pattern.sub(
        lambda word_match: str(known_values.get(word_match[1], word_match[1])),
        resolved,
    )

    try:
        tokens = preprocessor.tokenize(resolved)
        return int(preprocessor.evalexpr(tokens)[0])
    except Exception as error:
        # pcpp does not document which exceptions it raises here, hence the
        # broad catch. Report the problem and fall back to a plain integer.
        print(f"Unable to evaluate enum value '{expression}': {error}")

    try:
        return int(expression)
    except ValueError:
        return None


def replace_enums_with_numeric_values(text_to_strip: str)->str:
    """Replace enum names used as array sizes (``[NAME];``) by their numeric value.

    Args:
        text_to_strip: Text to process; falsy input is returned unchanged.

    Returns:
        The text with every array size that names a known enum member replaced
        by that member's value. Members whose value cannot be determined are
        never substituted, and neither are the implicitly numbered members that
        follow them, since their values would be guesses.
    """
    if not text_to_strip:
        return text_to_strip

    # Check if there are any uses of enums to initialize arrays
    array_size_matches = list(enum_array_size_initializer_pattern.finditer(text_to_strip))
    if not array_size_matches:
        # None found, so no need to evaluate the enums
        return text_to_strip

    # We need to replace enums. To do so, gather all of the enum values from the context
    enum_declarations = list(enum_declaration_pattern.finditer(text_to_strip))
    if not enum_declarations:
        return text_to_strip

    preprocessor = Preprocessor()
    enum_to_numeric_dict: typing.Dict[str, int] = {}
    for enum_declaration in enum_declarations:
        # Value of the next implicitly numbered member; None while it is unknown
        next_value: typing.Optional[int] = 0
        for raw_member in enum_declaration[1].split(","):
            member = white_space_pattern.sub("", raw_member)
            if not member:
                continue

            member_match = enum_value_pattern.match(member)
            if member_match is None:
                # Not a member declaration, e.g. the tail of a macro call that
                # was cut apart by the split on ","
                continue

            # Does the enum have an explicit value assigned?
            if member_match[2]:
                next_value = _evaluate_enum_expression(member_match[2], enum_to_numeric_dict, preprocessor)

            if next_value is None:
                # Unknown value: do not record a wrong number
                continue

            enum_to_numeric_dict[member_match[1]] = next_value

            # By default the enum increases by 1
            next_value += 1

    # With the enum map built we can now replace the usages with the numeric values
    pieces = []
    cursor = 0
    for array_size_match in array_size_matches:
        enum_name = array_size_match[1]
        if enum_name not in enum_to_numeric_dict:
            continue

        name_start, name_end = array_size_match.span(1)
        pieces.append(text_to_strip[cursor:name_start])
        pieces.append(str(enum_to_numeric_dict[enum_name]))
        cursor = name_end

    pieces.append(text_to_strip[cursor:])
    return "".join(pieces)
|
|
#endregion
|
|
|
|
#region Preprocessing
|
|
def generate_context(preprocessor_arguments: typing.List[str], context_options: ContextGenerationOptions) -> typing.Optional[str]:
    """Run pcpp and apply the requested clean-up passes to its output.

    Args:
        preprocessor_arguments: Complete pcpp command line, program name first.
        context_options: Selects which clean-up passes are applied.

    Returns:
        The generated context, or ``None`` when pcpp produced no output.
    """
    # Capture pcpp's output in a string writer since we still want to modify
    # the contents for project-specific conditions
    with StringIO() as preprocessor_string_writer:
        with redirect_stdout(preprocessor_string_writer):
            # Parse the target file:
            CmdPreprocessor(preprocessor_arguments)

        # Check if empty
        if preprocessor_string_writer.tell() == 0:
            return None

        needs_line_cleanup = bool(
            context_options.should_strip_declspec
            or context_options.should_strip_attributes
            or context_options.should_strip_at_address
            or context_options.should_convert_binary_literals
        )
        # The enum replacement must be part of this check: leaving it out made
        # a run that only requested enum replacement return the text unchanged.
        needs_text_cleanup = bool(
            context_options.should_strip_initializer_trailing_commas
            or context_options.should_replace_enums_in_initializers
        )

        generated_context = preprocessor_string_writer.getvalue()
        if not needs_line_cleanup and not needs_text_cleanup:
            # No sanitation needed, so return the entire output as-is
            return generated_context

        if needs_line_cleanup:
            # Sanitize line by line for easier parsing
            cleaned_lines = []
            preprocessor_string_writer.seek(0)
            for line_to_write in preprocessor_string_writer:
                if context_options.should_strip_declspec:
                    line_to_write = strip_declspec(line_to_write)

                if context_options.should_strip_attributes:
                    line_to_write = strip_attributes(line_to_write)

                if context_options.should_strip_at_address:
                    line_to_write = strip_at_address(line_to_write)

                if context_options.should_convert_binary_literals:
                    line_to_write = convert_binary_literals(line_to_write)

                cleaned_lines.append(line_to_write)

            # Single-line cleanup completed
            generated_context = "".join(cleaned_lines)

    # Multi-line cleanup works on the whole text
    if context_options.should_strip_initializer_trailing_commas:
        generated_context = strip_initializer_trailing_commas(generated_context)

    if context_options.should_replace_enums_in_initializers:
        generated_context = replace_enums_with_numeric_values(generated_context)

    return generated_context
|
|
#endregion
|
|
|
|
#region Main
|
|
def main():
    """Parse the command line, run pcpp and write the generated context.

    Arguments this script does not know are passed through to pcpp. The context
    is written to the ``-o`` path, to ``<c_file>.ctx`` with ``--relative``, or to
    ``ctx.h`` in the current directory, and is optionally copied to the clipboard.
    """
    # Write initial parser
    parser = argparse.ArgumentParser(prog="Decomp Context", description="Wrapper around pcpp that can create a context file which can be used for decompilation", add_help=False)
    parser.add_argument("c_file", nargs="?", help="File from which to create context")
    parser.add_argument("-h", "-help", "--help", dest="help", action="store_true")
    parser.add_argument("-n64", "--n64-sdk", dest="n64_sdk", help="Path to the N64 SDK top level directory", action="store")
    parser.add_argument("-D", dest="defines", metavar="macro[=val]", nargs=1, action="append", help="Predefine name as a macro [with value]")
    parser.add_argument("--strip-declspec", dest="strip_declspec", help="If __declspec() string should be stripped", action="store_true", default=False)
    parser.add_argument("--strip-attributes", dest="strip_attributes", help="If __attribute__(()) string should be stripped", action="store_true", default=False)
    parser.add_argument("--strip-at-address", dest="strip_at_address", help="If AT_ADDRESS or : formatted string should be stripped", action="store_true", default=False)
    # The original spelling mixes "-" and "_"; it is kept for compatibility and
    # a consistently hyphenated alias is accepted as well
    parser.add_argument("--strip-initializer_trailing_commas", "--strip-initializer-trailing-commas", dest="strip_initializer_trailing_commas", help="If trailing commas in initializers should be stripped", action="store_true", default=False)
    parser.add_argument("--convert-binary-literals", dest="convert_binary_literals", help="If binary literals (0bxxxx) should be converted to decimal", action="store_true", default=False)
    parser.add_argument("--replace-enums-in-initializers", dest="replace_enums_in_initializers", help="If enums should be replaced by its numeric value in initializers", action="store_true", default=False)
    parser.add_argument("--clipboard", dest="copy_to_clipboard", help="If the context should be copied to the clipboard", action="store_true", default=False)

    # For the output path, we either want to be explicit or relative, but not both
    output_target_group = parser.add_mutually_exclusive_group()
    output_target_group.add_argument("-o", dest="output_path", help="Explicit path to output the context file to", action="store")
    output_target_group.add_argument("-r", "--relative", dest="relative", help="Generate context relative to the source file", action="store_true")

    # When targeting a specific platform we want to only do one thing or another
    platform_target_group = parser.add_mutually_exclusive_group()
    platform_target_group.add_argument("--m2c", dest="m2c", help="Generates an m2c-friendly file", action="store_true")
    platform_target_group.add_argument("--ghidra", dest="ghidra", help="Generates an Ghidra-friendly file", action="store_true")

    # Parse the known arguments; everything else is handed to pcpp
    known_args, pass_through_args = parser.parse_known_args()

    preprocessor_arguments = ["pcpp"]
    if known_args.help:
        # Since this script acts as a wrapper for the main pcpp script
        # we want to manually display the help and pass it through to the
        # pcpp preprocessor to show its full list of arguments
        parser.print_help()
        preprocessor_arguments.append("--help")
        CmdPreprocessor(preprocessor_arguments)
        return

    c_file = known_args.c_file
    if known_args.relative and not c_file:
        # Without a source file there is nothing to place the output next to
        parser.error("-r/--relative requires a source file to be specified")

    # Append in the default include directories
    include_directories: typing.List[str] = list(default_include_directories)
    n64_sdk = get_n64_sdk(known_args.n64_sdk)
    if n64_sdk:
        include_directories.append(n64_sdk)

    for include_directory in include_directories:
        preprocessor_arguments.extend(("-I", include_directory))

    # Check if we have any passed in defines
    include_defines: typing.List[str] = []
    known_defines: typing.Set[str] = set()
    for define_argument in known_args.defines or []:
        # nargs=1 with action="append" yields one single-element list per -D
        define = define_argument[0]
        include_defines.append(define)
        known_defines.add(define.split("=")[0])

    if not c_file:
        # If no file is specified it is assumed we want to create a mega context
        # file that is the aggregate of all include files
        include_files: typing.Set[str] = set()
        for include_directory in default_include_directories:
            for directory_path, _, _ in os.walk(include_directory):
                include_files.update(glob(os.path.join(directory_path, "*.h")))

        # Add each file as an input so that pcpp can parse them into a single
        # output file. Sorted so the result does not depend on set ordering.
        preprocessor_arguments.extend(sorted(include_files))
    else:
        # Add the file we want to read
        preprocessor_arguments.append(c_file)

    # Add in the default defines unless explicitly passed in as arguments
    for default_define, default_define_value in default_defines.items():
        if default_define in known_defines:
            continue
        include_defines.append(default_define + "=" + default_define_value)

    # Add the defines to the arguments
    for define in include_defines:
        preprocessor_arguments.extend(("-D", define))

    # If not targeting Ghidra or m2c we can include more in
    if not known_args.ghidra and not known_args.m2c:
        preprocessor_arguments.append("--passthru-defines")
    else:
        # Don't include the line directives if targeting Ghidra/m2c
        preprocessor_arguments.append("--line-directive")

    # For debugging purposes, include unfound includes in output to mark errors
    preprocessor_arguments.append("--passthru-unfound-includes")

    # Compress to minimize whitespace
    preprocessor_arguments.append("--compress")

    # Add unknown arguments and pass them to pcpp
    preprocessor_arguments.extend(pass_through_args)

    # Check if we need to do further conversions after the file is preprocessed
    context_options = ContextGenerationOptions()
    context_options.should_strip_declspec = known_args.strip_declspec or known_args.ghidra or known_args.m2c
    context_options.should_strip_at_address = known_args.strip_at_address or known_args.ghidra or known_args.m2c
    context_options.should_strip_attributes = known_args.strip_attributes or known_args.m2c
    context_options.should_convert_binary_literals = known_args.convert_binary_literals or known_args.ghidra
    context_options.should_strip_initializer_trailing_commas = known_args.strip_initializer_trailing_commas or known_args.ghidra
    context_options.should_replace_enums_in_initializers = known_args.replace_enums_in_initializers or known_args.ghidra

    # Generate the context
    generated_context = generate_context(preprocessor_arguments, context_options)
    if generated_context is None:
        # Nothing was produced; stop before the target file gets truncated
        parser.exit(1, "No context was generated: the preprocessor produced no output\n")

    # Determine the file to write to
    if known_args.output_path:
        target_file_name = known_args.output_path
    elif known_args.relative:
        target_file_name = f"{c_file}.ctx"
    else:
        target_file_name = os.path.join(os.getcwd(), default_output_filename)

    # Write the generated context to the file
    with open(target_file_name, "w", encoding="utf-8", newline="\n") as file_writer:
        file_writer.write(generated_context)

    # Check if we also want to copy to the clipboard
    if known_args.copy_to_clipboard:
        pyperclip.copy(generated_context)
|
|
#endregion
|
|
|
|
# Run the command-line entry point only when this file is executed as a script
if __name__ == "__main__":
    main()
|