move main preprocessor into pokemontools

Only pokecrystal-specific preprocessor stuff lives in preprocessor.py
now. The other stuff lives in pokemontools.
This commit is contained in:
Bryan Bishop 2013-08-28 22:48:44 -05:00
parent 4f685791c1
commit c32cc3dcb8
2 changed files with 6 additions and 603 deletions

2
extras

@ -1 +1 @@
Subproject commit 016f0206b5029fc83a6200be29b0f980c76dfd90
Subproject commit 0856dee10ad124a6f313492561ed1c2a2df74abd

View File

@ -3,6 +3,8 @@
import sys
import extras.pokemontools.preprocessor as preprocessor
from extras.pokemontools.crystal import (
command_classes,
Warp,
@ -33,610 +35,11 @@ macros += movement_command_classes
macros += music_classes
macros += effect_classes
# When True, macro_translator writes each source line to stdout as a
# "; original_line: ..." comment before the translated output.
show_original_lines = False
# When True, macro_translator asserts that the number of parameters on a line
# matches what the macro's param_types allow (helpful for debugging macros).
do_macro_sanity_check = False
class SkippableMacro(object):
    """
    Stand-in macro class that registers "db" in the macro table.

    rgbasm already understands "db", so when this class is listed in
    skippable_macros the line is written back without any translation.
    """

    macro_name = "db"
# Maps each text character (or apostrophe-letter pair) to the byte value that
# quote_translator emits for it.
# NOTE(review): many keys below are empty strings in this copy. Duplicate keys
# in a dict literal silently collapse to the last value, so these were
# presumably non-ASCII characters that got lost -- restore them from the
# upstream table before relying on this dict.
chars = {
"": 0x05,
"": 0x06,
"": 0x07,
"": 0x08,
"": 0x09,
"": 0x0A,
"": 0x0B,
"": 0x0C,
"": 0x0D,
"": 0x0E,
"": 0x0F,
"": 0x10,
"": 0x11,
"": 0x12,
"": 0x13,
"": 0x19,
"": 0x1A,
"": 0x1B,
"": 0x1C,
"": 0x26,
"": 0x27,
"": 0x28,
"": 0x29,
"": 0x2A,
"": 0x2B,
"": 0x2C,
"": 0x2D,
"": 0x2E,
"": 0x2F,
"": 0x30,
"": 0x31,
"": 0x32,
"": 0x33,
"": 0x34,
"": 0x3A,
"": 0x3B,
"": 0x3C,
"": 0x3D,
"": 0x3E,
"": 0x40,
"": 0x41,
"": 0x42,
"": 0x43,
"": 0x44,
"": 0x45,
"": 0x46,
"": 0x47,
"": 0x48,
"": 0x80,
"": 0x81,
"": 0x82,
"": 0x83,
"": 0x84,
"": 0x85,
"": 0x86,
"": 0x87,
"": 0x88,
"": 0x89,
"": 0x8A,
"": 0x8B,
"": 0x8C,
"": 0x8D,
"": 0x8E,
"": 0x8F,
"": 0x90,
"": 0x91,
"": 0x92,
"": 0x93,
"": 0x94,
"": 0x95,
"": 0x96,
"": 0x97,
"": 0x98,
"": 0x99,
"": 0x9A,
"": 0x9B,
"": 0x9C,
"": 0x9D,
"": 0x9E,
"": 0x9F,
"": 0xA0,
"": 0xA1,
"": 0xA2,
"": 0xA3,
"": 0xA4,
"": 0xA5,
"": 0xA6,
"": 0xA7,
"": 0xA8,
"": 0xA9,
"": 0xAA,
"": 0xAB,
"": 0xAC,
"": 0xAD,
"": 0xAE,
"": 0xAF,
"": 0xB0,
"": 0xB1,
"": 0xB2,
"": 0xB3,
"": 0xB4,
"": 0xB5,
"": 0xB6,
"": 0xB7,
"": 0xB8,
"": 0xB9,
"": 0xBA,
"": 0xBB,
"": 0xBC,
"": 0xBD,
"": 0xBE,
"": 0xBF,
"": 0xC0,
"": 0xC1,
"": 0xC2,
"": 0xC3,
"": 0xC4,
"": 0xC5,
"": 0xC6,
"": 0xC7,
"": 0xC8,
"": 0xC9,
"": 0xCA,
"": 0xCB,
"": 0xCC,
"": 0xCD,
"": 0xCE,
"": 0xCF,
"": 0xD0,
"": 0xD1,
"": 0xD2,
"": 0xD3,
"": 0xD4,
"": 0xD5,
"": 0xD6,
"": 0xD7,
"": 0xD8,
"": 0xD9,
"": 0xDA,
"": 0xDB,
"": 0xDC,
"": 0xDD,
"": 0xDE,
"": 0xDF,
"": 0xE0,
"": 0xE1,
"": 0xE2,
"": 0xE3,
"": 0xE9,
"@": 0x50,
"#": 0x54,
"": 0x75,
"": 0x79,
"": 0x7A,
"": 0x7B,
"": 0x7C,
"": 0x7D,
"": 0x7E,
"": 0x74,
" ": 0x7F,
# uppercase Latin letters
"A": 0x80,
"B": 0x81,
"C": 0x82,
"D": 0x83,
"E": 0x84,
"F": 0x85,
"G": 0x86,
"H": 0x87,
"I": 0x88,
"J": 0x89,
"K": 0x8A,
"L": 0x8B,
"M": 0x8C,
"N": 0x8D,
"O": 0x8E,
"P": 0x8F,
"Q": 0x90,
"R": 0x91,
"S": 0x92,
"T": 0x93,
"U": 0x94,
"V": 0x95,
"W": 0x96,
"X": 0x97,
"Y": 0x98,
"Z": 0x99,
"(": 0x9A,
")": 0x9B,
":": 0x9C,
";": 0x9D,
"[": 0x9E,
"]": 0x9F,
# lowercase Latin letters
"a": 0xA0,
"b": 0xA1,
"c": 0xA2,
"d": 0xA3,
"e": 0xA4,
"f": 0xA5,
"g": 0xA6,
"h": 0xA7,
"i": 0xA8,
"j": 0xA9,
"k": 0xAA,
"l": 0xAB,
"m": 0xAC,
"n": 0xAD,
"o": 0xAE,
"p": 0xAF,
"q": 0xB0,
"r": 0xB1,
"s": 0xB2,
"t": 0xB3,
"u": 0xB4,
"v": 0xB5,
"w": 0xB6,
"x": 0xB7,
"y": 0xB8,
"z": 0xB9,
"Ä": 0xC0,
"Ö": 0xC1,
"Ü": 0xC2,
"ä": 0xC3,
"ö": 0xC4,
"ü": 0xC5,
# apostrophe-letter pairs that quote_translator reads as a single character
"'d": 0xD0,
"'l": 0xD1,
"'m": 0xD2,
"'r": 0xD3,
"'s": 0xD4,
"'t": 0xD5,
"'v": 0xD6,
"'": 0xE0,
"-": 0xE3,
"?": 0xE6,
"!": 0xE7,
".": 0xE8,
"&": 0xE9,
"é": 0xEA,
"": 0xEB,
"": 0xEC,
"": 0xED,
"": 0xEE,
"": 0xEF,
"¥": 0xF0,
"×": 0xF1,
"/": 0xF3,
",": 0xF4,
"": 0xF5,
# digits
"0": 0xF6,
"1": 0xF7,
"2": 0xF8,
"3": 0xF9,
"4": 0xFA,
"5": 0xFB,
"6": 0xFC,
"7": 0xFD,
"8": 0xFE,
"9": 0xFF
}
def separate_comment(l):
def preprocess(macros):
"""
Separates asm and comments on a single line.
Entry point for the preprocessor.
"""
in_quotes = False
for i in xrange(len(l)):
if not in_quotes:
if l[i] == ";":
break
if l[i] == "\"":
in_quotes = not in_quotes
return l[:i], l[i:] or None
def _split_text_characters(token):
    """Split quoted text into the character keys used by the chars table."""
    characters = []
    while token:
        # read a single UTF-8 codepoint: the lead byte tells how many bytes
        # the sequence spans
        # NOTE(review): this assumes token is a UTF-8 encoded byte string
        # (a Python 2 str) -- confirm before running on unicode text
        lead = ord(token[0])
        if lead < 0xc0:
            width = 1
        elif lead < 0xe0:
            width = 2
        elif lead < 0xf0:
            width = 3
        elif lead < 0xf8:
            width = 4
        elif lead < 0xfc:
            width = 5
        else:
            width = 6
        char = token[:width]
        token = token[width:]

        # certain apostrophe-letter pairs are considered a single character
        if char == "'" and token and token[0] in 'dlmrstv':
            char += token[0]
            token = token[1:]

        characters.append(char)
    return characters


def _wrap_print_text(characters):
    """Convert print-macro text to byte values, inserting line endings."""
    values = []
    line = 0
    while characters:
        last_char = 1
        if len(characters) > 18 and characters[-1] != '@':
            # cut after the last space found among the first 18 characters
            # (or after the first character when there is no such space)
            for i, _ in enumerate(characters):
                last_char = i + 1
                if ' ' not in characters[i+1:18]:
                    break
            values.extend(chars[char] for char in characters[:last_char-1])
            # a space at the cut is dropped in favour of the line ending;
            # any other character still has to be emitted as its byte value
            if characters[last_char-1] != " ":
                values.append(chars[characters[last_char-1]])
            # line endings alternate between 0x4f and 0x51
            values.append(0x51 if line & 1 else 0x4f)
            line += 1
        else:
            values.extend(chars[char] for char in characters[:last_char])
        characters = characters[last_char:]

    # end text
    values.append(0x57)
    return values


def quote_translator(asm):
    """
    Writes asm with quoted text translated into bytes.

    The line is split on double quotes. Every second piece (the text between
    a pair of quotes) is replaced by a comma-separated list of "$XX" byte
    values looked up in the module-level chars table; the pieces outside
    quotes are copied through unchanged.

    Lines whose leading piece contains SECTION, INCBIN or INCLUDE are
    returned untouched because their quoted text really is ASCII.

    When the leading piece is exactly "print" (ignoring whitespace) it is
    rewritten to "db 0," and the text is broken into lines: while more than
    18 characters remain and the text does not end with "@", a line ending
    byte is inserted, and 0x57 is appended after the text.

    Returns the translated line. Raises KeyError if the text contains a
    character that is missing from chars.
    """
    # split by quotes
    asms = asm.split('"')

    # skip asm that actually does use ASCII in quotes
    if "SECTION" in asms[0] \
    or "INCBIN" in asms[0] \
    or "INCLUDE" in asms[0]:
        return asm

    print_macro = False
    if asms[0].strip() == 'print':
        asms[0] = asms[0].replace('print', 'db 0,')
        print_macro = True

    output = ''
    for position, token in enumerate(asms):
        # odd positions are the pieces that sat between quotes
        if position % 2:
            characters = _split_text_characters(token)
            if print_macro:
                values = _wrap_print_text(characters)
            else:
                values = [chars[char] for char in characters]
            # joining once keeps the separators right even when a piece
            # contributes no bytes of its own
            output += ", ".join("${0:02X}".format(value) for value in values)
        else:
            output += token

    return output
def extract_token(asm):
    """
    Returns the text that precedes the first space in asm, with surrounding
    whitespace removed.
    """
    first_piece, _, _ = asm.partition(" ")
    return first_piece.strip()
def make_macro_table(macros):
    """
    Builds a dict that maps each macro's macro_name to the macro itself.

    When two macros share a name, the one that comes later in macros wins.
    """
    table = {}
    for macro in macros:
        table[macro.macro_name] = macro
    return table
def macro_test(asm, macro_table):
    """
    Looks up the macro named by the first token on the line.

    Returns a (macro, token) tuple when the token is a key of macro_table,
    and (None, None) otherwise.
    """
    # macros are determined by the first symbol on the line
    name = extract_token(asm)

    if name not in macro_table:
        return (None, None)
    return (macro_table[name], name)
def is_based_on(something, base):
    """
    Tells whether the class 'something' is named 'base' or lists a class
    named 'base' among its direct bases.

    Matching by name is a hack, but it avoids importing the macro classes
    here. Only __bases__ is consulted, so more distant ancestors do not count.

    Used by macro_translator.
    """
    parent_names = [str(parent.__name__) for parent in something.__bases__]
    return base in parent_names or base == something.__name__
def macro_translator(macro, token, line, skippable_macros):
    """
    Converts a line with a macro into a rgbasm-compatible line.

    The translated text is written to sys.stdout; nothing is returned.

    macro            -- macro class whose macro_name equals token
    token            -- the macro name found at the start of the line
    line             -- the asm line to translate
    skippable_macros -- macro classes (or macro class names) whose lines are
                        written back unchanged

    Raises AssertionError on a macro/token mismatch or a failed sanity check,
    and Exception when a parameter cannot be translated.
    """
    assert macro.macro_name == token, "macro/token mismatch"

    original_line = line

    # remove trailing newline (the echoed original always ends with one)
    if line.endswith("\n"):
        line = line[:-1]
    else:
        original_line += "\n"

    # remove first tab
    if line.startswith("\t"):
        line = line[1:]

    # remove duplicate whitespace (also trailing)
    line = " ".join(line.split())

    params = []

    # check if the line has params
    if " " in line:
        # split the line into separate parameters; only the leading macro
        # name is dropped, since removing every occurrence would also mangle
        # parameters that happen to contain the name
        params = line.replace(token, "", 1).split(",")

        # check if there are no params (redundant)
        if len(params) == 1 and params[0] == "":
            raise Exception("macro has no params?")

    # write out a comment showing the original line
    if show_original_lines:
        sys.stdout.write("; original_line: " + original_line)

    # "db" is a macro because of SkippableMacro
    # rgbasm can handle "db" so no preprocessing is required
    # (don't check its param count)
    if macro.__name__ in skippable_macros or (macro.macro_name == "db" and macro in skippable_macros):
        sys.stdout.write(original_line)
        return

    # certain macros don't need an initial byte written
    # do: all scripting macros
    # don't: signpost, warp_def, person_event, xy_trigger
    if not macro.override_byte_check:
        sys.stdout.write("db ${0:02X}\n".format(macro.id))

    # --- long-winded sanity check goes here ---

    if do_macro_sanity_check:
        # comparing len(params) with len(macro.param_types) directly won't
        # work because PointerLabelBeforeBank shows up as two params, so the
        # expected count is derived from each parameter's byte type and size
        allowed_length = 0
        for param_type in macro.param_types.values():
            param_klass = param_type["class"]

            if param_klass.byte_type == "db":
                allowed_length += 1 # just one value
            elif param_klass.byte_type == "dw":
                if param_klass.size == 2:
                    allowed_length += 1 # just label
                elif param_klass.size == 3:
                    allowed_length += 2 # bank and label
                else:
                    raise Exception("dunno what to do with a macro param with a size > 3")
            else:
                raise Exception("dunno what to do with this non db/dw macro param: " +
                                str(param_klass) + " in line: " + original_line)

        # sometimes the allowed length can vary
        if hasattr(macro, "allowed_lengths"):
            allowed_lengths = macro.allowed_lengths + [allowed_length]
        else:
            allowed_lengths = [allowed_length]

        assert len(params) in allowed_lengths, \
               "mismatched number of parameters on this line: " + \
               original_line

    # --- end of ridiculously long sanity check ---

    # used for storetext: counts the extra params consumed by bank/pointer
    # pairs so that param_types keeps being indexed per logical parameter
    correction = 0

    output = ""

    index = 0
    while index < len(params):
        try:
            param_type = macro.param_types[index - correction]
        except KeyError:
            raise Exception("line is: " + str(line) + " and macro is: " + str(macro))
        description = param_type["name"]
        param_klass = param_type["class"]
        byte_type = param_klass.byte_type # db or dw
        size = param_klass.size
        param = params[index].strip()

        # param_klass.to_asm() won't work here because it doesn't
        # include db/dw.

        # some parameters are really multiple types of bytes
        if (byte_type == "dw" and size != 2) or \
           (byte_type == "db" and size != 1):

            output += ("; " + description + "\n")

            if size == 3 and is_based_on(param_klass, "PointerLabelBeforeBank"):
                # write the bank first
                output += ("db " + param + "\n")
                # write the pointer second
                output += ("dw " + params[index+1].strip() + "\n")
                index += 2
                correction += 1
            elif size == 3 and is_based_on(param_klass, "PointerLabelAfterBank"):
                # write the pointer first
                output += ("dw " + param + "\n")
                # write the bank second
                output += ("db " + params[index+1].strip() + "\n")
                index += 2
                correction += 1
            elif size == 3 and "from_asm" in dir(param_klass):
                output += ("db " + param_klass.from_asm(param) + "\n")
                index += 1
            else:
                raise Exception("dunno what to do with this macro " +
                                "param (" + str(param_klass) + ") " +
                                "on this line: " + original_line)

        # or just print out the byte
        else:
            output += (byte_type + " " + param + " ; " + description + "\n")

            index += 1

    sys.stdout.write(output)
def read_line(l, skippable_macros, macro_table):
    """
    Preprocesses a given line of asm and writes the result to sys.stdout.

    l                -- the raw source line
    skippable_macros -- passed through to macro_translator
    macro_table      -- dict of macro name -> macro, as used by macro_test

    The comment part of the line, if any, is written after the asm part.
    """
    # strip comments from asm
    asm, comment = separate_comment(l)

    # export all labels: look for a colon in the text before the first quote.
    # Splitting (instead of slicing at str.find) keeps the whole line when it
    # has no quote, so a colon in the last position is still seen.
    if ':' in asm.split('"', 1)[0]:
        sys.stdout.write('GLOBAL ' + asm.split(':')[0] + '\n')

    # expect preprocessed .asm files
    if "INCLUDE" in asm:
        asm = asm.replace('.asm', '.tx')
        sys.stdout.write(asm)

    # ascii string macro preserves the bytes as ascii (skip the translator)
    elif (len(asm) > 6 and asm.startswith("ascii ")) or asm.startswith("\tascii "):
        asm = asm.replace("ascii", "db", 1)
        sys.stdout.write(asm)

    # convert text to bytes when a quote appears (not in a comment)
    elif "\"" in asm:
        sys.stdout.write(quote_translator(asm))

    # check against other preprocessor features
    else:
        macro, token = macro_test(asm, macro_table)
        if macro:
            macro_translator(macro, token, asm, skippable_macros)
        else:
            sys.stdout.write(asm)

    if comment:
        sys.stdout.write(comment)
def preprocess(macros, skippable_macros=None, lines=None):
    """
    Main entry point for the preprocessor.

    macros           -- list of macro classes to recognise
    skippable_macros -- list of macros whose lines are passed through
                        unchanged; defaults to [SkippableMacro]
    lines            -- the input, either a list of lines or a single string
                        that is split on newlines; when empty or omitted the
                        lines are read from sys.stdin

    Every line is handed to read_line, which writes its output to sys.stdout.
    """
    if skippable_macros is None:
        skippable_macros = [SkippableMacro]

    macro_table = make_macro_table(list(set(macros + skippable_macros)))

    # HACK for pokecrystal. Must be after make_macro_table call.
    # A new list is built (rather than using +=) so that the list passed in
    # by the caller is not modified and does not grow on repeated calls.
    skippable_macros = skippable_macros + ["TextEndingCommand"]

    if not lines:
        # read each line from stdin
        lines = sys.stdin.readlines()
    elif not isinstance(lines, list):
        # split up the input into individual lines
        lines = lines.split("\n")

    for l in lines:
        read_line(l, skippable_macros, macro_table)
return preprocessor.preprocess(macros)
# only run against stdin when not included as a module
if __name__ == "__main__":