pokecrystal-board/textpre.py
2012-04-26 10:56:21 -05:00

364 lines
5.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
chars = {
"": 0x05,
"": 0x06,
"": 0x07,
"": 0x08,
"": 0x09,
"": 0x0A,
"": 0x0B,
"": 0x0C,
"": 0x0D,
"": 0x0E,
"": 0x0F,
"": 0x10,
"": 0x11,
"": 0x12,
"": 0x13,
"": 0x19,
"": 0x1A,
"": 0x1B,
"": 0x1C,
"": 0x26,
"": 0x27,
"": 0x28,
"": 0x29,
"": 0x2A,
"": 0x2B,
"": 0x2C,
"": 0x2D,
"": 0x2E,
"": 0x2F,
"": 0x30,
"": 0x31,
"": 0x32,
"": 0x33,
"": 0x34,
"": 0x3A,
"": 0x3B,
"": 0x3C,
"": 0x3D,
"": 0x3E,
"": 0x40,
"": 0x41,
"": 0x42,
"": 0x43,
"": 0x44,
"": 0x45,
"": 0x46,
"": 0x47,
"": 0x48,
"": 0x80,
"": 0x81,
"": 0x82,
"": 0x83,
"": 0x84,
"": 0x85,
"": 0x86,
"": 0x87,
"": 0x88,
"": 0x89,
"": 0x8A,
"": 0x8B,
"": 0x8C,
"": 0x8D,
"": 0x8E,
"": 0x8F,
"": 0x90,
"": 0x91,
"": 0x92,
"": 0x93,
"": 0x94,
"": 0x95,
"": 0x96,
"": 0x97,
"": 0x98,
"": 0x99,
"": 0x9A,
"": 0x9B,
"": 0x9C,
"": 0x9D,
"": 0x9E,
"": 0x9F,
"": 0xA0,
"": 0xA1,
"": 0xA2,
"": 0xA3,
"": 0xA4,
"": 0xA5,
"": 0xA6,
"": 0xA7,
"": 0xA8,
"": 0xA9,
"": 0xAA,
"": 0xAB,
"": 0xAC,
"": 0xAD,
"": 0xAE,
"": 0xAF,
"": 0xB0,
"": 0xB1,
"": 0xB2,
"": 0xB3,
"": 0xB4,
"": 0xB5,
"": 0xB6,
"": 0xB7,
"": 0xB8,
"": 0xB9,
"": 0xBA,
"": 0xBB,
"": 0xBC,
"": 0xBD,
"": 0xBE,
"": 0xBF,
"": 0xC0,
"": 0xC1,
"": 0xC2,
"": 0xC3,
"": 0xC4,
"": 0xC5,
"": 0xC6,
"": 0xC7,
"": 0xC8,
"": 0xC9,
"": 0xCA,
"": 0xCB,
"": 0xCC,
"": 0xCD,
"": 0xCE,
"": 0xCF,
"": 0xD0,
"": 0xD1,
"": 0xD2,
"": 0xD3,
"": 0xD4,
"": 0xD5,
"": 0xD6,
"": 0xD7,
"": 0xD8,
"": 0xD9,
"": 0xDA,
"": 0xDB,
"": 0xDC,
"": 0xDD,
"": 0xDE,
"": 0xDF,
"": 0xE0,
"": 0xE1,
"": 0xE2,
"": 0xE3,
"@": 0x50,
"#": 0x54,
"": 0x75,
"": 0x79,
"": 0x7A,
"": 0x7B,
"": 0x7C,
"": 0x7D,
"": 0x7E,
"": 0x74,
" ": 0x7F,
"A": 0x80,
"B": 0x81,
"C": 0x82,
"D": 0x83,
"E": 0x84,
"F": 0x85,
"G": 0x86,
"H": 0x87,
"I": 0x88,
"J": 0x89,
"K": 0x8A,
"L": 0x8B,
"M": 0x8C,
"N": 0x8D,
"O": 0x8E,
"P": 0x8F,
"Q": 0x90,
"R": 0x91,
"S": 0x92,
"T": 0x93,
"U": 0x94,
"V": 0x95,
"W": 0x96,
"X": 0x97,
"Y": 0x98,
"Z": 0x99,
"(": 0x9A,
")": 0x9B,
":": 0x9C,
";": 0x9D,
"[": 0x9E,
"]": 0x9F,
"a": 0xA0,
"b": 0xA1,
"c": 0xA2,
"d": 0xA3,
"e": 0xA4,
"f": 0xA5,
"g": 0xA6,
"h": 0xA7,
"i": 0xA8,
"j": 0xA9,
"k": 0xAA,
"l": 0xAB,
"m": 0xAC,
"n": 0xAD,
"o": 0xAE,
"p": 0xAF,
"q": 0xB0,
"r": 0xB1,
"s": 0xB2,
"t": 0xB3,
"u": 0xB4,
"v": 0xB5,
"w": 0xB6,
"x": 0xB7,
"y": 0xB8,
"z": 0xB9,
"Ä": 0xC0,
"Ö": 0xC1,
"Ü": 0xC2,
"ä": 0xC3,
"ö": 0xC4,
"ü": 0xC5,
"'d": 0xD0,
"'l": 0xD1,
"'m": 0xD2,
"'r": 0xD3,
"'s": 0xD4,
"'t": 0xD5,
"'v": 0xD6,
"'": 0xE0,
"-": 0xE3,
"?": 0xE6,
"!": 0xE7,
".": 0xE8,
"&": 0xE9,
"é": 0xEA,
"": 0xEB,
"": 0xEF,
"¥": 0xF0,
"×": 0xF1,
"/": 0xF3,
",": 0xF4,
"": 0xF5,
"0": 0xF6,
"1": 0xF7,
"2": 0xF8,
"3": 0xF9,
"4": 0xFA,
"5": 0xFB,
"6": 0xFC,
"7": 0xFD,
"8": 0xFE,
"9": 0xFF
}
for l in sys.stdin:
# skip lines with no quotes
if "\"" not in l:
sys.stdout.write(l)
continue
# strip comments
asm = ""
comment = None
in_quotes = False
in_comment = False
for letter in l:
if in_comment:
comment += letter
elif in_quotes and letter != "\"":
asm += letter
elif in_quotes and letter == "\"":
in_quotes = False
asm += letter
elif not in_quotes and letter == "\"":
in_quotes = True
asm += letter
elif not in_quotes and letter != "\"":
if letter == ";":
in_comment = True
comment = ";"
else:
asm += letter
# skip asm with no quotes
if "\"" not in asm:
sys.stdout.write(l)
continue
# split by quotes
asms = asm.split("\"")
# skip asm that actually does use ASCII in quotes
lowasm = asms[0].lower()
if "section" in lowasm \
or "include" in lowasm \
or "incbin" in lowasm:
sys.stdout.write(l)
continue
even = False
i = 0
for token in asms:
i = i + 1
if even:
# token is a string to convert to byte values
while len(token):
# read a single UTF-8 codepoint
char = token[0]
if ord(char) >= 0xFC:
char = char + token[1:6]
token = token[6:]
elif ord(char) >= 0xF8:
char = char + token[1:5]
token = token[5:]
elif ord(char) >= 0xF0:
char = char + token[1:4]
token = token[4:]
elif ord(char) >= 0xE0:
char = char + token[1:3]
token = token[3:]
elif ord(char) >= 0xC0:
char = char + token[1:2]
token = token[2:]
else:
token = token[1:]
# certain apostrophe-letter pairs are only a single byte
if char == "'" and \
(token[0] == "d" or \
token[0] == "l" or \
token[0] == "m" or \
token[0] == "r" or \
token[0] == "s" or \
token[0] == "t" or \
token[0] == "v"):
char = char + token[0]
token = token[1:]
sys.stdout.write("${0:02X}".format(chars[char]))
if len(token):
sys.stdout.write(", ")
else:
sys.stdout.write(token)
even = not even
if comment != None:
sys.stdout.write(comment)