Use textpre.py instead of textpre.awk.

This commit is contained in:
IIMarckus 2012-03-05 00:05:36 -07:00
parent 093aa58ec1
commit d5fe70b03b
4 changed files with 343 additions and 307 deletions

View File

@ -1,6 +1,3 @@
#gawk sort order
export LC_CTYPE=C
.SUFFIXES: .asm .tx .o .gbc .SUFFIXES: .asm .tx .o .gbc
TEXTFILES = TEXTFILES =
@ -11,7 +8,7 @@ pokecrystal.o: pokecrystal.asm main.tx constants.asm ${TEXTFILES}
rgbasm -o pokecrystal.o pokecrystal.asm rgbasm -o pokecrystal.o pokecrystal.asm
.asm.tx: .asm.tx:
awk -f textpre.awk < $< > $@ python textpre.py < $< > $@
pokecrystal.gbc: pokecrystal.o pokecrystal.gbc: pokecrystal.o
rgblink -o $@ $< rgblink -o $@ $<

View File

@ -19,7 +19,7 @@ INCBIN "baserom.gbc",$20000,$4000
SECTION "bank9",DATA,BANK[$9] SECTION "bank9",DATA,BANK[$9]
INCBIN "baserom.gbc",$24000,$4000 INCBIN "baserom.gbc",$24000,$4000
SECTION "bankA",DATA,BANK[$A] SECTION "bankA",DATA,BANK[$A]
INCBIN "baserom.gbc",$28000,$$2A5e9 - $28000 INCBIN "baserom.gbc",$28000,$2A5e9 - $28000
db $03,$02 ; Sprout Tower 2F db $03,$02 ; Sprout Tower 2F
db $05,$05,$05 ; encounter rates: morn/day/nite db $05,$05,$05 ; encounter rates: morn/day/nite

View File

@ -1,302 +0,0 @@
# Character table: maps each glyph that may appear inside a quoted string to
# the "$XX" byte value printed in its place.  Keys are whole characters as
# assembled by the main rule (multi-byte UTF-8 sequences, or an apostrophe
# fused with the following letter).  Input lines are split on double quotes
# so that even-numbered fields are the quoted strings.
BEGIN {
    FS = "\""
    char["ガ"] = "$05"
    char["ギ"] = "$06"
    char["グ"] = "$07"
    char["ゲ"] = "$08"
    char["ゴ"] = "$09"
    char["ザ"] = "$0A"
    char["ジ"] = "$0B"
    char["ズ"] = "$0C"
    char["ゼ"] = "$0D"
    char["ゾ"] = "$0E"
    char["ダ"] = "$0F"
    char["ヂ"] = "$10"
    char["ヅ"] = "$11"
    char["デ"] = "$12"
    char["ド"] = "$13"
    char["バ"] = "$19"
    char["ビ"] = "$1A"
    char["ブ"] = "$1B"
    char["ボ"] = "$1C"
    char["が"] = "$26"
    char["ぎ"] = "$27"
    char["ぐ"] = "$28"
    char["げ"] = "$29"
    char["ご"] = "$2A"
    char["ざ"] = "$2B"
    char["じ"] = "$2C"
    char["ず"] = "$2D"
    char["ぜ"] = "$2E"
    char["ぞ"] = "$2F"
    char["だ"] = "$30"
    char["ぢ"] = "$31"
    char["づ"] = "$32"
    char["で"] = "$33"
    char["ど"] = "$34"
    char["ば"] = "$3A"
    char["び"] = "$3B"
    char["ぶ"] = "$3C"
    char["べ"] = "$3D"
    char["ぼ"] = "$3E"
    char["パ"] = "$40"
    char["ピ"] = "$41"
    char["プ"] = "$42"
    char["ポ"] = "$43"
    char["ぱ"] = "$44"
    char["ぴ"] = "$45"
    char["ぷ"] = "$46"
    char["ぺ"] = "$47"
    char["ぽ"] = "$48"
    char["ア"] = "$80"
    char["イ"] = "$81"
    char["ウ"] = "$82"
    char["エ"] = "$83"
    char["ォ"] = "$84"
    char["カ"] = "$85"
    char["キ"] = "$86"
    char["ク"] = "$87"
    char["ケ"] = "$88"
    char["コ"] = "$89"
    char["サ"] = "$8A"
    char["シ"] = "$8B"
    char["ス"] = "$8C"
    char["セ"] = "$8D"
    char["ソ"] = "$8E"
    char["タ"] = "$8F"
    char["チ"] = "$90"
    char["ツ"] = "$91"
    char["テ"] = "$92"
    char["ト"] = "$93"
    char["ナ"] = "$94"
    char["ニ"] = "$95"
    char["ヌ"] = "$96"
    char["ネ"] = "$97"
    # The key for $98 was empty; restored as the kana that sits between
    # ネ ($97) and ハ ($99).
    char["ノ"] = "$98"
    char["ハ"] = "$99"
    char["ヒ"] = "$9A"
    char["フ"] = "$9B"
    char["ホ"] = "$9C"
    char["マ"] = "$9D"
    char["ミ"] = "$9E"
    char["ム"] = "$9F"
    char["メ"] = "$A0"
    char["モ"] = "$A1"
    char["ヤ"] = "$A2"
    char["ユ"] = "$A3"
    char["ヨ"] = "$A4"
    char["ラ"] = "$A5"
    char["ル"] = "$A6"
    char["レ"] = "$A7"
    char["ロ"] = "$A8"
    char["ワ"] = "$A9"
    char["ヲ"] = "$AA"
    char["ン"] = "$AB"
    char["ッ"] = "$AC"
    char["ャ"] = "$AD"
    char["ュ"] = "$AE"
    char["ョ"] = "$AF"
    char["ィ"] = "$B0"
    char["あ"] = "$B1"
    char["い"] = "$B2"
    char["う"] = "$B3"
    char["え"] = "$B4"
    char["お"] = "$B5"
    char["か"] = "$B6"
    char["き"] = "$B7"
    char["く"] = "$B8"
    char["け"] = "$B9"
    char["こ"] = "$BA"
    char["さ"] = "$BB"
    char["し"] = "$BC"
    char["す"] = "$BD"
    char["せ"] = "$BE"
    char["そ"] = "$BF"
    char["た"] = "$C0"
    char["ち"] = "$C1"
    char["つ"] = "$C2"
    char["て"] = "$C3"
    char["と"] = "$C4"
    char["な"] = "$C5"
    char["に"] = "$C6"
    char["ぬ"] = "$C7"
    char["ね"] = "$C8"
    char["の"] = "$C9"
    char["は"] = "$CA"
    char["ひ"] = "$CB"
    char["ふ"] = "$CC"
    char["へ"] = "$CD"
    char["ほ"] = "$CE"
    char["ま"] = "$CF"
    char["み"] = "$D0"
    char["む"] = "$D1"
    char["め"] = "$D2"
    char["も"] = "$D3"
    char["や"] = "$D4"
    char["ゆ"] = "$D5"
    char["よ"] = "$D6"
    char["ら"] = "$D7"
    char["り"] = "$D8"
    char["る"] = "$D9"
    char["れ"] = "$DA"
    char["ろ"] = "$DB"
    char["わ"] = "$DC"
    char["を"] = "$DD"
    char["ん"] = "$DE"
    char["っ"] = "$DF"
    char["ゃ"] = "$E0"
    char["ゅ"] = "$E1"
    char["ょ"] = "$E2"
    char["ー"] = "$E3"
    char["@"] = "$50"
    char["#"] = "$54"
    char["…"] = "$75"
    # textbox borders
    char["┌"] = "$79"
    char["─"] = "$7A"
    char["┐"] = "$7B"
    char["│"] = "$7C"
    char["└"] = "$7D"
    char["┘"] = "$7E"
    char["№"] = "$74"
    char[" "] = "$7F"
    char["A"] = "$80"
    char["B"] = "$81"
    char["C"] = "$82"
    char["D"] = "$83"
    char["E"] = "$84"
    char["F"] = "$85"
    char["G"] = "$86"
    char["H"] = "$87"
    char["I"] = "$88"
    char["J"] = "$89"
    char["K"] = "$8A"
    char["L"] = "$8B"
    char["M"] = "$8C"
    char["N"] = "$8D"
    char["O"] = "$8E"
    char["P"] = "$8F"
    char["Q"] = "$90"
    char["R"] = "$91"
    char["S"] = "$92"
    char["T"] = "$93"
    char["U"] = "$94"
    char["V"] = "$95"
    char["W"] = "$96"
    char["X"] = "$97"
    char["Y"] = "$98"
    char["Z"] = "$99"
    char["("] = "$9A"
    char[")"] = "$9B"
    char[":"] = "$9C"
    char[";"] = "$9D"
    char["["] = "$9E"
    char["]"] = "$9F"
    char["a"] = "$A0"
    char["b"] = "$A1"
    char["c"] = "$A2"
    char["d"] = "$A3"
    char["e"] = "$A4"
    char["f"] = "$A5"
    char["g"] = "$A6"
    char["h"] = "$A7"
    char["i"] = "$A8"
    char["j"] = "$A9"
    char["k"] = "$AA"
    char["l"] = "$AB"
    char["m"] = "$AC"
    char["n"] = "$AD"
    char["o"] = "$AE"
    char["p"] = "$AF"
    char["q"] = "$B0"
    char["r"] = "$B1"
    char["s"] = "$B2"
    char["t"] = "$B3"
    char["u"] = "$B4"
    char["v"] = "$B5"
    char["w"] = "$B6"
    char["x"] = "$B7"
    char["y"] = "$B8"
    char["z"] = "$B9"
    char["é"] = "$BA"
    # apostrophe-letter pairs that occupy a single byte
    char["'t"] = "$BE"
    char["'s"] = "$D4"
    char["'"] = "$E0"
    char["-"] = "$E3"
    char["?"] = "$E6"
    char["!"] = "$E7"
    char["."] = "$E8"
    char["&"] = "$E9"
    char["♂"] = "$EF"
    char["/"] = "$F3"
    char[","] = "$F4"
    char["♀"] = "$F5"
    char["0"] = "$F6"
    char["1"] = "$F7"
    char["2"] = "$F8"
    char["3"] = "$F9"
    char["4"] = "$FA"
    char["5"] = "$FB"
    char["6"] = "$FC"
    char["7"] = "$FD"
    char["8"] = "$FE"
    char["9"] = "$FF"
}
# Lines whose text before the first quote mentions SECTION, INCLUDE or INCBIN
# really do want plain ASCII inside their quotes: pass them through untouched.
tolower($1) ~ /section/ ||
tolower($1) ~ /include/ ||
tolower($1) ~ /incbin/ {
    print
    next
}

# Every other line: with FS set to a double quote, odd-numbered fields are
# ordinary assembly text (copied verbatim) and even-numbered fields are quoted
# strings, which are replaced by a comma-separated list of char[] values.
#
# NOTE: this works on bytes, so it relies on awk treating strings byte-wise
# (e.g. LC_CTYPE=C, which the Makefile exported for gawk).
{
    for (i = 1; i <= NF; i++) {
        if (i % 2 == 1) {
            printf "%s", $(i)
            continue
        }
        f = $(i)
        while (f != "") {
            # Take the lead byte, then as many continuation bytes as the
            # UTF-8 lead byte announces (0xF0.. -> 3, 0xE0.. -> 2, 0xC0.. -> 1).
            # Comparing against single-byte literals avoids misclassifying a
            # lead byte that merely equals the first byte of the boundary
            # character.
            c = substr(f, 1, 1)
            f = substr(f, 2)
            if (c >= "\360")
                extra = 3
            else if (c >= "\340")
                extra = 2
            else if (c >= "\300")
                extra = 1
            else
                extra = 0
            if (extra > 0) {
                c = c substr(f, 1, extra)
                f = substr(f, extra + 1)
            }
            # An apostrophe fuses with the next letter only when the table
            # actually defines that pair; otherwise the two characters are
            # emitted separately instead of producing an empty value.
            if (c == "'" && f != "") {
                pair = c substr(f, 1, 1)
                if (pair in char) {
                    c = pair
                    f = substr(f, 2)
                }
            }
            printf "%s", char[c]
            if (length(f))
                printf ","
        }
    }
    printf "\n"
}

341
textpre.py Normal file
View File

@ -0,0 +1,341 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
# Character table: maps each glyph that may appear inside a quoted string to
# the "$XX" byte value written in its place.  Several glyphs intentionally
# share a value (e.g. the katakana and Latin ranges both start at $80).
#
# The non-ASCII keys had been lost (every one collapsed to the empty string,
# so each assignment overwrote the previous one); they are restored here in
# the same order and with the same values as the awk table this script
# replaces.
chars = {}
# voiced katakana
chars["ガ"] = "$05"
chars["ギ"] = "$06"
chars["グ"] = "$07"
chars["ゲ"] = "$08"
chars["ゴ"] = "$09"
chars["ザ"] = "$0A"
chars["ジ"] = "$0B"
chars["ズ"] = "$0C"
chars["ゼ"] = "$0D"
chars["ゾ"] = "$0E"
chars["ダ"] = "$0F"
chars["ヂ"] = "$10"
chars["ヅ"] = "$11"
chars["デ"] = "$12"
chars["ド"] = "$13"
chars["バ"] = "$19"
chars["ビ"] = "$1A"
chars["ブ"] = "$1B"
chars["ボ"] = "$1C"
# voiced hiragana
chars["が"] = "$26"
chars["ぎ"] = "$27"
chars["ぐ"] = "$28"
chars["げ"] = "$29"
chars["ご"] = "$2A"
chars["ざ"] = "$2B"
chars["じ"] = "$2C"
chars["ず"] = "$2D"
chars["ぜ"] = "$2E"
chars["ぞ"] = "$2F"
chars["だ"] = "$30"
chars["ぢ"] = "$31"
chars["づ"] = "$32"
chars["で"] = "$33"
chars["ど"] = "$34"
chars["ば"] = "$3A"
chars["び"] = "$3B"
chars["ぶ"] = "$3C"
chars["べ"] = "$3D"
chars["ぼ"] = "$3E"
# semi-voiced kana
chars["パ"] = "$40"
chars["ピ"] = "$41"
chars["プ"] = "$42"
chars["ポ"] = "$43"
chars["ぱ"] = "$44"
chars["ぴ"] = "$45"
chars["ぷ"] = "$46"
chars["ぺ"] = "$47"
chars["ぽ"] = "$48"
# katakana
chars["ア"] = "$80"
chars["イ"] = "$81"
chars["ウ"] = "$82"
chars["エ"] = "$83"
# NOTE(review): the awk table has the small "ォ" here - confirm it is not
# meant to be the full-size "オ".
chars["ォ"] = "$84"
chars["カ"] = "$85"
chars["キ"] = "$86"
chars["ク"] = "$87"
chars["ケ"] = "$88"
chars["コ"] = "$89"
chars["サ"] = "$8A"
chars["シ"] = "$8B"
chars["ス"] = "$8C"
chars["セ"] = "$8D"
chars["ソ"] = "$8E"
chars["タ"] = "$8F"
chars["チ"] = "$90"
chars["ツ"] = "$91"
chars["テ"] = "$92"
chars["ト"] = "$93"
chars["ナ"] = "$94"
chars["ニ"] = "$95"
chars["ヌ"] = "$96"
chars["ネ"] = "$97"
chars["ノ"] = "$98"
chars["ハ"] = "$99"
chars["ヒ"] = "$9A"
chars["フ"] = "$9B"
chars["ホ"] = "$9C"
chars["マ"] = "$9D"
chars["ミ"] = "$9E"
chars["ム"] = "$9F"
chars["メ"] = "$A0"
chars["モ"] = "$A1"
chars["ヤ"] = "$A2"
chars["ユ"] = "$A3"
chars["ヨ"] = "$A4"
chars["ラ"] = "$A5"
chars["ル"] = "$A6"
chars["レ"] = "$A7"
chars["ロ"] = "$A8"
chars["ワ"] = "$A9"
chars["ヲ"] = "$AA"
chars["ン"] = "$AB"
chars["ッ"] = "$AC"
chars["ャ"] = "$AD"
chars["ュ"] = "$AE"
chars["ョ"] = "$AF"
chars["ィ"] = "$B0"
# hiragana
chars["あ"] = "$B1"
chars["い"] = "$B2"
chars["う"] = "$B3"
chars["え"] = "$B4"
chars["お"] = "$B5"
chars["か"] = "$B6"
chars["き"] = "$B7"
chars["く"] = "$B8"
chars["け"] = "$B9"
chars["こ"] = "$BA"
chars["さ"] = "$BB"
chars["し"] = "$BC"
chars["す"] = "$BD"
chars["せ"] = "$BE"
chars["そ"] = "$BF"
chars["た"] = "$C0"
chars["ち"] = "$C1"
chars["つ"] = "$C2"
chars["て"] = "$C3"
chars["と"] = "$C4"
chars["な"] = "$C5"
chars["に"] = "$C6"
chars["ぬ"] = "$C7"
chars["ね"] = "$C8"
chars["の"] = "$C9"
chars["は"] = "$CA"
chars["ひ"] = "$CB"
chars["ふ"] = "$CC"
chars["へ"] = "$CD"
chars["ほ"] = "$CE"
chars["ま"] = "$CF"
chars["み"] = "$D0"
chars["む"] = "$D1"
chars["め"] = "$D2"
chars["も"] = "$D3"
chars["や"] = "$D4"
chars["ゆ"] = "$D5"
chars["よ"] = "$D6"
chars["ら"] = "$D7"
chars["り"] = "$D8"
chars["る"] = "$D9"
chars["れ"] = "$DA"
chars["ろ"] = "$DB"
chars["わ"] = "$DC"
chars["を"] = "$DD"
chars["ん"] = "$DE"
chars["っ"] = "$DF"
chars["ゃ"] = "$E0"
chars["ゅ"] = "$E1"
chars["ょ"] = "$E2"
chars["ー"] = "$E3"
chars["@"] = "$50"
chars["#"] = "$54"
chars["…"] = "$75"
# textbox borders
chars["┌"] = "$79"
chars["─"] = "$7A"
chars["┐"] = "$7B"
chars["│"] = "$7C"
chars["└"] = "$7D"
chars["┘"] = "$7E"
chars["№"] = "$74"
chars[" "] = "$7F"
chars["A"] = "$80"
chars["B"] = "$81"
chars["C"] = "$82"
chars["D"] = "$83"
chars["E"] = "$84"
chars["F"] = "$85"
chars["G"] = "$86"
chars["H"] = "$87"
chars["I"] = "$88"
chars["J"] = "$89"
chars["K"] = "$8A"
chars["L"] = "$8B"
chars["M"] = "$8C"
chars["N"] = "$8D"
chars["O"] = "$8E"
chars["P"] = "$8F"
chars["Q"] = "$90"
chars["R"] = "$91"
chars["S"] = "$92"
chars["T"] = "$93"
chars["U"] = "$94"
chars["V"] = "$95"
chars["W"] = "$96"
chars["X"] = "$97"
chars["Y"] = "$98"
chars["Z"] = "$99"
chars["("] = "$9A"
chars[")"] = "$9B"
chars[":"] = "$9C"
chars[";"] = "$9D"
chars["["] = "$9E"
chars["]"] = "$9F"
chars["a"] = "$A0"
chars["b"] = "$A1"
chars["c"] = "$A2"
chars["d"] = "$A3"
chars["e"] = "$A4"
chars["f"] = "$A5"
chars["g"] = "$A6"
chars["h"] = "$A7"
chars["i"] = "$A8"
chars["j"] = "$A9"
chars["k"] = "$AA"
chars["l"] = "$AB"
chars["m"] = "$AC"
chars["n"] = "$AD"
chars["o"] = "$AE"
chars["p"] = "$AF"
chars["q"] = "$B0"
chars["r"] = "$B1"
chars["s"] = "$B2"
chars["t"] = "$B3"
chars["u"] = "$B4"
chars["v"] = "$B5"
chars["w"] = "$B6"
chars["x"] = "$B7"
chars["y"] = "$B8"
chars["z"] = "$B9"
chars["Ä"] = "$C0"
chars["Ö"] = "$C1"
chars["Ü"] = "$C2"
chars["ä"] = "$C3"
chars["ö"] = "$C4"
chars["ü"] = "$C5"
# apostrophe-letter pairs that occupy a single byte
chars["'d"] = "$D0"
chars["'l"] = "$D1"
chars["'m"] = "$D2"
chars["'r"] = "$D3"
chars["'s"] = "$D4"
chars["'t"] = "$D5"
chars["'v"] = "$D6"
chars["'"] = "$E0"
chars["-"] = "$E3"
chars["?"] = "$E6"
chars["!"] = "$E7"
chars["."] = "$E8"
chars["&"] = "$E9"
chars["é"] = "$EA"
# NOTE(review): the key for $EB was lost and has no counterpart in the awk
# table; a right arrow is presumed - confirm against the game's charmap.
chars["→"] = "$EB"
chars["♂"] = "$EF"
chars["¥"] = "$F0"
chars["×"] = "$F1"
chars["/"] = "$F3"
chars[","] = "$F4"
chars["♀"] = "$F5"
chars["0"] = "$F6"
chars["1"] = "$F7"
chars["2"] = "$F8"
chars["3"] = "$F9"
chars["4"] = "$FA"
chars["5"] = "$FB"
chars["6"] = "$FC"
chars["7"] = "$FD"
chars["8"] = "$FE"
chars["9"] = "$FF"
# Letters that fuse with a preceding apostrophe into one single-byte glyph.
_APOSTROPHE_LETTERS = "dlmrstv"


def _to_text(value):
    """Return *value* as text, decoding it from UTF-8 if it is a byte string."""
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return value


def _split_comment(line):
    """Split *line* into ``(code, comment)``.

    The comment starts at the first ``;`` that is not inside a quoted string
    (``;`` is itself a mapped character, so it may legitimately appear in
    text).  ``comment`` keeps the semicolon and everything after it, including
    the line ending; it is empty when the line has no comment.
    """
    in_string = False
    for pos, ch in enumerate(line):
        if ch == "\"":
            in_string = not in_string
        elif ch == ";" and not in_string:
            return line[:pos], line[pos:]
    return line, ""


def _string_to_values(text, table):
    """Return the ", "-joined table values for the characters of *text*.

    Raises KeyError naming the offending character when it is not in *table*.
    """
    values = []
    pos = 0
    while pos < len(text):
        glyph = text[pos]
        pos += 1
        # certain apostrophe-letter pairs are only a single byte; the bounds
        # check keeps an apostrophe at the very end of a string from failing
        if glyph == "'" and pos < len(text) \
        and text[pos] in _APOSTROPHE_LETTERS:
            glyph += text[pos]
            pos += 1
        if glyph not in table:
            raise KeyError("no byte value defined for character %r" % glyph)
        values.append(table[glyph])
    return ", ".join(values)


def convert_line(line, table=None):
    """Return *line* with its quoted strings replaced by byte-value lists.

    line  -- one line of assembly source as text, including its line ending.
    table -- mapping from character (text) to its "$XX" value; defaults to
             the module-level ``chars`` table.

    Lines without quotes in their code part, and lines whose text before the
    first quote mentions SECTION, INCLUDE or INCBIN, are returned unchanged.
    Quotes inside a trailing comment are never converted, and the comment is
    carried over to the output as is.
    """
    if table is None:
        table = {_to_text(key): value for key, value in chars.items()}
    code, comment = _split_comment(line)
    # skip asm with no quotes
    if "\"" not in code:
        return line
    # split by quotes: odd indexes are the quoted strings
    pieces = code.split("\"")
    # skip asm that actually does use ASCII in quotes
    directive = pieces[0].lower()
    if "section" in directive \
    or "include" in directive \
    or "incbin" in directive:
        return line
    out = []
    for index, piece in enumerate(pieces):
        if index % 2:
            out.append(_string_to_values(piece, table))
        else:
            out.append(piece)
    out.append(comment)
    return "".join(out)


def main():
    """Filter standard input to standard output, converting every line.

    Both streams are handled as UTF-8 bytes (through ``.buffer`` where the
    stream has one), so whole characters are looked up in the table
    regardless of the interpreter version or the active locale.
    """
    table = {_to_text(key): value for key, value in chars.items()}
    source = getattr(sys.stdin, "buffer", sys.stdin)
    sink = getattr(sys.stdout, "buffer", sys.stdout)
    for raw in source:
        sink.write(convert_line(_to_text(raw), table).encode("utf-8"))


if __name__ == "__main__":
    main()