# -*- coding: utf-8 -*-
'''
PLY-based lexer for assembler source using Z80-style mnemonics,
registers and flags.

The module exposes the ``tokens`` list and the ``Lexer`` wrapper class.
Rule methods are matched by PLY in the order they are defined, and the
docstring of each ``t_*`` method IS its regular expression, so neither
the ordering nor those docstrings may be altered casually.
'''

import ply.lex as lex
import sys
import os


FILENAME = ''  # Current filename

_tokens = (
    'STRING', 'NEWLINE', 'LABEL', 'ID', 'COMMA', 'PLUS', 'MINUS',
    'LP', 'RP', 'MUL', 'DIV', 'POW', 'UMINUS', 'APO', 'INTEGER', 'ADDR',
    'RB', 'LB', 'LOCALLABEL', 'LSHIFT', 'RSHIFT', 'BITWISE_OR',
    'BITWISE_AND', 'LOGICAL_NOT', 'BITWISE_COMPLEMENT',
)

reserved_instructions = {
    'adc': 'ADC', 'add': 'ADD', 'and': 'AND', 'bit': 'BIT',
    'call': 'CALL', 'ccf': 'CCF', 'cp': 'CP', 'cpd': 'CPD',
    'cpdr': 'CPDR', 'cpi': 'CPI', 'cpir': 'CPIR', 'cpl': 'CPL',
    'daa': 'DAA', 'dec': 'DEC', 'di': 'DI', 'djnz': 'DJNZ',
    'ei': 'EI', 'ex': 'EX', 'exx': 'EXX', 'halt': 'HALT',
    'im': 'IM', 'in': 'IN', 'inc': 'INC', 'ind': 'IND',
    'indr': 'INDR', 'ini': 'INI', 'inir': 'INIR', 'jp': 'JP',
    'jr': 'JR', 'ld': 'LD', 'ldd': 'LDD', 'lddr': 'LDDR',
    'ldi': 'LDI', 'ldir': 'LDIR', 'neg': 'NEG', 'nop': 'NOP',
    'or': 'OR', 'otdr': 'OTDR', 'otir': 'OTIR', 'out': 'OUT',
    'outd': 'OUTD', 'outi': 'OUTI', 'pop': 'POP', 'push': 'PUSH',
    'res': 'RES', 'ret': 'RET', 'reti': 'RETI', 'retn': 'RETN',
    'rl': 'RL', 'rla': 'RLA', 'rlc': 'RLC', 'rlca': 'RLCA',
    'rld': 'RLD', 'rr': 'RR', 'rra': 'RRA', 'rrc': 'RRC',
    'rrca': 'RRCA', 'rrd': 'RRD', 'rst': 'RST', 'sbc': 'SBC',
    'scf': 'SCF', 'set': 'SET', 'sla': 'SLA', 'sll': 'SLL',
    'sra': 'SRA', 'srl': 'SRL', 'sub': 'SUB', 'xor': 'XOR',
}

pseudo = {  # pseudo ops
    'align': 'ALIGN',
    'org': 'ORG',
    'defb': 'DEFB',
    'defm': 'DEFB',
    'db': 'DEFB',
    'defs': 'DEFS',
    'defw': 'DEFW',
    'ds': 'DEFS',
    'dw': 'DEFW',
    'equ': 'EQU',
    'proc': 'PROC',
    'endp': 'ENDP',
    'local': 'LOCAL',
    'end': 'END',
    'incbin': 'INCBIN',
}

regs8 = {
    'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'h': 'H', 'l': 'L',
    'i': 'I', 'r': 'R',
    'ixh': 'IXH', 'ixl': 'IXL', 'iyh': 'IYH', 'iyl': 'IYL',
}

regs16 = {
    'af': 'AF', 'bc': 'BC', 'de': 'DE', 'hl': 'HL',
    'ix': 'IX', 'iy': 'IY', 'sp': 'SP',
}

flags = {
    'z': 'Z', 'nz': 'NZ', 'nc': 'NC', 'po': 'PO', 'pe': 'PE',
    'p': 'P', 'm': 'M',
}

preprocessor = {
    'init': '_INIT',
    'line': '_LINE',
}

# List of token names.
_tokens = _tokens \
    + tuple(reserved_instructions.values()) \
    + tuple(pseudo.values()) \
    + tuple(regs8.values()) \
    + tuple(regs16.values()) \
    + tuple(flags.values()) \
    + tuple(preprocessor.values())


def get_uniques(l):
    ''' Returns a list with no repeated elements.

    The first occurrence of each element is kept, in its original order.
    Membership is tested with ``in`` on the result list, so elements only
    need to support ``==`` (they do not have to be hashable).
    '''
    result = []
    for item in l:
        if item not in result:
            result.append(item)

    return result


tokens = get_uniques(_tokens)


class Lexer(object):
    ''' Own class lexer to allow multiple instances.
    This lexer is just a wrapper of the current FILESTACK[-1] lexer.

    Call input() to attach a fresh PLY lexer to a source string, then
    token() repeatedly to pull tokens from it.
    '''
    # 'preproc' is entered by '#' in column 1 and left at the next NEWLINE.
    states = (
        ('preproc', 'exclusive'),
    )

    # -------------- TOKEN ACTIONS --------------

    def __set_lineno(self, value):
        ''' Setter for lexer.lineno '''
        self.lex.lineno = value

    def __get_lineno(self):
        ''' Getter for lexer.lineno (0 while no input has been set) '''
        if self.lex is None:
            return 0

        return self.lex.lineno

    lineno = property(__get_lineno, __set_lineno)

    def t_INITIAL_preproc_skip(self, t):
        r'[ \t]+'
        pass  # Ignore whitespaces and tabs

    def t_CHAR(self, t):
        r"'.'"
        # A single quoted char, returned as the INTEGER value of its code
        t.value = ord(t.value[1])
        t.type = 'INTEGER'

        return t

    def t_HEXA(self, t):
        r'([0-9][0-9a-fA-F]*[hH])|(\$[0-9a-fA-F]+)'
        if t.value[0] == '$':
            digits = t.value[1:]  # Remove initial '$'
        else:
            digits = t.value[:-1]  # Remove last 'h'

        t.value = int(digits, 16)  # Convert to decimal
        t.type = 'INTEGER'

        return t

    def t_BIN(self, t):
        r'(%[01]+)|([01]+[bB])'
        # A Binary integer
        # Note 00B is a 0 binary, but
        # 00Bh is a 12 in hex. So this pattern must come
        # after HEXA
        if t.value[0] == '%':
            digits = t.value[1:]  # Remove initial %
        else:
            digits = t.value[:-1]  # Remove last 'b'

        t.value = int(digits, 2)  # Convert to decimal
        t.type = 'INTEGER'

        return t

    def t_INITIAL_preproc_INTEGER(self, t):
        r'[0-9]+'
        # an integer decimal number
        t.value = int(t.value)

        return t

    def t_INITIAL_ID(self, t):
        r'[_a-zA-Z.]([.]?[_a-zA-Z0-9\\@\#]+)*[:]?(\\\W)?'
        # Any identifier
        original = t.value  # Saves original value

        if original[-1] == ':':
            t.type = 'LABEL'
            t.value = original[:-1]
            return t

        # NOTE(review): the "\@:" alternative below cannot be reached, since
        # any value ending in ':' has already been returned as a LABEL.
        # Kept as-is because the parser may rely on the current behaviour.
        if original[0] == '.' and (original[-2:] == '\\@' or
                                   original[-3:] == '\\@:'):
            t.type = 'LOCALLABEL'
            t.value = original[1:]
            return t

        # Keyword tables are consulted in this exact order; the first hit
        # wins. Keywords are case-insensitive and reported in uppercase,
        # since our internal tables use uppercase.
        key = original.lower()
        for table in (reserved_instructions, pseudo, regs8, flags, regs16):
            token_type = table.get(key)
            if token_type is not None:
                t.type = token_type
                t.value = original.upper()
                return t

        # Plain identifier: keep the spelling exactly as written.
        t.type = 'ID'
        t.value = original

        return t

    def t_preproc_ID(self, t):
        r'[_a-zA-Z][_a-zA-Z0-9]*'
        # preprocessor directives
        t.type = preprocessor.get(t.value.lower(), 'ID')
        return t

    def t_COMMA(self, t):
        r','
        return t

    def t_ADDR(self, t):
        r'\$'
        return t

    def t_LP(self, t):
        r'\('
        return t

    def t_RP(self, t):
        r'\)'
        return t

    # NOTE(review): RB matches '[' and LB matches ']'. The names look
    # swapped, but the grammar using these tokens is not visible from
    # here, so the mapping is left untouched.
    def t_RB(self, t):
        r'\['
        return t

    def t_LB(self, t):
        r'\]'
        return t

    def t_LSHIFT(self, t):
        r'<<'
        return t

    def t_RSHIFT(self, t):
        r'>>'
        return t

    def t_BITWISE_OR(self, t):
        r'\|'
        return t

    def t_BITWISE_AND(self, t):
        r'\&'
        return t

    def t_BITWISE_COMPLEMENT(self, t):
        r'~'
        return t

    def t_LOGICAL_NOT(self, t):
        r'\!'
        return t

    def t_PLUS(self, t):
        r'\+'
        return t

    def t_MINUS(self, t):
        r'\-'
        return t

    def t_MUL(self, t):
        r'\*'
        return t

    def t_DIV(self, t):
        r'\/'
        return t

    def t_POW(self, t):
        r'\^'
        return t

    def t_APO(self, t):
        r"'"
        return t

    def t_INITIAL_preproc_STRING(self, t):
        r'"[^"]*"'
        # a doubled quoted string
        t.value = t.value[1:-1]  # Remove quotes

        return t

    def t_INITIAL_preproc_error(self, t):
        ''' error handling rule '''
        self.error("illegal character '%s'" % t.value[0])

    def t_INITIAL_preproc_CONTINUE(self, t):
        r'\\\r?\n'
        t.lexer.lineno += 1
        # Allows line breaking: no token is returned, so the escaped
        # newline is swallowed.

    def t_COMMENT(self, t):
        r';.*'
        # Skip to end of line (except end of line)

    def t_INITIAL_preproc_NEWLINE(self, t):
        r'\r?\n'
        t.lexer.lineno += 1
        t.lexer.begin('INITIAL')  # Any preprocessor line ends here

        return t

    def t_INITIAL_SHARP(self, t):
        r'\#'
        # '#' only starts a preprocessor line when it is in column 1
        if self.find_column(t) == 1:
            t.lexer.begin('preproc')
        else:
            self.error("illegal character '%s'" % t.value[0])

    def __init__(self):
        ''' Creates a new GLOBAL lexer instance
        '''
        self.lex = None
        self.filestack = []  # Current filename, and line number being parsed
        self.input_data = ''
        self.tokens = tokens
        self.next_token = None  # if set to something, this will be returned once

    def input(self, str):
        ''' Defines input string, removing current lexer.

        A new PLY lexer bound to this instance is built on every call.
        '''
        self.input_data = str
        self.lex = lex.lex(object=self)
        self.lex.input(self.input_data)

    def token(self):
        ''' Returns the next token from the underlying PLY lexer. '''
        return self.lex.token()

    def find_column(self, token):
        ''' Compute column:
                - token is a token instance

        Returns the 1-based column of the token within its line, measured
        against the text last given to input().
        '''
        # rfind() yields -1 when there is no previous newline, which makes
        # line_start 0, i.e. the token lives on the first line.
        line_start = self.input_data.rfind('\n', 0, token.lexpos) + 1

        return token.lexpos - line_start + 1

    def msg(self, str):
        ''' Prints an error msg.
        '''
        # NOTE(review): the line number is reported as "?" on purpose in
        # the current code; presumably because self.lex may still be None
        # when a message is emitted. Confirm before printing self.lineno.
        print('%s:%s %s' % (FILENAME, "?", str))

    def error(self, str):
        ''' Prints an error msg, and exits.
        '''
        self.msg('Error: %s' % str)

        sys.exit(1)

    def warning(self, str):
        ''' Emits a warning and continues execution.
        '''
        self.msg('Warning: %s' % str)


# Needed for states
tmp = lex.lex(object=Lexer(), lextab='zxbasmlextab')


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.stderr.write('Usage: %s <source file>\n' % sys.argv[0])
        sys.exit(1)

    FILENAME = sys.argv[1]

    # Drive the lexer through Lexer.input() so that input_data is set and
    # find_column() (used by the '#' rule) sees the real source text.
    lexer = Lexer()
    with open(FILENAME) as source:
        lexer.input(source.read())

    tok = lexer.token()
    while tok:
        print(tok)
        tok = lexer.token()