"""
This is a very primitive line based preprocessor, for times when using
a C preprocessor isn't an option.
"""

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import sys
import os
import os.path
import re
from optparse import OptionParser
import errno

# hack around win32 mangling our line endings
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65443
if sys.platform == "win32":
    import msvcrt
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    os.linesep = '\n'

import Expression

__all__ = ['Preprocessor', 'preprocess']


class Preprocessor:
    """
    Class for preprocessing text files.

    Input is handled line by line.  Lines that start with the marker
    followed by a known command name are dispatched to the matching
    do_<command> method; other lines are written to the output stream
    (after filtering) unless a conditional currently disables output.
    """

    class Error(RuntimeError):
        """
        Error raised while preprocessing.

        The FILE and LINE entries of the preprocessor context at the time
        of construction are stored as .file and .line, MSG as .key.
        """

        def __init__(self, cpp, MSG, context):
            self.file = cpp.context['FILE']
            self.line = cpp.context['LINE']
            self.key = MSG
            RuntimeError.__init__(self,
                                  (self.file, self.line, self.key, context))

    def __init__(self):
        self.context = Expression.Context()
        for k, v in {'FILE': '',
                     'LINE': 0,
                     'DIRECTORY': os.path.abspath('.')}.iteritems():
            self.context[k] = v
        # actionLevel tracks how much preprocessing happened (see warnUnused):
        #  0: nothing seen, 1: only a marker line seen, 2: real work done
        self.actionLevel = 0
        # Depth of the disabled conditional nesting; 0 means output enabled.
        self.disableLevel = 0
        # ifStates can be
        #  0: hadTrue
        #  1: wantsTrue
        #  2: #else found
        self.ifStates = []
        self.checkLineNumbers = False
        self.writtenLines = 0
        # Sorted list of (name, callable) pairs, see do_filter.
        self.filters = []
        # Maps command name -> (level, handler).  A command is only run when
        # its level is >= disableLevel, so level 0 commands run only in
        # enabled code while the conditionals also run in disabled code.
        self.cmds = {}
        for cmd, level in {'define': 0,
                           'undef': 0,
                           'if': sys.maxint,
                           'ifdef': sys.maxint,
                           'ifndef': sys.maxint,
                           'else': 1,
                           'elif': 1,
                           'elifdef': 1,
                           'elifndef': 1,
                           'endif': sys.maxint,
                           'expand': 0,
                           'literal': 0,
                           'filter': 0,
                           'unfilter': 0,
                           'include': 0,
                           'includesubst': 0,
                           'error': 0}.iteritems():
            self.cmds[cmd] = (level, getattr(self, 'do_' + cmd))
        self.out = sys.stdout
        self.setMarker('#')
        self.LE = '\n'
        # Matches @VAR@ references for the substitution filters.
        self.varsubst = re.compile(r'@(?P<VAR>\w+)@', re.U)

    def warnUnused(self, file):
        """
        Write a warning to stderr if no (useful) directives were processed.
        file is only used to label the warning.
        """
        if self.actionLevel == 0:
            sys.stderr.write(
                '{0}: WARNING: no preprocessor directives found\n'
                .format(file))
        elif self.actionLevel == 1:
            sys.stderr.write(
                '{0}: WARNING: no useful preprocessor directives found\n'
                .format(file))

    def setLineEndings(self, aLE):
        """
        Set the line endings to be used for output.

        aLE is one of the names 'cr', 'lf' or 'crlf'.  The literal
        line ending strings themselves are accepted as well, so that
        the default of preprocess() ('\\n') works.  Any other value
        raises KeyError.
        """
        endings = {'cr': '\x0D', 'lf': '\x0A', 'crlf': '\x0D\x0A'}
        if aLE in endings.values():
            self.LE = aLE
        else:
            self.LE = endings[aLE]

    def setMarker(self, aMarker):
        """
        Set the marker to be used for processing directives.
        Used for handling CSS files, with pp.setMarker('%'), for example.
        The given marker may be None, in which case no markers are processed.
        """
        self.marker = aMarker
        if aMarker:
            # NOTE: the marker is inserted into the patterns unescaped, i.e.
            # it is interpreted as a regular expression.
            self.instruction = re.compile(
                r'{0}(?P<cmd>[a-z]+)(?:\s(?P<args>.*))?$'.format(aMarker),
                re.U)
            self.comment = re.compile(aMarker, re.U)
        else:
            class NoMatch(object):
                def match(self, *args):
                    return False
            self.instruction = self.comment = NoMatch()

    def clone(self):
        """
        Create a clone of the current processor, including
        line ending settings, marker, variable definitions, output stream.
        """
        rv = Preprocessor()
        rv.context.update(self.context)
        rv.setMarker(self.marker)
        rv.LE = self.LE
        rv.out = self.out
        return rv

    def applyFilters(self, aLine):
        """
        Run aLine through all active filters, in order, and return the result.
        """
        for f in self.filters:
            aLine = f[1](aLine)
        return aLine

    def write(self, aLine):
        """
        Internal method for handling output.
        """
        if self.checkLineNumbers:
            self.writtenLines += 1
            ln = self.context['LINE']
            if self.writtenLines != ln:
                # Output got out of sync with the input, emit a line marker.
                self.out.write('//@line {line} "{file}"{le}'.format(
                    line=ln, file=self.context['FILE'], le=self.LE))
                self.writtenLines = ln
        filteredLine = self.applyFilters(aLine)
        if filteredLine != aLine:
            self.actionLevel = 2
        # ensure our line ending. Only need to handle \n, as we're reading
        # with universal line ending support, at least for files.
        filteredLine = filteredLine.replace('\n', self.LE)
        self.out.write(filteredLine)

    def handleCommandLine(self, args, defaultToStdin=False):
        """
        Parse a commandline into this parser.
        Uses OptionParser internally, no args mean sys.argv[1:].

        The -I files are processed first, then the positional arguments
        (or stdin, if there are none and defaultToStdin is set).
        """
        p = self.getCommandLineParser()
        (options, args) = p.parse_args(args=args)
        includes = options.I
        if options.output:
            outDir = os.path.dirname(options.output)
            if outDir and not os.path.exists(outDir):
                try:
                    os.makedirs(outDir)
                except OSError as error:
                    # Somebody else may have created it in the meantime.
                    if error.errno != errno.EEXIST:
                        raise
            self.out = open(options.output, 'w')
        if defaultToStdin and len(args) == 0:
            args = [sys.stdin]
        includes.extend(args)
        if includes:
            for f in includes:
                self.do_include(f, False)
            self.warnUnused(f)

    def getCommandLineParser(self, unescapeDefines=False):
        """
        Create the OptionParser for the command line.

        Most options are callbacks which modify this preprocessor while
        the command line is parsed.  If unescapeDefines is set, -D values
        enclosed in double quotes have the quotes stripped.
        """
        escapedValue = re.compile('".*"$')
        numberValue = re.compile(r'\d+$')

        def handleE(option, opt, value, parser):
            for k, v in os.environ.iteritems():
                self.context[k] = v

        def handleD(option, opt, value, parser):
            vals = value.split('=', 1)
            if len(vals) == 1:
                vals.append(1)
            elif unescapeDefines and escapedValue.match(vals[1]):
                # strip escaped string values
                vals[1] = vals[1][1:-1]
            elif numberValue.match(vals[1]):
                vals[1] = int(vals[1])
            self.context[vals[0]] = vals[1]

        def handleU(option, opt, value, parser):
            del self.context[value]

        def handleF(option, opt, value, parser):
            self.do_filter(value)

        def handleLE(option, opt, value, parser):
            self.setLineEndings(value)

        def handleMarker(option, opt, value, parser):
            self.setMarker(value)

        p = OptionParser()
        p.add_option('-I', action='append', type="string", default=[],
                     metavar="FILENAME", help='Include file')
        p.add_option('-E', action='callback', callback=handleE,
                     help='Import the environment into the defined variables')
        p.add_option('-D', action='callback', callback=handleD, type="string",
                     metavar="VAR[=VAL]", help='Define a variable')
        p.add_option('-U', action='callback', callback=handleU, type="string",
                     metavar="VAR", help='Undefine a variable')
        p.add_option('-F', action='callback', callback=handleF, type="string",
                     metavar="FILTER", help='Enable the specified filter')
        p.add_option('-o', '--output', type="string", default=None,
                     metavar="FILENAME",
                     help='Output to the specified file ' +
                          'instead of stdout')
        p.add_option('--line-endings', action='callback', callback=handleLE,
                     type="string", metavar="[cr|lf|crlf]",
                     help='Use the specified line endings '
                          '[Default: OS dependent]')
        p.add_option('--marker', action='callback', callback=handleMarker,
                     type="string",
                     help='Use the specified marker instead of #')
        return p

    def handleLine(self, aLine):
        """
        Handle a single line of input (internal).

        Raises Preprocessor.Error('INVALID_CMD') for a marker line with
        an unknown command.
        """
        if self.actionLevel == 0 and self.comment.match(aLine):
            self.actionLevel = 1
        m = self.instruction.match(aLine)
        if m:
            args = None
            name = m.group('cmd')
            try:
                args = m.group('args')
            except IndexError:
                pass
            if name not in self.cmds:
                raise Preprocessor.Error(self, 'INVALID_CMD', aLine)
            level, handler = self.cmds[name]
            if level >= self.disableLevel:
                handler(args)
            # Compare the command *name*; comparing the handler itself with
            # 'literal' is always unequal.
            if name != 'literal':
                self.actionLevel = 2
        elif self.disableLevel == 0 and not self.comment.match(aLine):
            self.write(aLine)

    # Instruction handlers
    # These are named do_'instruction name' and take one argument

    # Variables
    def do_define(self, args):
        """
        #define NAME [VALUE]

        Without a value, NAME is set to 1.  A value is run through the
        active filters and stored as int if it converts, else as string.
        """
        m = re.match(r'(?P<name>\w+)(?:\s(?P<value>.*))?', args, re.U)
        if not m:
            raise Preprocessor.Error(self, 'SYNTAX_DEF', args)
        val = 1
        if m.group('value'):
            val = self.applyFilters(m.group('value'))
            try:
                val = int(val)
            except (TypeError, ValueError):
                # not a number, keep the value as is
                pass
        self.context[m.group('name')] = val

    def do_undef(self, args):
        """
        #undef NAME

        Removes NAME from the context; unknown names are ignored.
        """
        m = re.match(r'(?P<name>\w+)$', args, re.U)
        if not m:
            raise Preprocessor.Error(self, 'SYNTAX_DEF', args)
        if args in self.context:
            del self.context[args]

    # Logic
    def ensure_not_else(self):
        """
        Warn on stderr if there is no open conditional or if the
        innermost one already had its #else.
        """
        if len(self.ifStates) == 0 or self.ifStates[-1] == 2:
            sys.stderr.write('WARNING: bad nesting of #else\n')

    def do_if(self, args, replace=False):
        """
        #if EXPRESSION

        With replace set (used by #elif), the state of the innermost
        conditional is replaced instead of opening a new one.
        """
        if self.disableLevel and not replace:
            # Nested in disabled code, just keep track of the depth.
            self.disableLevel += 1
            return
        val = None
        try:
            e = Expression.Expression(args)
            val = e.evaluate(self.context)
        except Exception:
            # XXX do real error reporting
            raise Preprocessor.Error(self, 'SYNTAX_ERR', args)
        if isinstance(val, str):
            # we're looking for a number value, strings are false
            val = False
        if not val:
            self.disableLevel = 1
        if replace:
            if val:
                self.disableLevel = 0
            self.ifStates[-1] = self.disableLevel
        else:
            self.ifStates.append(self.disableLevel)

    def do_ifdef(self, args, replace=False):
        """
        #ifdef NAME

        Enabled if NAME is in the context.  replace works as in do_if.
        """
        if self.disableLevel and not replace:
            self.disableLevel += 1
            return
        if re.match(r'\W', args, re.U):
            raise Preprocessor.Error(self, 'INVALID_VAR', args)
        if args not in self.context:
            self.disableLevel = 1
        if replace:
            if args in self.context:
                self.disableLevel = 0
            self.ifStates[-1] = self.disableLevel
        else:
            self.ifStates.append(self.disableLevel)

    def do_ifndef(self, args, replace=False):
        """
        #ifndef NAME

        Enabled if NAME is not in the context.  replace works as in do_if.
        """
        if self.disableLevel and not replace:
            self.disableLevel += 1
            return
        if re.match(r'\W', args, re.U):
            raise Preprocessor.Error(self, 'INVALID_VAR', args)
        if args in self.context:
            self.disableLevel = 1
        if replace:
            if args not in self.context:
                self.disableLevel = 0
            self.ifStates[-1] = self.disableLevel
        else:
            self.ifStates.append(self.disableLevel)

    def do_else(self, args, ifState=2):
        """
        #else

        Disables output if the innermost conditional already had a true
        branch, enables it otherwise.  ifState is the state recorded for
        the conditional; the #elif handlers pass the current state to
        keep it unchanged.
        """
        self.ensure_not_else()
        hadTrue = self.ifStates[-1] == 0
        self.ifStates[-1] = ifState  # in-else
        if hadTrue:
            self.disableLevel = 1
            return
        self.disableLevel = 0

    def do_elif(self, args):
        """
        #elif EXPRESSION
        """
        if self.disableLevel == 1:
            # Only evaluate if no branch was true so far.
            if self.ifStates[-1] == 1:
                self.do_if(args, replace=True)
        else:
            self.do_else(None, self.ifStates[-1])

    def do_elifdef(self, args):
        """
        #elifdef NAME
        """
        if self.disableLevel == 1:
            if self.ifStates[-1] == 1:
                self.do_ifdef(args, replace=True)
        else:
            self.do_else(None, self.ifStates[-1])

    def do_elifndef(self, args):
        """
        #elifndef NAME
        """
        if self.disableLevel == 1:
            if self.ifStates[-1] == 1:
                self.do_ifndef(args, replace=True)
        else:
            self.do_else(None, self.ifStates[-1])

    def do_endif(self, args):
        """
        #endif

        Leaves one level of disabled nesting; once output is enabled
        again, the state of the innermost conditional is dropped.
        """
        if self.disableLevel > 0:
            self.disableLevel -= 1
        if self.disableLevel == 0:
            self.ifStates.pop()

    # output processing
    def do_expand(self, args):
        """
        #expand TEXT

        Writes TEXT with each __NAME__ replaced by the value of NAME,
        or by the empty string if NAME is not defined.
        """
        # Use a compiled pattern: the third positional argument of re.split
        # is maxsplit, not flags.
        lst = re.compile(r'__(\w+)__', re.U).split(args)

        def vsubst(v):
            if v in self.context:
                return str(self.context[v])
            return ''
        # The odd entries are the captured variable names.
        for i in range(1, len(lst), 2):
            lst[i] = vsubst(lst[i])
        lst.append('\n')  # add back the newline
        self.write(''.join(lst))

    def do_literal(self, args):
        """
        #literal TEXT

        Writes TEXT followed by the configured line ending.
        """
        self.write(args + self.LE)

    def do_filter(self, args):
        """
        #filter NAME [NAME ...]

        Enables the given filters.  Names without a filter_NAME method
        are ignored.  Filters are kept sorted by name.
        """
        filters = [f for f in args.split(' ')
                   if hasattr(self, 'filter_' + f)]
        if len(filters) == 0:
            return
        current = dict(self.filters)
        for f in filters:
            current[f] = getattr(self, 'filter_' + f)
        self.filters = [(fn, current[fn]) for fn in sorted(current)]

    def do_unfilter(self, args):
        """
        #unfilter NAME [NAME ...]

        Disables the given filters; names that are not active are ignored.
        """
        filters = args.split(' ')
        current = dict(self.filters)
        for f in filters:
            if f in current:
                del current[f]
        self.filters = [(fn, current[fn]) for fn in sorted(current)]

    # Filters
    #
    # emptyLines
    #   Strips blank lines from the output.
    def filter_emptyLines(self, aLine):
        if aLine == '\n':
            return ''
        return aLine

    # slashslash
    #   Strips everything after //
    def filter_slashslash(self, aLine):
        if aLine.find('//') == -1:
            return aLine
        [aLine, rest] = aLine.split('//', 1)
        if rest:
            # the newline got cut off with the comment, add it back
            aLine += '\n'
        return aLine

    # spaces
    #   Collapses sequences of spaces into a single space
    def filter_spaces(self, aLine):
        return re.sub(' +', ' ', aLine).strip(' ')

    # substition
    #   helper to be used by both substition and attemptSubstitution
    def filter_substitution(self, aLine, fatal=True):
        def repl(matchobj):
            varname = matchobj.group('VAR')
            if varname in self.context:
                return str(self.context[varname])
            if fatal:
                raise Preprocessor.Error(self, 'UNDEFINED_VAR', varname)
            # leave unknown references untouched
            return matchobj.group(0)
        return self.varsubst.sub(repl, aLine)

    def filter_attemptSubstitution(self, aLine):
        return self.filter_substitution(aLine, fatal=False)

    # File ops
    def do_include(self, args, filters=True):
        """
        Preprocess a given file.
        args can either be a file name, or a file-like object.
        Files should be opened, and will be closed after processing.

        Relative file names are resolved against the DIRECTORY of the
        context.  If filters is set, a file name is run through the
        active filters first.  Raises Preprocessor.Error('FILE_NOT_FOUND')
        if a named file cannot be opened.
        """
        isName = isinstance(args, (str, unicode))
        oldWrittenLines = self.writtenLines
        oldCheckLineNumbers = self.checkLineNumbers
        self.checkLineNumbers = False
        if isName:
            try:
                args = str(args)
                if filters:
                    args = self.applyFilters(args)
                if not os.path.isabs(args):
                    args = os.path.join(self.context['DIRECTORY'], args)
                args = open(args, 'rU')
            except Preprocessor.Error:
                raise
            except Exception:
                raise Preprocessor.Error(self, 'FILE_NOT_FOUND', str(args))
        self.checkLineNumbers = bool(
            re.search(r'\.(js|jsm|java)(?:\.in)?$', args.name))
        oldFile = self.context['FILE']
        oldLine = self.context['LINE']
        oldDir = self.context['DIRECTORY']
        if args.isatty():
            # we're stdin, use '-' and '' for file and dir
            self.context['FILE'] = '-'
            self.context['DIRECTORY'] = ''
        else:
            abspath = os.path.abspath(args.name)
            self.context['FILE'] = abspath
            self.context['DIRECTORY'] = os.path.dirname(abspath)
        self.context['LINE'] = 0
        self.writtenLines = 0
        try:
            for line in args:
                self.context['LINE'] += 1
                self.handleLine(line)
        finally:
            # Don't leak the file if a directive raises.  The context is
            # deliberately left alone in that case, so that it still
            # describes the location of the failure.
            args.close()
        self.context['FILE'] = oldFile
        self.checkLineNumbers = oldCheckLineNumbers
        self.writtenLines = oldWrittenLines
        self.context['LINE'] = oldLine
        self.context['DIRECTORY'] = oldDir

    def do_includesubst(self, args):
        """
        #includesubst FILENAME

        Like #include, with @VAR@ references in the file name substituted
        first.  Undefined variables raise Preprocessor.Error.
        """
        args = self.filter_substitution(args)
        self.do_include(args)

    def do_error(self, args):
        """
        #error MESSAGE

        Always raises Preprocessor.Error with the given message.
        """
        raise Preprocessor.Error(self, 'Error: ', str(args))


def main():
    """
    Command line entry point: process sys.argv, defaulting to stdin.
    """
    pp = Preprocessor()
    pp.handleCommandLine(None, True)


def preprocess(includes=[sys.stdin], defines={},
               output=sys.stdout,
               line_endings='\n', marker='#'):
    """
    Preprocess the given files (names or file-like objects) in order.

    defines is merged into the variable context, output is the stream
    to write to.  line_endings is passed to Preprocessor.setLineEndings,
    marker to Preprocessor.setMarker.  The default arguments are only
    read, never modified.
    """
    pp = Preprocessor()
    pp.context.update(defines)
    pp.setLineEndings(line_endings)
    pp.setMarker(marker)
    pp.out = output
    for f in includes:
        pp.do_include(f, False)


if __name__ == "__main__":
    main()