# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is l10n test automation.
#
# The Initial Developer of the Original Code is
# Mozilla Foundation
# Portions created by the Initial Developer are Copyright (C) 2006
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Axel Hecht
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****

import re
import codecs
import logging

__constructors = []


class Parser:
  def __init__(self):
    if not hasattr(self, 'encoding'):
      self.encoding = 'utf-8'

  def read(self, file):
    f = codecs.open(file, 'r', self.encoding)
    try:
      self.contents = f.read()
    except UnicodeDecodeError, e:
      logging.getLogger('locales').error("Can't read file: " + file + '; ' + str(e))
      self.contents = u''
    f.close()

  def parse(self, contents):
    (self.contents, length) = codecs.getdecoder(self.encoding)(contents)

  def mapping(self):
    m = {}
    for p in self:
      m[p[0]] = p[1]
    return m

  def compareEntries(self, en, l10n):
    return None

  def postProcessValue(self, val):
    return val

  def __iter__(self):
    self.offset = 0
    return self

  def next(self):
    cm = self.comment.search(self.contents, self.offset)
    m = self.key.search(self.contents, self.offset)
    if not m:
      raise StopIteration
    # eagerly strip comments
    if cm and cm.start() < m.start():
      self.offset = cm.end()
      return self.next()
    self.offset = m.end()
    return (m.group(1), self.postProcessValue(m.group(2)))


def getParser(path):
  for item in __constructors:
    if re.search(item[0], path):
      return item[1]
  raise UserWarning, "Cannot find Parser"
class DTDParser(Parser):
  def __init__(self):
    # http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar
    # NameStartChar ::= ":" | [A-Z] | "_" | [a-z] |
    #   [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] |
    #   [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
    #   [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
    #   [#x10000-#xEFFFF]
    NameStartChar = u':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
        u'\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF' + \
        u'\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
    # + \U00010000-\U000EFFFF seems to be unsupported in python
    # NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
    #   [#x0300-#x036F] | [#x203F-#x2040]
    NameChar = NameStartChar + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
    Name = '[' + NameStartChar + '][' + NameChar + ']*'
    # an entity declaration, <!ENTITY name "value">, with the value in
    # double or single quotes
    self.key = re.compile('<!ENTITY\s+(' + Name + ')\s+("[^"]*"|\'[^\']*\')\s*>', re.S)
    self.comment = re.compile('<!--.*?-->', re.S)
    Parser.__init__(self)


class PropertiesParser(Parser):
  def __init__(self):
    self.key = re.compile('^\s*([^#!\s\r\n][^=:\r\n]*?)\s*[:=][ \t]*(.*?)[ \t]*$', re.M)
    self.comment = re.compile('^\s*[#!].*$', re.M)
    self._post = re.compile('\\\\u([0-9a-fA-F]{4})')
    Parser.__init__(self)

  # printf-style argument specifiers that may appear in property values
  _arg_re = re.compile('%(?:(?P<argnum>[0-9]+)\$)?(?P<width>[0-9]+)?(?:.(?P<prec>[0-9]+))?(?P<size>[hL]|(?:ll?))?(?P<type>[dciouxXefgpCSsn])')

  def postProcessValue(self, val):
    # resolve \uNNNN escapes into the characters they name
    m = self._post.search(val)
    if not m:
      return val
    while m:
      uChar = unichr(int(m.group(1), 16))
      val = val.replace(m.group(), uChar)
      m = self._post.search(val)
    return val


class DefinesParser(Parser):
  def __init__(self):
    self.key = re.compile('^#define\s+(\w+)\s*(.*?)\s*$', re.M)
    self.comment = re.compile('^#[^d][^e][^f][^i][^n][^e][^\s].*$', re.M)
    Parser.__init__(self)


class BookmarksParser(Parser):
  def __init__(self):
    Parser.__init__(self)

  def __iter__(self):
    # flatten the folder tree built by getDetails() into (key, value) pairs
    def getProps(f, base):
      r = []
      if f.has_key(u'__title__'):
        r.append((base + '.title', f[u'__title__']))
      if f.has_key(u'__desc__'):
        r.append((base + '.desc', f[u'__desc__']))
      for i in range(len(f[u'children'])):
        c = f[u'children'][i]
        if c.has_key(u'children'):
          r += getProps(c, base + '.' + str(i))
        else:
          for k, v in c.iteritems():
            if k == u'HREF':
              r.append(('%s.%i.href' % (base, i), v))
            if k == u'FEEDURL':
              r.append(('%s.%i.feedurl' % (base, i), v))
            elif k == u'__title__':
              r.append(('%s.%i.title' % (base, i), v))
            elif k == u'ICON':
              r.append(('%s.%i.icon' % (base, i), v))
      return r
    return getProps(self.getDetails(), 'bookmarks').__iter__()

  def next(self):
    raise NotImplementedError

  def getDetails(self):
    # parse the Netscape bookmarks HTML format line by line, building a
    # tree of folder dicts with u'children' lists
    def parse_link(f, line):
      link = {}
      for m in re.finditer('(?P<key>[A-Z]+)="(?P<val>[^"]+)', line):
        link[m.group('key').upper()] = m.group('val')
      m = re.search('"[ ]*>([^<]+)', line, re.I)
      link[u'__title__'] = m.group(1)
      f[u'children'].append(link)
    stack = []
    f = {u'children': []}
    nextTitle = None
    nextDesc = None
    for ln in self.contents.splitlines():
      if ln.find('<DL') >= 0:
        stack.append(f)
        chld = {u'children': [], u'__title__': nextTitle, u'__desc__': nextDesc}
        nextTitle = None
        nextDesc = None
        f[u'children'].append(chld)
        f = chld
      elif re.search('<DT><H3', ln):
        nextTitle = re.search('>([^<]+)</H', ln).group(1)
      elif ln.find('<DD>') >= 0:
        if nextDesc:
          pass
        nextDesc = re.search('DD>([^<]+)', ln).group(1)
      elif ln.find('<A HREF') >= 0:
        parse_link(f, ln)
      elif ln.find('</DL>') >= 0:
        f = stack.pop()
      elif ln.find('<H1') >= 0:
        f[u'__title__'] = re.search('>([^<]+)</H1', ln).group(1)
    return f
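
# getParser() looks parsers up in __constructors, so the concrete parsers
# need to be registered once they are defined.  The list below is a minimal
# sketch of such a registration: the path patterns ('\\.dtd', '\\.properties',
# '\\.inc', 'bookmarks\\.html') are assumed mappings for illustration, not
# taken from this file.
__constructors = [('\\.dtd', DTDParser()),
                  ('\\.properties', PropertiesParser()),
                  ('\\.inc', DefinesParser()),
                  ('bookmarks\\.html', BookmarksParser())]

if __name__ == '__main__':
  # Usage sketch: look up a parser by file name, feed it an in-memory
  # .properties snippet, and dump the resulting key/value mapping.  The
  # sample content and the 'foo.properties' name are made up for this demo.
  p = getParser('foo.properties')
  p.parse('# a comment\ngreeting = Hello \\u00e9\nfarewell: Bye\n')
  for key, value in sorted(p.mapping().items()):
    print '%s = %s' % (key, value.encode('utf-8'))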