from collections import namedtuple

import libfoolang
from libfoolang import _py2to3


print('main.py: Running...')

# Context whose default charset is iso-8859-1, and the unit that every
# testcase below re-parses (always under the same 'foo.txt' filename).
ctx = libfoolang.AnalysisContext('iso-8859-1')
u = ctx.get_from_buffer('foo.txt', b'')


def unirepr(value):
    """Return the repr of ``value`` using the matching ``_py2to3`` helper.

    Text strings go through ``_py2to3.text_repr``; anything else goes
    through ``_py2to3.bytes_repr``.
    """
    if isinstance(value, _py2to3.text_type):
        return _py2to3.text_repr(value)
    return _py2to3.bytes_repr(value)


def get_from_buffer(buffer, charset):
    """Load 'foo.txt' from ``buffer``/``charset`` through the context."""
    ctx.get_from_buffer('foo.txt', buffer, charset)


def reparse(buffer, charset):
    """Reparse the ``u`` unit from ``buffer``/``charset``."""
    u.reparse(buffer, charset)


# Check that get_from_buffer/reparse correctly process buffer/charset
# arguments:
#
# * if buffer is a bytes string, then the charset argument (if provided) or
#   the context-wide charset (iso-8859-1, see above) is used to decode it;
#
# * if buffer is a Unicode string, then the charset argument must be null
#   (None or empty string).

Testcase = namedtuple('Testcase', 'buffer charset')

testcases = [
    # Unicode buffers
    Testcase(u'example # H\xe9llo', None),
    Testcase(u'example # H\xeallo', ''),
    Testcase(u'example # H\xebllo', 'utf-8'),

    # Bytes buffers
    Testcase(b'example # H\xe9llo', None),
    Testcase(b'example # H\xeallo', ''),
    Testcase(b'example # H\xebllo', 'iso-8859-1'),
    Testcase(b'example # H\xecllo', 'utf-8'),
    Testcase(b'example # H\xecllo', 'unknown-charset'),

    # Check that successfully parsing a unit with one encoding (UTF-8) has no
    # influence on the default encoding used later.
    Testcase(b'example # H\xc3\xa9llo', 'utf-8'),
    Testcase(b'example # H\xc3\xa9llo', None),
]


def _describe_outcome(method, tc):
    """Run ``method`` on testcase ``tc`` and return the text to report.

    The result is the exception (type name and message) when the call
    raises, the diagnostics of ``u`` when there are some, and the repr of
    the text of ``u`` otherwise.
    """
    # Only the call itself is guarded: errors while inspecting ``u``
    # afterwards must propagate.
    try:
        method(tc.buffer, tc.charset)
    except Exception as exc:
        return '{}: {}'.format(type(exc).__name__, exc)

    if not u.diagnostics:
        return _py2to3.text_repr(u.text)

    lines = ['diagnostics:']
    lines.extend(' {}'.format(d) for d in u.diagnostics)
    return '\n'.join(lines)


for method in (get_from_buffer, reparse):
    print('== {} =='.format(method.__name__))
    print('')
    for tc in testcases:
        result = _describe_outcome(method, tc)
        report = ' buffer={}, charset={}: {}'.format(
            unirepr(tc.buffer), repr(tc.charset), result,
        )
        print(report)

print('main.py: Done.')