# Copyright (c) 2008-2015 testtools developers. See LICENSE for details.

"""Compatibility support for python 2 and 3."""

__metaclass__ = type
__all__ = [
    '_b',
    '_u',
    'advance_iterator',
    'BytesIO',
    'classtypes',
    'istext',
    'str_is_unicode',
    'StringIO',
    'reraise',
    'unicode_output_stream',
    'text_or_bytes',
    ]

import codecs
import io
import locale
import os
import re
import sys
import traceback
import unicodedata

from extras import try_import, try_imports

BytesIO = try_imports(['StringIO.StringIO', 'io.BytesIO'])
StringIO = try_imports(['StringIO.StringIO', 'io.StringIO'])
# To let setup.py work, make this a conditional import.
linecache = try_import('linecache2')

# Fall back to the 3.x helper module when importing the 2.x one fails with
# SyntaxError (presumably because it uses Python 2 only syntax).
try:
    from testtools import _compat2x as _compat
except SyntaxError:
    from testtools import _compat3x as _compat

reraise = _compat.reraise


__u_doc = """A function version of the 'u' prefix.

This is needed because the u prefix is not usable in Python 3 but is required
in Python 2 to get a unicode object.

To migrate code that was written as u'\u1234' in Python 2 to 2+3 change
it to be _u('\u1234'). The Python 3 interpreter will decode it
appropriately and the no-op _u for Python 3 lets it through, in Python
2 we then call unicode-escape in the _u function.
"""

if sys.version_info > (3, 0):
    import builtins

    def _u(s):
        return s

    # Callable used to get an ASCII-only representation of an object.
    _r = ascii

    def _b(s):
        """A byte literal."""
        return s.encode("latin-1")

    advance_iterator = next

    # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
    def istext(x):
        """Return True if ``x`` is an instance of ``str``."""
        return isinstance(x, str)

    def classtypes():
        """Return a tuple of the types a class object can have."""
        return (type,)

    str_is_unicode = True
    text_or_bytes = (str, bytes)
else:
    import __builtin__ as builtins

    def _u(s):
        # The double replace mangling going on prepares the string for
        # unicode-escape - \foo is preserved, \u and \U are decoded.
        return (s.replace("\\", "\\\\").replace("\\\\u", "\\u")
            .replace("\\\\U", "\\U").decode("unicode-escape"))

    # Callable used to get an ASCII-only representation of an object.
    _r = repr

    def _b(s):
        """A byte literal; ``s`` is returned unchanged."""
        return s

    advance_iterator = lambda it: it.next()

    def istext(x):
        """Return True if ``x`` is an instance of ``basestring``."""
        return isinstance(x, basestring)

    def classtypes():
        """Return a tuple of the types a class object can have.

        Includes ``types.ClassType`` alongside ``type``.
        """
        import types
        return (type, types.ClassType)

    # Only "cli" (sys.platform value) is treated as having unicode str here.
    str_is_unicode = sys.platform == "cli"
    text_or_bytes = (unicode, str)

_u.__doc__ = __u_doc


# GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
#                there should be better ways to write code needing this.
if not issubclass(getattr(builtins, "bytes", str), str):
    def _isbytes(x):
        """Return True if ``x`` is an instance of ``bytes``."""
        return isinstance(x, bytes)
else:
    # Never return True on Pythons that provide the name but not the real type
    def _isbytes(x):
        """Always return False, as ``bytes`` is not a distinct type here."""
        return False


def _escape_char(c):
    """Return the unicode-escape form of character ``c`` as a ``str``.

    The codec hands back ``bytes`` when that is a type distinct from ``str``;
    such a result is decoded so it can be joined with ordinary text. A result
    that already is a ``str`` is returned untouched.
    """
    escaped = c.encode("unicode-escape")
    if not isinstance(escaped, str):
        # unicode-escape output is pure ASCII, so this decode cannot fail.
        escaped = escaped.decode("ascii")
    return escaped


def _slow_escape(text):
    """Escape unicode ``text`` leaving printable characters unmodified

    The behaviour emulates the Python 3 implementation of repr, see
    unicode_repr in unicodeobject.c and isprintable definition.

    Because this iterates over the input a codepoint at a time, it's slow, and
    does not handle astral characters correctly on Python builds with 16 bit
    rather than 32 bit unicode type.

    :param text: The text to escape.
    :return: The escaped text, built by joining the per-character results.
    """
    output = []
    for c in text:
        o = ord(c)
        if o < 256:
            if o < 32 or 126 < o < 161:
                # Control characters, DEL and the 0x7F-0xA0 range get escaped.
                output.append(_escape_char(c))
            elif o == 92:
                # Separate due to bug in unicode-escape codec in Python 2.4
                output.append("\\\\")
            else:
                output.append(c)
        else:
            # To get correct behaviour would need to pair up surrogates here
            if unicodedata.category(c)[0] in "CZ":
                # Categories starting with C or Z are escaped.
                output.append(_escape_char(c))
            else:
                output.append(c)
    return "".join(output)


def text_repr(text, multiline=None):
    """Rich repr for ``text`` returning unicode, triple quoted if ``multiline``.

    :param text: The text (or bytes) to represent.
    :param multiline: If true, produce a triple quoted representation with
        one output line per input line. If None (the default), this is
        enabled when ``text`` contains a newline.
    :return: A string representation of ``text``.
    """
    is_py3k = sys.version_info > (3, 0)
    # The newline separator has to match the type of ``text``.
    nl = bytes((0xA,)) if _isbytes(text) else "\n"
    if multiline is None:
        multiline = nl in text
    if not multiline and (is_py3k or not str_is_unicode and type(text) is str):
        # Use normal repr for single line of unicode on Python 3 or bytes
        return repr(text)
    # The repr of an empty slice minus its two quotes leaves just the literal
    # prefix, if any (for instance "b" or "u").
    prefix = repr(text[:0])[:-2]
    if multiline:
        # To escape multiline strings, split and process each line in turn,
        # making sure that quotes are not escaped.
        if is_py3k:
            # Skip the prefix and the opening quote of each line's repr.
            offset = len(prefix) + 1
            lines = []
            for l in text.split(nl):
                r = repr(l)
                q = r[-1]
                lines.append(r[offset:-1].replace("\\" + q, q))
        elif not str_is_unicode and isinstance(text, str):
            lines = [l.encode("string-escape").replace("\\'", "'")
                for l in text.split("\n")]
        else:
            lines = [_slow_escape(l) for l in text.split("\n")]
        # Combine the escaped lines and append two of the closing quotes,
        # then iterate over the result to escape triple quotes correctly.
        _semi_done = "\n".join(lines) + "''"
        p = 0
        while True:
            p = _semi_done.find("'''", p)
            if p == -1:
                break
            # Insert a backslash before the run of three quotes, then move
            # past the backslash and the quote it now escapes.
            _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
            p += 2
        return "".join([prefix, "'''\\\n", _semi_done, "'"])
    escaped_text = _slow_escape(text)
    # Determine which quote character to use and if one gets prefixed with a
    # backslash following the same logic Python uses for repr() on strings
    quote = "'"
    if "'" in text:
        if '"' in text:
            escaped_text = escaped_text.replace("'", "\\'")
        else:
            quote = '"'
    return "".join([prefix, quote, escaped_text, quote])


def unicode_output_stream(stream):
    """Get wrapper for given stream that writes any unicode without exception

    Characters that can't be coerced to the encoding of the stream, or 'ascii'
    if valid encoding is not found, will be replaced. The original stream may
    be returned in situations where a wrapper is determined unneeded.

    The wrapper only allows unicode to be written, not non-ascii bytestrings,
    which is a good thing to ensure sanity and sanitation.

    :param stream: The output stream to wrap.
    :return: ``stream`` itself or a wrapper around it.
    """
    if (sys.platform == "cli" or
        isinstance(stream, (io.TextIOWrapper, io.StringIO))):
        # Best to never encode before writing in IronPython, or if it is
        # already a TextIO (which in the io library has no encoding
        # attribute).
        return stream
    try:
        # An empty encoding name makes the lookup fail, which selects the
        # ascii fallback below.
        writer = codecs.getwriter(stream.encoding or "")
    except (AttributeError, LookupError):
        return codecs.getwriter("ascii")(stream, "replace")
    # Take the last dotted component of the module name; indexing with -1
    # also copes with a writer defined in a module that has no dot in its
    # name, where indexing with 1 would raise IndexError.
    if writer.__module__.rsplit(".", 1)[-1].startswith("utf"):
        # The current stream has a unicode encoding so no error handler is
        # needed
        if sys.version_info > (3, 0):
            return stream
        return writer(stream)
    if sys.version_info > (3, 0):
        # Python 3 doesn't seem to make this easy, handle a common case
        try:
            return stream.__class__(stream.buffer, stream.encoding, "replace",
                stream.newlines, stream.line_buffering)
        except AttributeError:
            pass
    return writer(stream, "replace")


def _get_exception_encoding():
    """Return the encoding we expect messages from the OS to be encoded in"""
    if os.name == "nt":
        # GZ 2010-05-24: Really want the codepage number instead, the error
        #                handling of standard codecs is more deterministic
        return "mbcs"
    # GZ 2010-05-23: We need this call to be after initialisation, but there's
    #                no benefit in asking more than once as it's a global
    #                setting that can change after the message is formatted.
    return locale.getlocale(locale.LC_MESSAGES)[1] or "ascii"