Make available to log encoded strings as arguments

Pure python logging library allows to log encoded strings
but in oslo_log it causes UnicodeDecodeError for arguments
which contains unicode symbols.

Changes add additional check whether all arguments can be converted to
unicode. If it isn't possible then leave it as is.

Change-Id: I76e75c35a74b146fae52c0ea60156e1cf78540e7
Closes-Bug: #1563295
This commit is contained in:
Andrey Volkov 2016-05-31 20:14:40 +03:00
parent a22f3d40fc
commit 991d8f0a12
3 changed files with 46 additions and 25 deletions

View File

@ -26,6 +26,7 @@ from six import moves
from oslo_context import context as context_utils from oslo_context import context as context_utils
from oslo_serialization import jsonutils from oslo_serialization import jsonutils
from oslo_utils import encodeutils
def _dictify_context(context): def _dictify_context(context):
@ -76,6 +77,19 @@ def _update_record_with_context(record):
return context return context
def _ensure_unicode(msg):
"""Do our best to turn the input argument into a unicode object.
"""
if isinstance(msg, six.text_type):
return msg
if not isinstance(msg, six.binary_type):
return six.text_type(msg)
return encodeutils.safe_decode(
msg,
incoming='utf-8',
errors='xmlcharrefreplace')
class _ReplaceFalseValue(dict): class _ReplaceFalseValue(dict):
def __getitem__(self, key): def __getitem__(self, key):
return dict.get(self, key, None) or '-' return dict.get(self, key, None) or '-'
@ -189,11 +203,17 @@ class ContextFormatter(logging.Formatter):
def format(self, record): def format(self, record):
"""Uses contextstring if request_id is set, otherwise default.""" """Uses contextstring if request_id is set, otherwise default."""
# NOTE(jecarey): If msg is not unicode, coerce it into unicode if six.PY2:
# before it can get to the python logging and should_use_unicode = True
# possibly cause string encoding trouble for arg in record.args:
if not isinstance(record.msg, six.text_type): try:
record.msg = six.text_type(record.msg) six.text_type(arg)
except UnicodeDecodeError:
should_use_unicode = False
break
if (not isinstance(record.msg, six.text_type)
and should_use_unicode):
record.msg = _ensure_unicode(record.msg)
# store project info # store project info
record.project = self.project record.project = self.project

View File

@ -40,7 +40,6 @@ except ImportError:
import traceback import traceback
from oslo_config import cfg from oslo_config import cfg
from oslo_utils import encodeutils
from oslo_utils import importutils from oslo_utils import importutils
import six import six
from six import moves from six import moves
@ -92,21 +91,6 @@ class BaseLoggerAdapter(logging.LoggerAdapter):
self.log(TRACE, msg, *args, **kwargs) self.log(TRACE, msg, *args, **kwargs)
def _ensure_unicode(msg):
"""Do our best to turn the input argument into a unicode object.
"""
if not isinstance(msg, six.text_type):
if isinstance(msg, six.binary_type):
msg = encodeutils.safe_decode(
msg,
incoming='utf-8',
errors='xmlcharrefreplace',
)
else:
msg = six.text_type(msg)
return msg
class KeywordArgumentAdapter(BaseLoggerAdapter): class KeywordArgumentAdapter(BaseLoggerAdapter):
"""Logger adapter to add keyword arguments to log record's extra data """Logger adapter to add keyword arguments to log record's extra data
@ -126,7 +110,6 @@ class KeywordArgumentAdapter(BaseLoggerAdapter):
""" """
def process(self, msg, kwargs): def process(self, msg, kwargs):
msg = _ensure_unicode(msg)
# Make a new extra dictionary combining the values we were # Make a new extra dictionary combining the values we were
# given when we were constructed and anything from kwargs. # given when we were constructed and anything from kwargs.
extra = {} extra = {}

View File

@ -402,6 +402,14 @@ class JSONFormatterTestCase(LogTestBase):
self.assertTrue(extra_keys[1] in data['extra']) self.assertTrue(extra_keys[1] in data['extra'])
self.assertEqual(special_user, data['extra'][extra_keys[1]]) self.assertEqual(special_user, data['extra'][extra_keys[1]])
def test_can_process_strings(self):
expected = b'\\u2622'
if six.PY3:
# see ContextFormatterTestCase.test_can_process_strings
expected = '\\\\xe2\\\\x98\\\\xa2'
self.log.info(b'%s', u'\u2622'.encode('utf8'))
self.assertIn(expected, self.stream.getvalue())
def get_fake_datetime(retval): def get_fake_datetime(retval):
class FakeDateTime(datetime.datetime): class FakeDateTime(datetime.datetime):
@ -569,6 +577,16 @@ class ContextFormatterTestCase(LogTestBase):
self.assertEqual(expected, self.stream.getvalue()) self.assertEqual(expected, self.stream.getvalue())
def test_can_process_strings(self):
expected = b'\xe2\x98\xa2'
if six.PY3:
# in PY3 logging format string should be unicode string
# or it will fail and inserting byte string in unicode string
# causes such formatting
expected = '\\xe2\\x98\\xa2'
self.log.info(b'%s', u'\u2622'.encode('utf8'))
self.assertIn(expected, self.stream.getvalue())
class ExceptionLoggingTestCase(LogTestBase): class ExceptionLoggingTestCase(LogTestBase):
"""Test that Exceptions are logged.""" """Test that Exceptions are logged."""
@ -1238,20 +1256,20 @@ class UnicodeConversionTestCase(BaseTestCase):
def test_ascii_to_unicode(self): def test_ascii_to_unicode(self):
msg = self._MSG msg = self._MSG
enc_msg = msg.encode('utf-8') enc_msg = msg.encode('utf-8')
result = log._ensure_unicode(enc_msg) result = formatters._ensure_unicode(enc_msg)
self.assertEqual(msg, result) self.assertEqual(msg, result)
self.assertIsInstance(result, six.text_type) self.assertIsInstance(result, six.text_type)
def test_unicode_to_unicode(self): def test_unicode_to_unicode(self):
msg = self._MSG msg = self._MSG
result = log._ensure_unicode(msg) result = formatters._ensure_unicode(msg)
self.assertEqual(msg, result) self.assertEqual(msg, result)
self.assertIsInstance(result, six.text_type) self.assertIsInstance(result, six.text_type)
def test_exception_to_unicode(self): def test_exception_to_unicode(self):
msg = self._MSG msg = self._MSG
exc = Exception(msg) exc = Exception(msg)
result = log._ensure_unicode(exc) result = formatters._ensure_unicode(exc)
self.assertEqual(msg, result) self.assertEqual(msg, result)
self.assertIsInstance(result, six.text_type) self.assertIsInstance(result, six.text_type)