Decode input and encode output

Currently glanceclient doesn't support non-ASCII characters for images
names and properties (names and values as well). This patch introduces 2
functions (utils.py) that will help encoding and decoding strings in a
more "secure" way.

About the ensure_(str|unicode) functions:

    They both try to use first the encoding used in stdin (or python's
    default encoding if that's None) and fallback to utf-8 if those
    encodings fail to decode a given text.

About the changes in glanceclient:

    The major change is that all inputs will be decoded and will kept as
    such inside the client's functions and will then be encoded before
    being printed / sent out the client.

    There are other small changes, all related to encoding to str,
    around in order to avoid fails during some conversions. i.e: quoting
    url encoded parameters.

Fixes bug: 1061150

Change-Id: I5c3ea93a716edfe284d19f6291d4e36028f91eb2
This commit is contained in:
Flaper Fesp
2013-01-30 15:18:44 +01:00
parent 542a45bd28
commit 55cb4f4473
8 changed files with 198 additions and 23 deletions

View File

@@ -35,6 +35,7 @@ if not hasattr(urlparse, 'parse_qsl'):
import OpenSSL import OpenSSL
from glanceclient import exc from glanceclient import exc
from glanceclient.common import utils
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
@@ -114,7 +115,7 @@ class HTTPClient(object):
curl.append('-d \'%s\'' % kwargs['body']) curl.append('-d \'%s\'' % kwargs['body'])
curl.append('%s%s' % (self.endpoint, url)) curl.append('%s%s' % (self.endpoint, url))
LOG.debug(' '.join(curl)) LOG.debug(utils.ensure_str(' '.join(curl)))
@staticmethod @staticmethod
def log_http_response(resp, body=None): def log_http_response(resp, body=None):
@@ -124,7 +125,22 @@ class HTTPClient(object):
dump.append('') dump.append('')
if body: if body:
dump.extend([body, '']) dump.extend([body, ''])
LOG.debug('\n'.join(dump)) LOG.debug(utils.ensure_str('\n'.join(dump)))
@staticmethod
def encode_headers(headers):
"""
Encodes headers.
Note: This should be used right before
sending anything out.
:param headers: Headers to encode
:returns: Dictionary with encoded headers'
names and values
"""
to_str = utils.ensure_str
return dict([(to_str(h), to_str(v)) for h, v in headers.iteritems()])
def _http_request(self, url, method, **kwargs): def _http_request(self, url, method, **kwargs):
""" Send an http request with the specified characteristics. """ Send an http request with the specified characteristics.
@@ -141,8 +157,17 @@ class HTTPClient(object):
self.log_curl_request(method, url, kwargs) self.log_curl_request(method, url, kwargs)
conn = self.get_connection() conn = self.get_connection()
# Note(flaper87): Before letting headers / url fly,
# they should be encoded otherwise httplib will
# complain. If we decide to rely on python-request
# this wont be necessary anymore.
kwargs['headers'] = self.encode_headers(kwargs['headers'])
try: try:
conn_url = posixpath.normpath('%s/%s' % (self.endpoint_path, url)) conn_url = posixpath.normpath('%s/%s' % (self.endpoint_path, url))
# Note(flaper87): Ditto, headers / url
# encoding to make httplib happy.
conn_url = utils.ensure_str(conn_url)
if kwargs['headers'].get('Transfer-Encoding') == 'chunked': if kwargs['headers'].get('Transfer-Encoding') == 'chunked':
conn.putrequest(method, conn_url) conn.putrequest(method, conn_url)
for header, value in kwargs['headers'].items(): for header, value in kwargs['headers'].items():

View File

@@ -54,14 +54,14 @@ def print_list(objs, fields, formatters={}):
row.append(data) row.append(data)
pt.add_row(row) pt.add_row(row)
print pt.get_string() print ensure_str(pt.get_string())
def print_dict(d): def print_dict(d):
pt = prettytable.PrettyTable(['Property', 'Value'], caching=False) pt = prettytable.PrettyTable(['Property', 'Value'], caching=False)
pt.align = 'l' pt.align = 'l'
[pt.add_row(list(r)) for r in d.iteritems()] [pt.add_row(list(r)) for r in d.iteritems()]
print pt.get_string(sortby='Property') print ensure_str(pt.get_string(sortby='Property'))
def find_resource(manager, name_or_id): def find_resource(manager, name_or_id):
@@ -75,7 +75,7 @@ def find_resource(manager, name_or_id):
# now try to get entity as uuid # now try to get entity as uuid
try: try:
uuid.UUID(str(name_or_id)) uuid.UUID(ensure_str(name_or_id))
return manager.get(name_or_id) return manager.get(name_or_id)
except (ValueError, exc.NotFound): except (ValueError, exc.NotFound):
pass pass
@@ -137,7 +137,7 @@ def import_versioned_module(version, submodule=None):
def exit(msg=''): def exit(msg=''):
if msg: if msg:
print >> sys.stderr, msg print >> sys.stderr, ensure_str(msg)
sys.exit(1) sys.exit(1)
@@ -190,3 +190,76 @@ def make_size_human_readable(size):
stripped = padded.rstrip('0').rstrip('.') stripped = padded.rstrip('0').rstrip('.')
return '%s%s' % (stripped, suffix[index]) return '%s%s' % (stripped, suffix[index])
def ensure_unicode(text, incoming=None, errors='strict'):
"""
Decodes incoming objects using `incoming` if they're
not already unicode.
:param incoming: Text's current encoding
:param errors: Errors handling policy.
:returns: text or a unicode `incoming` encoded
representation of it.
"""
if isinstance(text, unicode):
return text
if not incoming:
incoming = sys.stdin.encoding or \
sys.getdefaultencoding()
# Calling `str` in case text is a non str
# object.
text = str(text)
try:
return text.decode(incoming, errors)
except UnicodeDecodeError:
# Note(flaper87) If we get here, it means that
# sys.stdin.encoding / sys.getdefaultencoding
# didn't return a suitable encoding to decode
# text. This happens mostly when global LANG
# var is not set correctly and there's no
# default encoding. In this case, most likely
# python will use ASCII or ANSI encoders as
# default encodings but they won't be capable
# of decoding non-ASCII characters.
#
# Also, UTF-8 is being used since it's an ASCII
# extension.
return text.decode('utf-8', errors)
def ensure_str(text, incoming=None,
encoding='utf-8', errors='strict'):
"""
Encodes incoming objects using `encoding`. If
incoming is not specified, text is expected to
be encoded with current python's default encoding.
(`sys.getdefaultencoding`)
:param incoming: Text's current encoding
:param encoding: Expected encoding for text (Default UTF-8)
:param errors: Errors handling policy.
:returns: text or a bytestring `encoding` encoded
representation of it.
"""
if not incoming:
incoming = sys.stdin.encoding or \
sys.getdefaultencoding()
if not isinstance(text, basestring):
# try to convert `text` to string
# This allows this method for receiving
# objs that can be converted to string
text = str(text)
if isinstance(text, unicode):
return text.encode(encoding, errors)
elif text and encoding != incoming:
# Decode text before encoding it with `encoding`
text = ensure_unicode(text, incoming, errors)
return text.encode(encoding, errors)
return text

View File

@@ -466,8 +466,7 @@ class HelpFormatter(argparse.HelpFormatter):
def main(): def main():
try: try:
OpenStackImagesShell().main(sys.argv[1:]) OpenStackImagesShell().main(map(utils.ensure_unicode, sys.argv[1:]))
except Exception, e: except Exception, e:
print >> sys.stderr, e print >> sys.stderr, e
sys.exit(1) sys.exit(1)

View File

@@ -75,10 +75,11 @@ class ImageManager(base.Manager):
def _image_meta_to_headers(self, fields): def _image_meta_to_headers(self, fields):
headers = {} headers = {}
fields_copy = copy.deepcopy(fields) fields_copy = copy.deepcopy(fields)
ensure_unicode = utils.ensure_unicode
for key, value in fields_copy.pop('properties', {}).iteritems(): for key, value in fields_copy.pop('properties', {}).iteritems():
headers['x-image-meta-property-%s' % key] = str(value) headers['x-image-meta-property-%s' % key] = ensure_unicode(value)
for key, value in fields_copy.iteritems(): for key, value in fields_copy.iteritems():
headers['x-image-meta-%s' % key] = str(value) headers['x-image-meta-%s' % key] = ensure_unicode(value)
return headers return headers
@staticmethod @staticmethod
@@ -130,6 +131,16 @@ class ImageManager(base.Manager):
absolute_limit = kwargs.get('limit') absolute_limit = kwargs.get('limit')
def paginate(qp, seen=0): def paginate(qp, seen=0):
# Note(flaper87) Url encoding should
# be moved inside http utils, at least
# shouldn't be here.
#
# Making sure all params are str before
# trying to encode them
for param, value in qp.iteritems():
if isinstance(value, basestring):
qp[param] = utils.ensure_str(value)
url = '/v1/images/detail?%s' % urllib.urlencode(qp) url = '/v1/images/detail?%s' % urllib.urlencode(qp)
images = self._list(url, "images") images = self._list(url, "images")
for image in images: for image in images:

View File

@@ -298,7 +298,8 @@ def do_image_delete(gc, args):
image = utils.find_resource(gc.images, args_image) image = utils.find_resource(gc.images, args_image)
try: try:
if args.verbose: if args.verbose:
print 'Requesting image delete for %s ...' % args_image, print 'Requesting image delete for %s ...' % \
utils.ensure_str(args_image),
gc.images.delete(image) gc.images.delete(image)

View File

@@ -26,24 +26,34 @@ from tests import utils
class TestClient(testtools.TestCase): class TestClient(testtools.TestCase):
def setUp(self):
super(TestClient, self).setUp()
self.mock = mox.Mox()
self.mock.StubOutWithMock(httplib.HTTPConnection, 'request')
self.mock.StubOutWithMock(httplib.HTTPConnection, 'getresponse')
self.endpoint = 'http://example.com:9292'
self.client = http.HTTPClient(self.endpoint, token=u'abc123')
def tearDown(self):
super(TestClient, self).tearDown()
self.mock.UnsetStubs()
def test_connection_refused(self): def test_connection_refused(self):
""" """
Should receive a CommunicationError if connection refused. Should receive a CommunicationError if connection refused.
And the error should list the host and port that refused the And the error should list the host and port that refused the
connection connection
""" """
endpoint = 'http://example.com:9292'
client = http.HTTPClient(endpoint, token=u'abc123')
m = mox.Mox()
m.StubOutWithMock(httplib.HTTPConnection, 'request')
httplib.HTTPConnection.request( httplib.HTTPConnection.request(
mox.IgnoreArg(), mox.IgnoreArg(),
mox.IgnoreArg(), mox.IgnoreArg(),
headers=mox.IgnoreArg(), headers=mox.IgnoreArg(),
).AndRaise(socket.error()) ).AndRaise(socket.error())
m.ReplayAll() self.mock.ReplayAll()
try: try:
client.json_request('GET', '/v1/images/detail?limit=20') self.client.json_request('GET', '/v1/images/detail?limit=20')
#NOTE(alaski) We expect exc.CommunicationError to be raised #NOTE(alaski) We expect exc.CommunicationError to be raised
# so we should never reach this point. try/except is used here # so we should never reach this point. try/except is used here
# rather than assertRaises() so that we can check the body of # rather than assertRaises() so that we can check the body of
@@ -51,10 +61,26 @@ class TestClient(testtools.TestCase):
self.fail('An exception should have bypassed this line.') self.fail('An exception should have bypassed this line.')
except exc.CommunicationError, comm_err: except exc.CommunicationError, comm_err:
fail_msg = ("Exception message '%s' should contain '%s'" % fail_msg = ("Exception message '%s' should contain '%s'" %
(comm_err.message, endpoint)) (comm_err.message, self.endpoint))
self.assertTrue(endpoint in comm_err.message, fail_msg) self.assertTrue(self.endpoint in comm_err.message, fail_msg)
finally:
m.UnsetStubs() def test_http_encoding(self):
httplib.HTTPConnection.request(
mox.IgnoreArg(),
mox.IgnoreArg(),
headers=mox.IgnoreArg())
# Lets fake the response
# returned by httplib
expected_response = 'Ok'
fake = utils.FakeResponse({}, StringIO.StringIO(expected_response))
httplib.HTTPConnection.getresponse().AndReturn(fake)
self.mock.ReplayAll()
headers = {"test": u'ni\xf1o'}
resp, body = self.client.raw_request('GET', '/v1/images/detail',
headers=headers)
self.assertEqual(resp, fake)
class TestResponseBodyIterator(testtools.TestCase): class TestResponseBodyIterator(testtools.TestCase):

View File

@@ -50,3 +50,36 @@ class TestUtils(testtools.TestCase):
self.assertEqual("1MB", utils.make_size_human_readable(1048576)) self.assertEqual("1MB", utils.make_size_human_readable(1048576))
self.assertEqual("1.4GB", utils.make_size_human_readable(1476395008)) self.assertEqual("1.4GB", utils.make_size_human_readable(1476395008))
self.assertEqual("9.3MB", utils.make_size_human_readable(9761280)) self.assertEqual("9.3MB", utils.make_size_human_readable(9761280))
def test_ensure_unicode(self):
ensure_unicode = utils.ensure_unicode
self.assertEqual(u'True', ensure_unicode(True))
self.assertEqual(u'ni\xf1o', ensure_unicode("ni\xc3\xb1o",
incoming="utf-8"))
self.assertEqual(u"test", ensure_unicode("dGVzdA==",
incoming='base64'))
self.assertEqual(u"strange", ensure_unicode('\x80strange',
errors='ignore'))
self.assertEqual(u'\xc0', ensure_unicode('\xc0',
incoming='iso-8859-1'))
# Forcing incoming to ascii so it falls back to utf-8
self.assertEqual(u'ni\xf1o', ensure_unicode('ni\xc3\xb1o',
incoming='ascii'))
def test_ensure_str(self):
ensure_str = utils.ensure_str
self.assertEqual("True", ensure_str(True))
self.assertEqual("ni\xc3\xb1o", ensure_str(u'ni\xf1o',
encoding="utf-8"))
self.assertEqual("dGVzdA==\n", ensure_str("test",
encoding='base64'))
self.assertEqual('ni\xf1o', ensure_str("ni\xc3\xb1o",
encoding="iso-8859-1",
incoming="utf-8"))
# Forcing incoming to ascii so it falls back to utf-8
self.assertEqual('ni\xc3\xb1o', ensure_str('ni\xc3\xb1o',
incoming='ascii'))

View File

@@ -40,13 +40,20 @@ class FakeAPI(object):
class FakeResponse(object): class FakeResponse(object):
def __init__(self, headers, body=None): def __init__(self, headers, body=None,
version=1.0, status=200, reason="Ok"):
""" """
:param headers: dict representing HTTP response headers :param headers: dict representing HTTP response headers
:param body: file-like object :param body: file-like object
:param version: HTTP Version
:param status: Response status code
:param reason: Status code related message.
""" """
self.headers = headers
self.body = body self.body = body
self.status = status
self.reason = reason
self.version = version
self.headers = headers
def getheaders(self): def getheaders(self):
return copy.deepcopy(self.headers).items() return copy.deepcopy(self.headers).items()