From 55cb4f4473a6fc429524e7c4848379013a4d2d1d Mon Sep 17 00:00:00 2001 From: Flaper Fesp Date: Wed, 30 Jan 2013 15:18:44 +0100 Subject: [PATCH] Decode input and encode output Currently glanceclient doesn't support non-ASCII characters for images names and properties (names and values as well). This patch introduces 2 functions (utils.py) that will help encoding and decoding strings in a more "secure" way. About the ensure_(str|unicode) functions: They both try to use first the encoding used in stdin (or python's default encoding if that's None) and fallback to utf-8 if those encodings fail to decode a given text. About the changes in glanceclient: The major change is that all inputs will be decoded and will kept as such inside the client's functions and will then be encoded before being printed / sent out the client. There are other small changes, all related to encoding to str, around in order to avoid fails during some conversions. i.e: quoting url encoded parameters. Fixes bug: 1061150 Change-Id: I5c3ea93a716edfe284d19f6291d4e36028f91eb2 --- glanceclient/common/http.py | 29 ++++++++++++- glanceclient/common/utils.py | 81 ++++++++++++++++++++++++++++++++++-- glanceclient/shell.py | 3 +- glanceclient/v1/images.py | 15 ++++++- glanceclient/v1/shell.py | 3 +- tests/test_http.py | 46 +++++++++++++++----- tests/test_utils.py | 33 +++++++++++++++ tests/utils.py | 11 ++++- 8 files changed, 198 insertions(+), 23 deletions(-) diff --git a/glanceclient/common/http.py b/glanceclient/common/http.py index 502edabe..6510335a 100644 --- a/glanceclient/common/http.py +++ b/glanceclient/common/http.py @@ -35,6 +35,7 @@ if not hasattr(urlparse, 'parse_qsl'): import OpenSSL from glanceclient import exc +from glanceclient.common import utils LOG = logging.getLogger(__name__) @@ -114,7 +115,7 @@ class HTTPClient(object): curl.append('-d \'%s\'' % kwargs['body']) curl.append('%s%s' % (self.endpoint, url)) - LOG.debug(' '.join(curl)) + LOG.debug(utils.ensure_str(' '.join(curl))) @staticmethod def log_http_response(resp, body=None): @@ -124,7 +125,22 @@ class HTTPClient(object): dump.append('') if body: dump.extend([body, '']) - LOG.debug('\n'.join(dump)) + LOG.debug(utils.ensure_str('\n'.join(dump))) + + @staticmethod + def encode_headers(headers): + """ + Encodes headers. + + Note: This should be used right before + sending anything out. + + :param headers: Headers to encode + :returns: Dictionary with encoded headers' + names and values + """ + to_str = utils.ensure_str + return dict([(to_str(h), to_str(v)) for h, v in headers.iteritems()]) def _http_request(self, url, method, **kwargs): """ Send an http request with the specified characteristics. @@ -141,8 +157,17 @@ class HTTPClient(object): self.log_curl_request(method, url, kwargs) conn = self.get_connection() + # Note(flaper87): Before letting headers / url fly, + # they should be encoded otherwise httplib will + # complain. If we decide to rely on python-request + # this wont be necessary anymore. + kwargs['headers'] = self.encode_headers(kwargs['headers']) + try: conn_url = posixpath.normpath('%s/%s' % (self.endpoint_path, url)) + # Note(flaper87): Ditto, headers / url + # encoding to make httplib happy. + conn_url = utils.ensure_str(conn_url) if kwargs['headers'].get('Transfer-Encoding') == 'chunked': conn.putrequest(method, conn_url) for header, value in kwargs['headers'].items(): diff --git a/glanceclient/common/utils.py b/glanceclient/common/utils.py index 0159129b..08e047b6 100644 --- a/glanceclient/common/utils.py +++ b/glanceclient/common/utils.py @@ -54,14 +54,14 @@ def print_list(objs, fields, formatters={}): row.append(data) pt.add_row(row) - print pt.get_string() + print ensure_str(pt.get_string()) def print_dict(d): pt = prettytable.PrettyTable(['Property', 'Value'], caching=False) pt.align = 'l' [pt.add_row(list(r)) for r in d.iteritems()] - print pt.get_string(sortby='Property') + print ensure_str(pt.get_string(sortby='Property')) def find_resource(manager, name_or_id): @@ -75,7 +75,7 @@ def find_resource(manager, name_or_id): # now try to get entity as uuid try: - uuid.UUID(str(name_or_id)) + uuid.UUID(ensure_str(name_or_id)) return manager.get(name_or_id) except (ValueError, exc.NotFound): pass @@ -137,7 +137,7 @@ def import_versioned_module(version, submodule=None): def exit(msg=''): if msg: - print >> sys.stderr, msg + print >> sys.stderr, ensure_str(msg) sys.exit(1) @@ -190,3 +190,76 @@ def make_size_human_readable(size): stripped = padded.rstrip('0').rstrip('.') return '%s%s' % (stripped, suffix[index]) + + +def ensure_unicode(text, incoming=None, errors='strict'): + """ + Decodes incoming objects using `incoming` if they're + not already unicode. + + :param incoming: Text's current encoding + :param errors: Errors handling policy. + :returns: text or a unicode `incoming` encoded + representation of it. + """ + if isinstance(text, unicode): + return text + + if not incoming: + incoming = sys.stdin.encoding or \ + sys.getdefaultencoding() + + # Calling `str` in case text is a non str + # object. + text = str(text) + try: + return text.decode(incoming, errors) + except UnicodeDecodeError: + # Note(flaper87) If we get here, it means that + # sys.stdin.encoding / sys.getdefaultencoding + # didn't return a suitable encoding to decode + # text. This happens mostly when global LANG + # var is not set correctly and there's no + # default encoding. In this case, most likely + # python will use ASCII or ANSI encoders as + # default encodings but they won't be capable + # of decoding non-ASCII characters. + # + # Also, UTF-8 is being used since it's an ASCII + # extension. + return text.decode('utf-8', errors) + + +def ensure_str(text, incoming=None, + encoding='utf-8', errors='strict'): + """ + Encodes incoming objects using `encoding`. If + incoming is not specified, text is expected to + be encoded with current python's default encoding. + (`sys.getdefaultencoding`) + + :param incoming: Text's current encoding + :param encoding: Expected encoding for text (Default UTF-8) + :param errors: Errors handling policy. + :returns: text or a bytestring `encoding` encoded + representation of it. + """ + + if not incoming: + incoming = sys.stdin.encoding or \ + sys.getdefaultencoding() + + if not isinstance(text, basestring): + # try to convert `text` to string + # This allows this method for receiving + # objs that can be converted to string + text = str(text) + + if isinstance(text, unicode): + return text.encode(encoding, errors) + elif text and encoding != incoming: + # Decode text before encoding it with `encoding` + text = ensure_unicode(text, incoming, errors) + return text.encode(encoding, errors) + + return text diff --git a/glanceclient/shell.py b/glanceclient/shell.py index 142c03aa..4256b145 100644 --- a/glanceclient/shell.py +++ b/glanceclient/shell.py @@ -466,8 +466,7 @@ class HelpFormatter(argparse.HelpFormatter): def main(): try: - OpenStackImagesShell().main(sys.argv[1:]) - + OpenStackImagesShell().main(map(utils.ensure_unicode, sys.argv[1:])) except Exception, e: print >> sys.stderr, e sys.exit(1) diff --git a/glanceclient/v1/images.py b/glanceclient/v1/images.py index 16ee5f01..f6a0fcd8 100644 --- a/glanceclient/v1/images.py +++ b/glanceclient/v1/images.py @@ -75,10 +75,11 @@ class ImageManager(base.Manager): def _image_meta_to_headers(self, fields): headers = {} fields_copy = copy.deepcopy(fields) + ensure_unicode = utils.ensure_unicode for key, value in fields_copy.pop('properties', {}).iteritems(): - headers['x-image-meta-property-%s' % key] = str(value) + headers['x-image-meta-property-%s' % key] = ensure_unicode(value) for key, value in fields_copy.iteritems(): - headers['x-image-meta-%s' % key] = str(value) + headers['x-image-meta-%s' % key] = ensure_unicode(value) return headers @staticmethod @@ -130,6 +131,16 @@ class ImageManager(base.Manager): absolute_limit = kwargs.get('limit') def paginate(qp, seen=0): + # Note(flaper87) Url encoding should + # be moved inside http utils, at least + # shouldn't be here. + # + # Making sure all params are str before + # trying to encode them + for param, value in qp.iteritems(): + if isinstance(value, basestring): + qp[param] = utils.ensure_str(value) + url = '/v1/images/detail?%s' % urllib.urlencode(qp) images = self._list(url, "images") for image in images: diff --git a/glanceclient/v1/shell.py b/glanceclient/v1/shell.py index 50d1e63a..74fe3146 100644 --- a/glanceclient/v1/shell.py +++ b/glanceclient/v1/shell.py @@ -298,7 +298,8 @@ def do_image_delete(gc, args): image = utils.find_resource(gc.images, args_image) try: if args.verbose: - print 'Requesting image delete for %s ...' % args_image, + print 'Requesting image delete for %s ...' % \ + utils.ensure_str(args_image), gc.images.delete(image) diff --git a/tests/test_http.py b/tests/test_http.py index 4cf8e2f7..cabcf8d2 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -26,24 +26,34 @@ from tests import utils class TestClient(testtools.TestCase): + + def setUp(self): + super(TestClient, self).setUp() + self.mock = mox.Mox() + self.mock.StubOutWithMock(httplib.HTTPConnection, 'request') + self.mock.StubOutWithMock(httplib.HTTPConnection, 'getresponse') + + self.endpoint = 'http://example.com:9292' + self.client = http.HTTPClient(self.endpoint, token=u'abc123') + + def tearDown(self): + super(TestClient, self).tearDown() + self.mock.UnsetStubs() + def test_connection_refused(self): """ Should receive a CommunicationError if connection refused. And the error should list the host and port that refused the connection """ - endpoint = 'http://example.com:9292' - client = http.HTTPClient(endpoint, token=u'abc123') - m = mox.Mox() - m.StubOutWithMock(httplib.HTTPConnection, 'request') httplib.HTTPConnection.request( mox.IgnoreArg(), mox.IgnoreArg(), headers=mox.IgnoreArg(), ).AndRaise(socket.error()) - m.ReplayAll() + self.mock.ReplayAll() try: - client.json_request('GET', '/v1/images/detail?limit=20') + self.client.json_request('GET', '/v1/images/detail?limit=20') #NOTE(alaski) We expect exc.CommunicationError to be raised # so we should never reach this point. try/except is used here # rather than assertRaises() so that we can check the body of @@ -51,10 +61,26 @@ class TestClient(testtools.TestCase): self.fail('An exception should have bypassed this line.') except exc.CommunicationError, comm_err: fail_msg = ("Exception message '%s' should contain '%s'" % - (comm_err.message, endpoint)) - self.assertTrue(endpoint in comm_err.message, fail_msg) - finally: - m.UnsetStubs() + (comm_err.message, self.endpoint)) + self.assertTrue(self.endpoint in comm_err.message, fail_msg) + + def test_http_encoding(self): + httplib.HTTPConnection.request( + mox.IgnoreArg(), + mox.IgnoreArg(), + headers=mox.IgnoreArg()) + + # Lets fake the response + # returned by httplib + expected_response = 'Ok' + fake = utils.FakeResponse({}, StringIO.StringIO(expected_response)) + httplib.HTTPConnection.getresponse().AndReturn(fake) + self.mock.ReplayAll() + + headers = {"test": u'ni\xf1o'} + resp, body = self.client.raw_request('GET', '/v1/images/detail', + headers=headers) + self.assertEqual(resp, fake) class TestResponseBodyIterator(testtools.TestCase): diff --git a/tests/test_utils.py b/tests/test_utils.py index 07a589a6..3f275b8e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -50,3 +50,36 @@ class TestUtils(testtools.TestCase): self.assertEqual("1MB", utils.make_size_human_readable(1048576)) self.assertEqual("1.4GB", utils.make_size_human_readable(1476395008)) self.assertEqual("9.3MB", utils.make_size_human_readable(9761280)) + + def test_ensure_unicode(self): + ensure_unicode = utils.ensure_unicode + self.assertEqual(u'True', ensure_unicode(True)) + self.assertEqual(u'ni\xf1o', ensure_unicode("ni\xc3\xb1o", + incoming="utf-8")) + self.assertEqual(u"test", ensure_unicode("dGVzdA==", + incoming='base64')) + + self.assertEqual(u"strange", ensure_unicode('\x80strange', + errors='ignore')) + + self.assertEqual(u'\xc0', ensure_unicode('\xc0', + incoming='iso-8859-1')) + + # Forcing incoming to ascii so it falls back to utf-8 + self.assertEqual(u'ni\xf1o', ensure_unicode('ni\xc3\xb1o', + incoming='ascii')) + + def test_ensure_str(self): + ensure_str = utils.ensure_str + self.assertEqual("True", ensure_str(True)) + self.assertEqual("ni\xc3\xb1o", ensure_str(u'ni\xf1o', + encoding="utf-8")) + self.assertEqual("dGVzdA==\n", ensure_str("test", + encoding='base64')) + self.assertEqual('ni\xf1o', ensure_str("ni\xc3\xb1o", + encoding="iso-8859-1", + incoming="utf-8")) + + # Forcing incoming to ascii so it falls back to utf-8 + self.assertEqual('ni\xc3\xb1o', ensure_str('ni\xc3\xb1o', + incoming='ascii')) diff --git a/tests/utils.py b/tests/utils.py index 3727b423..d3565baa 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -40,13 +40,20 @@ class FakeAPI(object): class FakeResponse(object): - def __init__(self, headers, body=None): + def __init__(self, headers, body=None, + version=1.0, status=200, reason="Ok"): """ :param headers: dict representing HTTP response headers :param body: file-like object + :param version: HTTP Version + :param status: Response status code + :param reason: Status code related message. """ - self.headers = headers self.body = body + self.status = status + self.reason = reason + self.version = version + self.headers = headers def getheaders(self): return copy.deepcopy(self.headers).items()