From 83bbd0aef20169953297f05389461d23bc484af5 Mon Sep 17 00:00:00 2001 From: Samuel Merritt Date: Mon, 12 Nov 2012 18:45:26 -0800 Subject: [PATCH] Fix lazy-listing of object segments. When responding to a GET request for a manifest, it was intended that the proxy server lazily fetch the pieces of the container listing. That way, a single client request doesn't immediately turn into a bunch of requests to backends. The additional requests should only get made if the client is putting in the work of receiving the object body. However, commit 156f27c accidentally changed this so that all the pieces of the container listing are eagerly fetched up-front. Better yet, if an object has more than CONTAINER_LISTING_LIMIT (default 10,000) segments, the container listing is then fetched a second time, albeit lazily, while streaming out the response. This commit restores the laziness and adds tests for it. Change-Id: I49840a7059e6f999ce19199ecb10cdb77358526b --- swift/proxy/controllers/obj.py | 53 +++++++++---- test/unit/proxy/test_server.py | 132 ++++++++++++++++++++++++++++++++- 2 files changed, 171 insertions(+), 14 deletions(-) diff --git a/swift/proxy/controllers/obj.py b/swift/proxy/controllers/obj.py index ff37d1b7eb..0e30936331 100644 --- a/swift/proxy/controllers/obj.py +++ b/swift/proxy/controllers/obj.py @@ -24,10 +24,7 @@ # These shenanigans are to ensure all related objects can be garbage # collected. We've seen objects hang around forever otherwise. -try: - import simplejson as json -except ImportError: - import json +import itertools import mimetypes import re import time @@ -41,7 +38,7 @@ from eventlet.queue import Queue from eventlet.timeout import Timeout from swift.common.utils import ContextPool, normalize_timestamp, \ - config_true_value, public + config_true_value, public, json from swift.common.bufferedhttp import http_connect from swift.common.constraints import check_metadata, check_object_creation, \ CONTAINER_LISTING_LIMIT, MAX_FILE_SIZE @@ -247,6 +244,11 @@ class ObjectController(Controller): self.object_name = unquote(object_name) def _listing_iter(self, lcontainer, lprefix, env): + for page in self._listing_pages_iter(lcontainer, lprefix, env): + for item in page: + yield item + + def _listing_pages_iter(self, lcontainer, lprefix, env): lpartition, lnodes = self.app.container_ring.get_nodes( self.account_name, lcontainer) marker = '' @@ -278,8 +280,29 @@ class ObjectController(Controller): if not sublisting: break marker = sublisting[-1]['name'] - for obj in sublisting: - yield obj + yield sublisting + + def _remaining_items(self, listing_iter): + """ + Returns an item-by-item iterator for a page-by-page iterator + of item listings. + + Swallows listing-related errors; this iterator is only used + after we've already started streaming a response to the + client, and so if we start getting errors from the container + servers now, it's too late to send an error to the client, so + we just quit looking for segments. + """ + try: + for page in listing_iter: + for item in page: + yield item + except ListingIterNotFound: + pass + except ListingIterError: + pass + except ListingIterNotAuthorized: + pass def is_good_source(self, src): """ @@ -316,16 +339,21 @@ class ObjectController(Controller): lcontainer = unquote(lcontainer) lprefix = unquote(lprefix) try: - listing = list(self._listing_iter(lcontainer, lprefix, - req.environ)) + pages_iter = iter(self._listing_pages_iter(lcontainer, lprefix, + req.environ)) + listing_page1 = pages_iter.next() + listing = itertools.chain(listing_page1, + self._remaining_items(pages_iter)) except ListingIterNotFound: return HTTPNotFound(request=req) except ListingIterNotAuthorized, err: return err.aresp except ListingIterError: return HTTPServerError(request=req) + except StopIteration: + listing_page1 = listing = () - if len(listing) > CONTAINER_LISTING_LIMIT: + if len(listing_page1) >= CONTAINER_LISTING_LIMIT: resp = Response(headers=resp.headers, request=req, conditional_response=True) if req.method == 'HEAD': @@ -344,14 +372,13 @@ class ObjectController(Controller): return head_response else: resp.app_iter = SegmentedIterable( - self, lcontainer, - self._listing_iter(lcontainer, lprefix, req.environ), - resp) + self, lcontainer, listing, resp) else: # For objects with a reasonable number of segments, we'll serve # them with a set content-length and computed etag. if listing: + listing = list(listing) content_length = sum(o['bytes'] for o in listing) last_modified = max(o['last_modified'] for o in listing) last_modified = datetime(*map(int, re.split('[^\d]', diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 089ce46859..9e4c0687cc 100755 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -20,6 +20,7 @@ from logging.handlers import SysLogHandler import os import sys import unittest +import urlparse import signal from ConfigParser import ConfigParser from contextlib import contextmanager @@ -268,6 +269,8 @@ def fake_http_connect(*code_iter, **kwargs): code_iter = iter(code_iter) static_body = kwargs.get('body', None) body_iter = kwargs.get('body_iter', None) + if body_iter: + body_iter = iter(body_iter) def connect(*args, **ckwargs): if 'give_content_type' in kwargs: @@ -873,7 +876,7 @@ class TestObjectController(unittest.TestCase): set_http_connect(200, 200, 200, 200, 200, 200, 200, 200, 201, 201, 201, 200, 200, 200, give_connect=test_connect, - body_iter=iter(body_iter), + body_iter=body_iter, headers={'x-versions-location': 'foo'}) self.app.memcache.store = {} req = Request.blank('/a/c/o', @@ -883,6 +886,133 @@ class TestObjectController(unittest.TestCase): res = controller.DELETE(req) self.assertEquals(test_errors, []) + def test_GET_manifest_no_segments(self): + response_bodies = ( + '', # HEAD /a + '', # HEAD /a/c + '', # GET manifest + simplejson.dumps([])) # GET empty listing + + with save_globals(): + controller = proxy_server.ObjectController( + self.app, 'a', 'c', 'manifest') + set_http_connect( + 200, # HEAD /a + 200, # HEAD /a/c + 200, # GET manifest + 200, # GET empty listing + headers={"X-Object-Manifest": "segments/seg"}, + body_iter=response_bodies) + + req = Request.blank('/a/c/manifest') + resp = controller.GET(req) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.body, '') + + def test_GET_manifest_limited_listing(self): + listing1 = [{"hash": "454dfc73af632012ce3e6217dc464241", + "last_modified": "2012-11-08T04:05:37.866820", + "bytes": 2, + "name": "seg01", + "content_type": "application/octet-stream"}, + {"hash": "474bab96c67528d42d5c0c52b35228eb", + "last_modified": "2012-11-08T04:05:37.846710", + "bytes": 2, + "name": "seg02", + "content_type": "application/octet-stream"}] + + listing2 = [{"hash": "116baa5508693d1d1ca36abdd9f9478b", + "last_modified": "2012-11-08T04:05:37.849510", + "bytes": 2, + "name": "seg03", + "content_type": "application/octet-stream"}, + {"hash": "7bd6aaa1ef6013353f0420459574ac9d", + "last_modified": "2012-11-08T04:05:37.855180", + "bytes": 2, + "name": "seg04", + "content_type": "application/octet-stream" + }] + + listing3 = [{"hash": "6605f80e3cefaa24e9823544df4edbd6", + "last_modified": "2012-11-08T04:05:37.853710", + "bytes": 2, + "name": "seg05", + "content_type": "application/octet-stream"}] + + response_bodies = ( + '', # HEAD /a + '', # HEAD /a/c + '', # GET manifest + simplejson.dumps(listing1), # GET listing1 + 'Aa', # GET seg01 + 'Bb', # GET seg02 + simplejson.dumps(listing2), # GET listing2 + 'Cc', # GET seg03 + 'Dd', # GET seg04 + simplejson.dumps(listing3), # GET listing3 + 'Ee', # GET seg05 + simplejson.dumps([])) # GET final empty listing + with save_globals(): + try: + swift.proxy.controllers.obj.CONTAINER_LISTING_LIMIT = 2 + controller = proxy_server.ObjectController( + self.app, 'a', 'c', 'manifest') + + requested = [] + def capture_requested_paths(ipaddr, port, device, partition, + method, path, headers=None, + query_string=None): + qs_dict = dict(urlparse.parse_qsl(query_string or '')) + requested.append([method, path, qs_dict]) + + set_http_connect( + 200, # HEAD /a + 200, # HEAD /a/c + 200, # GET manifest + 200, # GET listing1 + 200, # GET seg01 + 200, # GET seg02 + 200, # GET listing2 + 200, # GET seg03 + 200, # GET seg04 + 200, # GET listing3 + 200, # GET seg05 + 200, # GET final empty listing + headers={"X-Object-Manifest": "segments/seg"}, + body_iter=response_bodies, + give_connect=capture_requested_paths) + + req = Request.blank('/a/c/manifest') + resp = controller.GET(req) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.body, 'AaBbCcDdEe') + + self.assertEqual( + requested, + [['HEAD', '/a', {}], + ['HEAD', '/a/c', {}], + ['GET', '/a/c/manifest', {}], + ['GET', '/a/segments', + {'format': 'json', 'prefix': 'seg'}], + ['GET', '/a/segments/seg01', {}], + ['GET', '/a/segments/seg02', {}], + ['GET', '/a/segments', + {'format': 'json', 'prefix': 'seg', 'marker': 'seg02'}], + ['GET', '/a/segments/seg03', {}], + ['GET', '/a/segments/seg04', {}], + ['GET', '/a/segments', + {'format': 'json', 'prefix': 'seg', 'marker': 'seg04'}], + ['GET', '/a/segments/seg05', {}], + ['GET', '/a/segments', + {'format': 'json', 'prefix': 'seg', 'marker': 'seg05'}]]) + + finally: + # other tests in this file get very unhappy if this + # isn't set back, which leads to time-wasting + # debugging of other tests. + swift.proxy.controllers.obj.CONTAINER_LISTING_LIMIT = \ + _orig_container_listing_limit + def test_PUT_auto_content_type(self): with save_globals(): controller = proxy_server.ObjectController(self.app, 'account',