From e8b654f318cfab485a772e19db33cb2fe4c3d858 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Fri, 6 Dec 2019 23:26:37 +0000 Subject: [PATCH] Have slo tell the object-server that it wants whole manifests Otherwise, we waste a request on some 416/206 response that won't be helpful. To do this, add a new X-Backend-Ignore-Range-If-Metadata-Present header whose value is a comma-separated list of header names. Middlewares may include this header to tell object-servers to send the whole object (rather than a 206 or 416) if *any* of the metadata are present. Have dlo and symlink use it, too; it won't save us any round-trips, but it should clean up some object-server logging. Change-Id: I4ff2a178d0456e7e37d561109ef57dd0d92cbd4e --- swift/common/middleware/dlo.py | 4 ++- swift/common/middleware/slo.py | 5 ++- swift/common/middleware/symlink.py | 4 ++- swift/common/request_helpers.py | 18 ++++++++++ swift/obj/server.py | 8 +++++ swift/proxy/controllers/obj.py | 22 ++++++++++++ test/unit/common/middleware/test_slo.py | 10 ++++++ test/unit/common/middleware/test_symlink.py | 4 +++ test/unit/obj/test_server.py | 37 +++++++++++++++++++ test/unit/proxy/test_server.py | 39 +++++++++++++++++---- 10 files changed, 141 insertions(+), 10 deletions(-) diff --git a/swift/common/middleware/dlo.py b/swift/common/middleware/dlo.py index 51a7811159..34a50f7dd3 100644 --- a/swift/common/middleware/dlo.py +++ b/swift/common/middleware/dlo.py @@ -131,7 +131,8 @@ from swift.common.swob import Request, Response, \ str_to_wsgi, wsgi_to_str, wsgi_quote, wsgi_unquote, normalize_etag from swift.common.utils import get_logger, \ RateLimitedIterator, quote, close_if_possible, closing_if_possible -from swift.common.request_helpers import SegmentedIterable +from swift.common.request_helpers import SegmentedIterable, \ + update_ignore_range_header from swift.common.wsgi import WSGIContext, make_subrequest, load_app_config @@ -369,6 +370,7 @@ class GetContext(WSGIContext): Otherwise, simply pass it through. """ + update_ignore_range_header(req, 'X-Object-Manifest') resp_iter = self._app_call(req.environ) # make sure this response is for a dynamic large object manifest diff --git a/swift/common/middleware/slo.py b/swift/common/middleware/slo.py index ef3daedd56..bbe2cdca0b 100644 --- a/swift/common/middleware/slo.py +++ b/swift/common/middleware/slo.py @@ -340,7 +340,7 @@ from swift.common.utils import get_logger, config_true_value, \ Timestamp from swift.common.request_helpers import SegmentedIterable, \ get_sys_meta_prefix, update_etag_is_at_header, resolve_etag_is_at_header, \ - get_container_update_override_key + get_container_update_override_key, update_ignore_range_header from swift.common.constraints import check_utf8 from swift.common.http import HTTP_NOT_FOUND, HTTP_UNAUTHORIZED, is_success from swift.common.wsgi import WSGIContext, make_subrequest @@ -764,6 +764,9 @@ class SloGetContext(WSGIContext): # saved, we can trust the object-server to respond appropriately # to If-Match/If-None-Match requests. update_etag_is_at_header(req, SYSMETA_SLO_ETAG) + # Tell the object server that if it's a manifest, + # we want the whole thing + update_ignore_range_header(req, 'X-Static-Large-Object') resp_iter = self._app_call(req.environ) # make sure this response is for a static large object manifest diff --git a/swift/common/middleware/symlink.py b/swift/common/middleware/symlink.py index e19c176b0a..0dc75419e9 100644 --- a/swift/common/middleware/symlink.py +++ b/swift/common/middleware/symlink.py @@ -206,7 +206,8 @@ from swift.common.utils import get_logger, register_swift_info, split_path, \ from swift.common.constraints import check_account_format from swift.common.wsgi import WSGIContext, make_subrequest from swift.common.request_helpers import get_sys_meta_prefix, \ - check_path_header, get_container_update_override_key + check_path_header, get_container_update_override_key, \ + update_ignore_range_header from swift.common.swob import Request, HTTPBadRequest, HTTPTemporaryRedirect, \ HTTPException, HTTPConflict, HTTPPreconditionFailed, wsgi_quote, \ wsgi_unquote, status_map @@ -428,6 +429,7 @@ class SymlinkObjectContext(WSGIContext): :param req: HTTP GET or HEAD object request :returns: Response Iterator """ + update_ignore_range_header(req, TGT_OBJ_SYSMETA_SYMLINK_HDR) try: return self._recursive_get_head(req) except LinkIterError: diff --git a/swift/common/request_helpers.py b/swift/common/request_helpers.py index bdc234c9ea..876b8d8096 100644 --- a/swift/common/request_helpers.py +++ b/swift/common/request_helpers.py @@ -808,3 +808,21 @@ def resolve_etag_is_at_header(req, metadata): alternate_etag = metadata[name] break return alternate_etag + + +def update_ignore_range_header(req, name): + """ + Helper function to update an X-Backend-Ignore-Range-If-Metadata-Present + header whose value is a list of header names which, if any are present + on an object, mean the object server should respond with a 200 instead + of a 206 or 416. + + :param req: a swob Request + :param name: name of a header which, if found, indicates the proxy will + want the whole object + """ + if ',' in name: + # HTTP header names should not have commas but we'll check anyway + raise ValueError('Header name must not contain commas') + hdr = 'X-Backend-Ignore-Range-If-Metadata-Present' + req.headers[hdr] = csv_append(req.headers.get(hdr), name) diff --git a/swift/obj/server.py b/swift/obj/server.py index d1a1d85e93..3c684d7a06 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -1070,6 +1070,14 @@ class ObjectController(BaseStorageServer): try: with disk_file.open(current_time=req_timestamp): metadata = disk_file.get_metadata() + ignore_range_headers = set( + h.strip().lower() + for h in request.headers.get( + 'X-Backend-Ignore-Range-If-Metadata-Present', + '').split(',')) + if ignore_range_headers.intersection( + h.lower() for h in metadata): + request.headers.pop('Range', None) obj_size = int(metadata['Content-Length']) file_x_ts = Timestamp(metadata['X-Timestamp']) keep_cache = (self.keep_cache_private or diff --git a/swift/proxy/controllers/obj.py b/swift/proxy/controllers/obj.py index 9b71ed259f..81d9360ed4 100644 --- a/swift/proxy/controllers/obj.py +++ b/swift/proxy/controllers/obj.py @@ -2397,6 +2397,8 @@ class ECObjectController(BaseObjectController): safe_iter, partition, policy, buckets.get_extra_headers) + # Put this back, since we *may* need it for kickoff()/_fix_response() + # (but note that _fix_ranges() may also pop it back off before then) req.range = orig_range if best_bucket and best_bucket.shortfall <= 0 and best_bucket.durable: # headers can come from any of the getters @@ -2420,6 +2422,7 @@ class ECObjectController(BaseObjectController): conditional_response=True, app_iter=app_iter) update_headers(resp, resp_headers) + self._fix_ranges(req, resp) try: app_iter.kickoff(req, resp) except HTTPException as err_resp: @@ -2464,6 +2467,10 @@ class ECObjectController(BaseObjectController): req, statuses, reasons, bodies, 'Object', headers=headers) self._fix_response(req, resp) + + # For sure put this back before actually returning the response + # to the rest of the pipeline, so we don't modify the client headers + req.range = orig_range return resp def _fix_response(self, req, resp): @@ -2486,6 +2493,21 @@ class ECObjectController(BaseObjectController): resp.headers['Content-Range'] = 'bytes */%s' % resp.headers[ 'X-Object-Sysmeta-Ec-Content-Length'] + def _fix_ranges(self, req, resp): + # Has to be called *before* kickoff()! + if is_success(resp.status_int): + ignore_range_headers = set( + h.strip().lower() + for h in req.headers.get( + 'X-Backend-Ignore-Range-If-Metadata-Present', + '').split(',')) + if ignore_range_headers.intersection( + h.lower() for h in resp.headers): + # If we leave the Range header around, swob (or somebody) will + # try to "fix" things for us when we kickoff() the app_iter. + req.headers.pop('Range', None) + resp.app_iter.range_specs = [] + def _make_putter(self, node, part, req, headers): return MIMEPutter.connect( node, part, req.swift_entity_path, headers, diff --git a/test/unit/common/middleware/test_slo.py b/test/unit/common/middleware/test_slo.py index 84eadd7490..980352c8dc 100644 --- a/test/unit/common/middleware/test_slo.py +++ b/test/unit/common/middleware/test_slo.py @@ -2248,6 +2248,16 @@ class TestSloGetManifest(SloTestCase): 'bytes=3-', None, 'bytes=0-2']) + ignore_range_headers = [ + c[2].get('X-Backend-Ignore-Range-If-Metadata-Present') + for c in self.app.calls_with_headers] + self.assertEqual(ignore_range_headers, [ + 'X-Static-Large-Object', + None, + None, + None, + None, + None]) # we set swift.source for everything but the first request self.assertIsNone(self.app.swift_sources[0]) self.assertEqual(self.app.swift_sources[1:], diff --git a/test/unit/common/middleware/test_symlink.py b/test/unit/common/middleware/test_symlink.py index c1d83b3503..44ba9b2cf3 100644 --- a/test/unit/common/middleware/test_symlink.py +++ b/test/unit/common/middleware/test_symlink.py @@ -396,6 +396,8 @@ class TestSymlinkMiddleware(TestSymlinkMiddlewareBase): self.assertIn(('Content-Location', '/v1/a2/c1/o'), headers) calls = self.app.calls_with_headers req_headers['Host'] = 'localhost:80' + req_headers['X-Backend-Ignore-Range-If-Metadata-Present'] = \ + 'x-object-sysmeta-symlink-target' self.assertEqual(req_headers, calls[0].headers) req_headers['User-Agent'] = 'Swift' self.assertEqual(req_headers, calls[1].headers) @@ -564,6 +566,8 @@ class TestSymlinkMiddleware(TestSymlinkMiddlewareBase): self.assertIn(('Content-Location', '/v1/a2/c1/o'), headers) calls = self.app.calls_with_headers req_headers['Host'] = 'localhost:80' + req_headers['X-Backend-Ignore-Range-If-Metadata-Present'] = \ + 'x-object-sysmeta-symlink-target' self.assertEqual(req_headers, calls[0].headers) req_headers['User-Agent'] = 'Swift' self.assertEqual(req_headers, calls[1].headers) diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index 3e5a5f16f7..adb43ccb51 100644 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -2986,6 +2986,43 @@ class TestObjectController(unittest.TestCase): self.assertEqual(resp.body, b'FY') self.assertEqual(resp.headers['content-length'], '2') + req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'GET'}) + req.range = 'bytes=100-' + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 416) + self.assertIn(b'Not Satisfiable', resp.body) + + # Proxy (SLO in particular) can say that if some metadata's present, + # it wants the whole thing + req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'GET'}) + req.range = 'bytes=1-3' + req.headers['X-Backend-Ignore-Range-If-Metadata-Present'] = \ + 'X-Object-Meta-1' + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.body, b'VERIFY') + self.assertEqual(resp.headers['content-length'], '6') + + # If it's not present, Range is still respected + req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'GET'}) + req.range = 'bytes=1-3' + req.headers['X-Backend-Ignore-Range-If-Metadata-Present'] = \ + 'X-Object-Meta-5' + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 206) + self.assertEqual(resp.body, b'ERI') + self.assertEqual(resp.headers['content-length'], '3') + + # Works like "any", not "all"; also works where we would've 416ed + req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'GET'}) + req.range = 'bytes=100-' + req.headers['X-Backend-Ignore-Range-If-Metadata-Present'] = \ + 'X-Object-Meta-1, X-Object-Meta-5' + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.body, b'VERIFY') + self.assertEqual(resp.headers['content-length'], '6') + objfile = os.path.join( self.testdir, 'sda1', storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 3b509c380d..117511a423 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -2374,6 +2374,14 @@ class TestReplicatedObjectController( self.assertEqual(res.status_int, 206) self.assertEqual(res.body, obj[10:201]) + req = Request.blank(path, environ={'REQUEST_METHOD': 'GET'}, headers={ + 'Content-Type': 'application/octet-stream', + 'X-Backend-Ignore-Range-If-Metadata-Present': 'Content-Type', + 'Range': 'bytes=10-200'}) + res = req.get_response(prosrv) + self.assertEqual(res.status_int, 200) + self.assertEqual(res.body, obj) + # multiple byte ranges req = Request.blank( path, @@ -8102,19 +8110,26 @@ class TestObjectECRangedGET(unittest.TestCase): assert headers[:len(exp)] == exp, \ "object PUT failed %s" % obj_name - def _get_obj(self, range_value, obj_name=None): + def _get_obj(self, range_value, obj_name=None, ignore_range_if=''): if obj_name is None: obj_name = self.obj_name + if ignore_range_if: + ignore_range_if = ( + 'X-Backend-Ignore-Range-If-Metadata-Present: %s\r\n' + % ignore_range_if) prolis = _test_sockets[0] sock = connect_tcp(('localhost', prolis.getsockname()[1])) fd = sock.makefile('rwb') - fd.write(('GET /v1/a/ec-con/%s HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'X-Storage-Token: t\r\n' - 'Range: %s\r\n' - '\r\n' % (obj_name, range_value)).encode('ascii')) + fd.write(( + 'GET /v1/a/ec-con/%s HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'X-Storage-Token: t\r\n' + 'Range: %s\r\n' + '%s' + '\r\n' % (obj_name, range_value, ignore_range_if) + ).encode('ascii')) fd.flush() headers = readuntil2crlfs(fd) # e.g. "HTTP/1.1 206 Partial Content\r\n..." @@ -8222,6 +8237,16 @@ class TestObjectECRangedGET(unittest.TestCase): self.assertEqual(len(gotten_obj), 4096) self.assertEqual(gotten_obj, self.aligned_obj[4096:8192]) + def test_ignore_range_if_metadata_present(self): + # Ranged GET that actually wants the whole object + status, headers, gotten_obj = self._get_obj( + "bytes=4096-8191", ignore_range_if='content-type') + self.assertEqual(status, 200) + self.assertEqual(headers['Content-Length'], str(len(self.obj))) + self.assertNotIn('Content-Range', headers) + self.assertEqual(len(gotten_obj), len(self.obj)) + self.assertEqual(gotten_obj, self.obj) + def test_byte_0(self): # Just the first byte, but it's index 0, so that's easy to get wrong status, headers, gotten_obj = self._get_obj("bytes=0-0")