From 2d25fe6ad3573b2a06b6b3e5e66493d7b0c55693 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Mon, 25 Jul 2016 15:06:23 -0700 Subject: [PATCH] Reduce backend requests for SLO If-Match / HEAD requests ... by storing SLO Etag and size in sysmeta. Previously, we had to GET the manifest for every HEAD or conditional request to an SLO. Worse, since SLO PUTs require that we HEAD every segment, we'd GET all included sub-SLO manifests. This was necessary so we could recompute the large object's Etag and content-length. Since we already know both of those during PUT, we'll now store it in object sysmeta. This allows us to: * satisfy HEAD requests based purely off of the manifest's HEAD response, and * perform the If-(None-)Match comparison on the object server, without any additional subrequests. Note that the large object content-length can't just be parsed from content-type -- with fast-POST enabled, the content-type coming out of the object-server won't necessarily include swift_bytes. Also note that we must still fall back to GETting the manifest if the sysmeta headers were not found. Otherwise, we'd break existing large objects. Change-Id: Ia6ad32354105515560b005cea750aa64a88c96f9 --- swift/common/middleware/slo.py | 144 +++++++++++++++--------- test/unit/common/middleware/helpers.py | 12 +- test/unit/common/middleware/test_slo.py | 137 ++++++++++++++++------ 3 files changed, 208 insertions(+), 85 deletions(-) diff --git a/swift/common/middleware/slo.py b/swift/common/middleware/slo.py index 02e6e0354b..24bc5a447b 100644 --- a/swift/common/middleware/slo.py +++ b/swift/common/middleware/slo.py @@ -200,15 +200,12 @@ the manifest and the segments it's referring to) in the container and account metadata which can be used for stats purposes. """ -from six.moves import range - from collections import defaultdict from datetime import datetime import json import mimetypes import re import six -from six import BytesIO from hashlib import md5 from swift.common.exceptions import ListingIterError, SegmentError from swift.common.swob import Request, HTTPBadRequest, HTTPServerError, \ @@ -219,7 +216,8 @@ from swift.common.utils import get_logger, config_true_value, \ get_valid_utf8_str, override_bytes_from_content_type, split_path, \ register_swift_info, RateLimitedIterator, quote, close_if_possible, \ closing_if_possible, LRUCache, StreamingPile -from swift.common.request_helpers import SegmentedIterable +from swift.common.request_helpers import SegmentedIterable, \ + get_sys_meta_prefix, update_etag_is_at_header from swift.common.constraints import check_utf8, MAX_BUFFERED_SLO_SEGMENTS from swift.common.http import HTTP_NOT_FOUND, HTTP_UNAUTHORIZED, is_success from swift.common.wsgi import WSGIContext, make_subrequest @@ -236,6 +234,9 @@ REQUIRED_SLO_KEYS = set(['path', 'etag', 'size_bytes']) OPTIONAL_SLO_KEYS = set(['range']) ALLOWED_SLO_KEYS = REQUIRED_SLO_KEYS | OPTIONAL_SLO_KEYS +SYSMETA_SLO_ETAG = get_sys_meta_prefix('object') + 'slo-etag' +SYSMETA_SLO_SIZE = get_sys_meta_prefix('object') + 'slo-size' + def parse_and_validate_input(req_body, req_path): """ @@ -361,25 +362,6 @@ def parse_and_validate_input(req_body, req_path): return parsed_data -class SloPutContext(WSGIContext): - def __init__(self, slo, slo_etag): - super(SloPutContext, self).__init__(slo.app) - self.slo_etag = '"' + slo_etag.hexdigest() + '"' - - def handle_slo_put(self, req, start_response): - app_resp = self._app_call(req.environ) - - for i in range(len(self._response_headers)): - if self._response_headers[i][0].lower() == 'etag': - self._response_headers[i] = ('Etag', self.slo_etag) - break - - start_response(self._response_status, - self._response_headers, - self._response_exc_info) - return app_resp - - class SloGetContext(WSGIContext): max_slo_recursion_depth = 10 @@ -539,6 +521,9 @@ class SloGetContext(WSGIContext): Note: this assumes that X-Static-Large-Object has already been found. """ if req.method == 'HEAD': + # We've already looked for SYSMETA_SLO_ETAG/SIZE in the response + # and didn't find them. We have to fetch the whole manifest and + # recompute. return True response_status = int(self._response_status[:3]) @@ -581,14 +566,31 @@ class SloGetContext(WSGIContext): what may be a static large object manifest (or may not). :param start_response: WSGI start_response callable """ + if req.params.get('multipart-manifest') != 'get': + # If this object is an SLO manifest, we may have saved off the + # large object etag during the original PUT. Send an + # X-Backend-Etag-Is-At header so that, if the SLO etag *was* + # saved, we can trust the object-server to respond appropriately + # to If-Match/If-None-Match requests. + update_etag_is_at_header(req, SYSMETA_SLO_ETAG) resp_iter = self._app_call(req.environ) # make sure this response is for a static large object manifest + slo_marker = slo_etag = slo_size = None for header, value in self._response_headers: - if (header.lower() == 'x-static-large-object' and - config_true_value(value)): + header = header.lower() + if header == SYSMETA_SLO_ETAG: + slo_etag = value + elif header == SYSMETA_SLO_SIZE: + slo_size = value + elif (header == 'x-static-large-object' and + config_true_value(value)): + slo_marker = value + + if slo_marker and slo_etag and slo_size: break - else: + + if not slo_marker: # Not a static large object manifest. Just pass it through. start_response(self._response_status, self._response_headers, @@ -614,6 +616,22 @@ class SloGetContext(WSGIContext): self._response_exc_info) return resp_iter + is_conditional = self._response_status.startswith(('304', '412')) and ( + req.if_match or req.if_none_match) + if slo_etag and slo_size and ( + req.method == 'HEAD' or is_conditional): + # Since we have length and etag, we can respond immediately + for i, (header, _value) in enumerate(self._response_headers): + lheader = header.lower() + if lheader == 'etag': + self._response_headers[i] = (header, '"%s"' % slo_etag) + elif lheader == 'content-length' and not is_conditional: + self._response_headers[i] = (header, slo_size) + start_response(self._response_status, + self._response_headers, + self._response_exc_info) + return resp_iter + if self._need_to_refetch_manifest(req): req.environ['swift.non_client_disconnect'] = True close_if_possible(resp_iter) @@ -659,8 +677,7 @@ class SloGetContext(WSGIContext): new_headers = [] for header, value in resp_headers: if header.lower() == 'content-length': - new_headers.append(('Content-Length', - len(json_data))) + new_headers.append(('Content-Length', len(json_data))) else: new_headers.append((header, value)) self._response_headers = new_headers @@ -680,23 +697,36 @@ class SloGetContext(WSGIContext): def get_or_head_response(self, req, resp_headers, resp_iter): segments = self._get_manifest_read(resp_iter) - etag = md5() - content_length = 0 - for seg_dict in segments: - if seg_dict.get('range'): - etag.update('%s:%s;' % (seg_dict['hash'], seg_dict['range'])) - else: - etag.update(seg_dict['hash']) + slo_etag = None + content_length = None + response_headers = [] + for header, value in resp_headers: + lheader = header.lower() + if lheader == SYSMETA_SLO_ETAG: + slo_etag = value + elif lheader == SYSMETA_SLO_SIZE: + content_length = value + elif lheader not in ('etag', 'content-length'): + response_headers.append((header, value)) - if config_true_value(seg_dict.get('sub_slo')): - override_bytes_from_content_type( - seg_dict, logger=self.slo.logger) - content_length += self._segment_length(seg_dict) + if slo_etag is None or content_length is None: + etag = md5() + content_length = 0 + for seg_dict in segments: + if seg_dict.get('range'): + etag.update('%s:%s;' % (seg_dict['hash'], + seg_dict['range'])) + else: + etag.update(seg_dict['hash']) + + if config_true_value(seg_dict.get('sub_slo')): + override_bytes_from_content_type( + seg_dict, logger=self.slo.logger) + content_length += self._segment_length(seg_dict) + slo_etag = etag.hexdigest() - response_headers = [(h, v) for h, v in resp_headers - if h.lower() not in ('etag', 'content-length')] response_headers.append(('Content-Length', str(content_length))) - response_headers.append(('Etag', '"%s"' % etag.hexdigest())) + response_headers.append(('Etag', '"%s"' % slo_etag)) if req.method == 'HEAD': return self._manifest_head_response(req, response_headers) @@ -942,7 +972,6 @@ class StaticLargeObject(object): resp_body = get_response_body( out_content_type, {}, problem_segments) raise HTTPBadRequest(resp_body, content_type=out_content_type) - env = req.environ slo_etag = md5() for seg_data in data_for_storage: @@ -952,20 +981,33 @@ class StaticLargeObject(object): else: slo_etag.update(seg_data['hash']) + slo_etag = slo_etag.hexdigest() + req.headers.update({ + SYSMETA_SLO_ETAG: slo_etag, + SYSMETA_SLO_SIZE: total_size, + 'X-Static-Large-Object': 'True', + }) + + json_data = json.dumps(data_for_storage) + if six.PY3: + json_data = json_data.encode('utf-8') + req.body = json_data + + env = req.environ if not env.get('CONTENT_TYPE'): guessed_type, _junk = mimetypes.guess_type(req.path_info) env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream' env['swift.content_type_overridden'] = True env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size - env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True' - json_data = json.dumps(data_for_storage) - if six.PY3: - json_data = json_data.encode('utf-8') - env['CONTENT_LENGTH'] = str(len(json_data)) - env['wsgi.input'] = BytesIO(json_data) - slo_put_context = SloPutContext(self, slo_etag) - return slo_put_context.handle_slo_put(req, start_response) + def start_response_wrapper(status, headers, exc_info=None): + for i, (header, _value) in enumerate(headers): + if header.lower() == 'etag': + headers[i] = ('Etag', '"%s"' % slo_etag) + break + return start_response(status, headers, exc_info) + + return self.app(env, start_response_wrapper) def get_segments_to_delete_iter(self, req): """ diff --git a/test/unit/common/middleware/helpers.py b/test/unit/common/middleware/helpers.py index fa8e675c09..ea4392a0d4 100644 --- a/test/unit/common/middleware/helpers.py +++ b/test/unit/common/middleware/helpers.py @@ -154,15 +154,23 @@ class FakeSwift(object): self._calls.append( FakeSwiftCall(method, path, HeaderKeyDict(req.headers))) + backend_etag_header = req.headers.get('X-Backend-Etag-Is-At') + conditional_etag = None + if backend_etag_header and backend_etag_header in headers: + # Apply conditional etag overrides + conditional_etag = headers[backend_etag_header] + # range requests ought to work, hence conditional_response=True if isinstance(body, list): resp = resp_class( req=req, headers=headers, app_iter=body, - conditional_response=req.method in ('GET', 'HEAD')) + conditional_response=req.method in ('GET', 'HEAD'), + conditional_etag=conditional_etag) else: resp = resp_class( req=req, headers=headers, body=body, - conditional_response=req.method in ('GET', 'HEAD')) + conditional_response=req.method in ('GET', 'HEAD'), + conditional_etag=conditional_etag) wsgi_iter = resp(env, start_response) self.mark_opened(path) return LeakTrackingIter(wsgi_iter, self, path) diff --git a/test/unit/common/middleware/test_slo.py b/test/unit/common/middleware/test_slo.py index 8e61d143a8..f0aba94a12 100644 --- a/test/unit/common/middleware/test_slo.py +++ b/test/unit/common/middleware/test_slo.py @@ -414,7 +414,9 @@ class TestSloPutManifest(SloTestCase): '/v1/AUTH_test/c/man?multipart-manifest=put', environ={'REQUEST_METHOD': 'PUT'}, headers={'Accept': 'test'}, body=test_json_data) - self.assertNotIn('X-Static-Large-Object', req.headers) + for h in ('X-Static-Large-Object', 'X-Object-Sysmeta-Slo-Etag', + 'X-Object-Sysmeta-Slo-Size'): + self.assertNotIn(h, req.headers) def my_fake_start_response(*args, **kwargs): gen_etag = '"' + md5hex('etagoftheobjectsegment') + '"' @@ -423,6 +425,11 @@ class TestSloPutManifest(SloTestCase): self.slo(req.environ, my_fake_start_response) self.assertIn('X-Static-Large-Object', req.headers) self.assertEqual(req.headers['X-Static-Large-Object'], 'True') + self.assertIn('X-Object-Sysmeta-Slo-Etag', req.headers) + self.assertEqual(req.headers['X-Object-Sysmeta-Slo-Etag'], + md5hex('etagoftheobjectsegment')) + self.assertIn('X-Object-Sysmeta-Slo-Size', req.headers) + self.assertEqual(req.headers['X-Object-Sysmeta-Slo-Size'], '100') self.assertIn('Content-Type', req.headers) self.assertTrue( req.headers['Content-Type'].endswith(';swift_bytes=100'), @@ -1078,11 +1085,11 @@ class TestSloDeleteManifest(SloTestCase): 'man-all-there?multipart-manifest=delete'))])) -class TestSloHeadManifest(SloTestCase): +class TestSloHeadOldManifest(SloTestCase): slo_etag = md5hex("seg01-hashseg02-hash") def setUp(self): - super(TestSloHeadManifest, self).setUp() + super(TestSloHeadOldManifest, self).setUp() manifest_json = json.dumps([ {'name': '/gettest/seg01', 'bytes': '100', @@ -1100,6 +1107,8 @@ class TestSloHeadManifest(SloTestCase): 'X-Static-Large-Object': 'true', 'Etag': md5hex(manifest_json)} manifest_headers.update(getattr(self, 'extra_manifest_headers', {})) + self.manifest_has_sysmeta = all(h in manifest_headers for h in ( + 'X-Object-Sysmeta-Slo-Etag', 'X-Object-Sysmeta-Slo-Size')) self.app.register( 'GET', '/v1/AUTH_test/headtest/man', swob.HTTPOk, manifest_headers, manifest_json) @@ -1116,9 +1125,9 @@ class TestSloHeadManifest(SloTestCase): self.assertIn(('Content-Type', 'test/data'), headers) self.assertEqual(body, '') # it's a HEAD request, after all - expected_app_calls = [ - ('HEAD', '/v1/AUTH_test/headtest/man'), - ('GET', '/v1/AUTH_test/headtest/man')] + expected_app_calls = [('HEAD', '/v1/AUTH_test/headtest/man')] + if not self.manifest_has_sysmeta: + expected_app_calls.append(('GET', '/v1/AUTH_test/headtest/man')) self.assertEqual(self.app.calls, expected_app_calls) def test_if_none_match_etag_matching(self): @@ -1132,9 +1141,9 @@ class TestSloHeadManifest(SloTestCase): self.assertIn(('Content-Length', '0'), headers) self.assertIn(('Content-Type', 'test/data'), headers) - expected_app_calls = [ - ('HEAD', '/v1/AUTH_test/headtest/man'), - ('GET', '/v1/AUTH_test/headtest/man')] + expected_app_calls = [('HEAD', '/v1/AUTH_test/headtest/man')] + if not self.manifest_has_sysmeta: + expected_app_calls.append(('GET', '/v1/AUTH_test/headtest/man')) self.assertEqual(self.app.calls, expected_app_calls) def test_if_match_etag_not_matching(self): @@ -1148,12 +1157,21 @@ class TestSloHeadManifest(SloTestCase): self.assertIn(('Content-Length', '0'), headers) self.assertIn(('Content-Type', 'test/data'), headers) - expected_app_calls = [ - ('HEAD', '/v1/AUTH_test/headtest/man'), - ('GET', '/v1/AUTH_test/headtest/man')] + expected_app_calls = [('HEAD', '/v1/AUTH_test/headtest/man')] + if not self.manifest_has_sysmeta: + expected_app_calls.append(('GET', '/v1/AUTH_test/headtest/man')) self.assertEqual(self.app.calls, expected_app_calls) +class TestSloHeadManifest(TestSloHeadOldManifest): + def setUp(self): + self.extra_manifest_headers = { + 'X-Object-Sysmeta-Slo-Etag': self.slo_etag, + 'X-Object-Sysmeta-Slo-Size': '300', + } + super(TestSloHeadManifest, self).setUp() + + class TestSloGetRawManifest(SloTestCase): def setUp(self): @@ -2759,7 +2777,7 @@ class TestSloGetManifest(SloTestCase): 'ERROR: An error occurred while retrieving segments')) -class TestSloConditionalGetManifest(SloTestCase): +class TestSloConditionalGetOldManifest(SloTestCase): slo_data = [ {'name': '/gettest/a_5', 'hash': md5hex("a" * 5), 'content_type': 'text/plain', 'bytes': '5'}, @@ -2772,7 +2790,7 @@ class TestSloConditionalGetManifest(SloTestCase): slo_etag = md5hex(''.join(seg['hash'] for seg in slo_data)) def setUp(self): - super(TestSloConditionalGetManifest, self).setUp() + super(TestSloConditionalGetOldManifest, self).setUp() # some plain old objects self.app.register( @@ -2816,6 +2834,8 @@ class TestSloConditionalGetManifest(SloTestCase): 'X-Static-Large-Object': 'true', 'Etag': md5hex(_abcd_manifest_json)} manifest_headers.update(getattr(self, 'extra_manifest_headers', {})) + self.manifest_has_sysmeta = all(h in manifest_headers for h in ( + 'X-Object-Sysmeta-Slo-Etag', 'X-Object-Sysmeta-Slo-Size')) self.app.register( 'GET', '/v1/AUTH_test/gettest/manifest-abcd', swob.HTTPOk, manifest_headers, @@ -2833,13 +2853,16 @@ class TestSloConditionalGetManifest(SloTestCase): self.assertIn(('Etag', '"%s"' % self.slo_etag), headers) self.assertEqual(body, '') - expected_app_calls = [ - ('GET', '/v1/AUTH_test/gettest/manifest-abcd'), - # Need to verify the first segment - ('GET', '/v1/AUTH_test/gettest/manifest-bc'), - ('GET', '/v1/AUTH_test/gettest/a_5?multipart-manifest=get'), - ] + expected_app_calls = [('GET', '/v1/AUTH_test/gettest/manifest-abcd')] + if not self.manifest_has_sysmeta: + # We *still* verify the first segment + expected_app_calls.extend([ + ('GET', '/v1/AUTH_test/gettest/manifest-bc'), + ('GET', '/v1/AUTH_test/gettest/a_5?multipart-manifest=get'), + ]) self.assertEqual(self.app.calls, expected_app_calls) + self.assertEqual(self.app.headers[0].get('X-Backend-Etag-Is-At'), + 'x-object-sysmeta-slo-etag') def test_if_none_match_does_not_match(self): req = Request.blank( @@ -2863,6 +2886,8 @@ class TestSloConditionalGetManifest(SloTestCase): ('GET', '/v1/AUTH_test/gettest/d_20?multipart-manifest=get'), ] self.assertEqual(self.app.calls, expected_app_calls) + self.assertEqual(self.app.headers[0].get('X-Backend-Etag-Is-At'), + 'x-object-sysmeta-slo-etag') def test_if_match_matches(self): req = Request.blank( @@ -2877,17 +2902,21 @@ class TestSloConditionalGetManifest(SloTestCase): self.assertEqual( body, 'aaaaabbbbbbbbbbcccccccccccccccdddddddddddddddddddd') - expected_app_calls = [ - ('GET', '/v1/AUTH_test/gettest/manifest-abcd'), + expected_app_calls = [('GET', '/v1/AUTH_test/gettest/manifest-abcd')] + if not self.manifest_has_sysmeta: # Manifest never matches -> got back a 412; need to re-fetch - ('GET', '/v1/AUTH_test/gettest/manifest-abcd'), + expected_app_calls.append( + ('GET', '/v1/AUTH_test/gettest/manifest-abcd')) + expected_app_calls.extend([ ('GET', '/v1/AUTH_test/gettest/manifest-bc'), ('GET', '/v1/AUTH_test/gettest/a_5?multipart-manifest=get'), ('GET', '/v1/AUTH_test/gettest/b_10?multipart-manifest=get'), ('GET', '/v1/AUTH_test/gettest/c_15?multipart-manifest=get'), ('GET', '/v1/AUTH_test/gettest/d_20?multipart-manifest=get'), - ] + ]) self.assertEqual(self.app.calls, expected_app_calls) + self.assertEqual(self.app.headers[0].get('X-Backend-Etag-Is-At'), + 'x-object-sysmeta-slo-etag') def test_if_match_does_not_match(self): req = Request.blank( @@ -2901,15 +2930,18 @@ class TestSloConditionalGetManifest(SloTestCase): self.assertIn(('Etag', '"%s"' % self.slo_etag), headers) self.assertEqual(body, '') - expected_app_calls = [ - ('GET', '/v1/AUTH_test/gettest/manifest-abcd'), - # Manifest never matches -> got back a 412; need to re-fetch - ('GET', '/v1/AUTH_test/gettest/manifest-abcd'), - # We need to verify the first segment - ('GET', '/v1/AUTH_test/gettest/manifest-bc'), - ('GET', '/v1/AUTH_test/gettest/a_5?multipart-manifest=get'), - ] + expected_app_calls = [('GET', '/v1/AUTH_test/gettest/manifest-abcd')] + if not self.manifest_has_sysmeta: + # We *still* verify the first segment + expected_app_calls.extend([ + # Manifest never matches -> got back a 412; need to re-fetch + ('GET', '/v1/AUTH_test/gettest/manifest-abcd'), + ('GET', '/v1/AUTH_test/gettest/manifest-bc'), + ('GET', '/v1/AUTH_test/gettest/a_5?multipart-manifest=get'), + ]) self.assertEqual(self.app.calls, expected_app_calls) + self.assertEqual(self.app.headers[0].get('X-Backend-Etag-Is-At'), + 'x-object-sysmeta-slo-etag') def test_if_match_matches_and_range(self): req = Request.blank( @@ -2935,6 +2967,47 @@ class TestSloConditionalGetManifest(SloTestCase): ('GET', '/v1/AUTH_test/gettest/b_10?multipart-manifest=get'), ] self.assertEqual(self.app.calls, expected_app_calls) + self.assertEqual(self.app.headers[0].get('X-Backend-Etag-Is-At'), + 'x-object-sysmeta-slo-etag') + + def test_if_match_matches_passthrough(self): + # first fetch and stash the manifest etag + req = Request.blank( + '/v1/AUTH_test/gettest/manifest-abcd?multipart-manifest=get', + environ={'REQUEST_METHOD': 'GET'}) + status, headers, body = self.call_slo(req) + + self.assertEqual(status, '200 OK') + headers = HeaderKeyDict(headers) + self.assertEqual('application/json; charset=utf-8', + headers['Content-Type']) + manifest_etag = headers['Etag'] + + # now use it as a condition and expect to match + req = Request.blank( + '/v1/AUTH_test/gettest/manifest-abcd?multipart-manifest=get', + environ={'REQUEST_METHOD': 'GET'}, + headers={'If-Match': manifest_etag}) + status, headers, body = self.call_slo(req) + self.assertEqual(status, '200 OK') + headers = HeaderKeyDict(headers) + self.assertEqual(manifest_etag, headers['Etag']) + + expected_app_calls = [ + ('GET', + '/v1/AUTH_test/gettest/manifest-abcd?multipart-manifest=get')] * 2 + self.assertEqual(self.app.calls, expected_app_calls) + self.assertNotIn('X-Backend-Etag-Is-At', self.app.headers[0]) + self.assertNotIn('X-Backend-Etag-Is-At', self.app.headers[1]) + + +class TestSloConditionalGetNewManifest(TestSloConditionalGetOldManifest): + def setUp(self): + self.extra_manifest_headers = { + 'X-Object-Sysmeta-Slo-Etag': self.slo_etag, + 'X-Object-Sysmeta-Slo-Size': '50', + } + super(TestSloConditionalGetNewManifest, self).setUp() class TestSloBulkLogger(unittest.TestCase):