From 7a50972104095478c91a477f5c5499dda4372711 Mon Sep 17 00:00:00 2001 From: John Dickinson Date: Thu, 9 Jun 2016 11:22:37 -0700 Subject: [PATCH 1/7] update .gitreview Change-Id: I9593e453891c137fd430a44306e17268ba45fd12 --- .gitreview | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitreview b/.gitreview index d7c52c0593..8dcf603328 100644 --- a/.gitreview +++ b/.gitreview @@ -2,3 +2,4 @@ host=review.openstack.org port=29418 project=openstack/swift.git +defaultbranch=feature/crypto-review From 928c4790ebce3782f42d239faa9758941a8dd296 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Tue, 7 Jun 2016 13:41:55 +0100 Subject: [PATCH 2/7] Refactor tests and add tests Relocates some test infrastructure in preparation for use with encryption tests, in particular moves the test server setup code from test/unit/proxy/test_server.py to a new helpers.py so that it can be re-used, and adds ability to specify additional config options for the test servers (used in encryption tests). Adds unit test coverage for extract_swift_bytes and functional test coverage for container listings. Adds a check on the content and metadata of reconciled objects in probe tests. Change-Id: I9bfbf4e47cb0eb370e7a74d18c78d67b6b9d6645 --- test/functional/swift_test_client.py | 1 + test/functional/tests.py | 24 ++ test/probe/brain.py | 6 +- .../test_container_merge_policy_index.py | 84 +++--- test/unit/common/middleware/helpers.py | 10 + .../common/middleware/test_proxy_logging.py | 7 +- test/unit/common/test_utils.py | 18 ++ test/unit/helpers.py | 271 ++++++++++++++++++ test/unit/proxy/test_server.py | 226 ++------------- 9 files changed, 393 insertions(+), 254 deletions(-) create mode 100644 test/unit/helpers.py diff --git a/test/functional/swift_test_client.py b/test/functional/swift_test_client.py index 3c9bb0b5e2..98262f5892 100644 --- a/test/functional/swift_test_client.py +++ b/test/functional/swift_test_client.py @@ -585,6 +585,7 @@ class Container(Base): file_item['name'] = file_item['name'].encode('utf-8') file_item['content_type'] = file_item['content_type'].\ encode('utf-8') + file_item['bytes'] = int(file_item['bytes']) return files else: content = self.conn.response.read() diff --git a/test/functional/tests.py b/test/functional/tests.py index d083aa10c2..78f1f33be1 100644 --- a/test/functional/tests.py +++ b/test/functional/tests.py @@ -744,6 +744,30 @@ class TestContainer(Base): for file_item in files: self.assertIn(file_item, self.env.files) + def _testContainerFormattedFileList(self, format_type): + expected = {} + for name in self.env.files: + expected[name] = self.env.container.file(name).info() + + file_list = self.env.container.files(parms={'format': format_type}) + self.assert_status(200) + for actual in file_list: + name = actual['name'] + self.assertIn(name, expected) + self.assertEqual(expected[name]['etag'], actual['hash']) + self.assertEqual( + expected[name]['content_type'], actual['content_type']) + self.assertEqual( + expected[name]['content_length'], actual['bytes']) + expected.pop(name) + self.assertFalse(expected) # sanity check + + def testContainerJsonFileList(self): + self._testContainerFormattedFileList('json') + + def testContainerXmlFileList(self): + self._testContainerFormattedFileList('xml') + def testMarkerLimitFileList(self): for format_type in [None, 'json', 'xml']: for marker in ['0', 'A', 'I', 'R', 'Z', 'a', 'i', 'r', 'z', diff --git a/test/probe/brain.py b/test/probe/brain.py index 9f90ed8d8b..3a63b18565 100644 --- a/test/probe/brain.py +++ b/test/probe/brain.py @@ -164,12 
+164,12 @@ class BrainSplitter(object): client.delete_container(self.url, self.token, self.container_name) @command - def put_object(self, headers=None): + def put_object(self, headers=None, contents=None): """ - issue put for zero byte test object + issue put for test object """ client.put_object(self.url, self.token, self.container_name, - self.object_name, headers=headers) + self.object_name, headers=headers, contents=contents) @command def delete_object(self): diff --git a/test/probe/test_container_merge_policy_index.py b/test/probe/test_container_merge_policy_index.py index 829329a7eb..cd60e6dead 100644 --- a/test/probe/test_container_merge_policy_index.py +++ b/test/probe/test_container_merge_policy_index.py @@ -46,6 +46,24 @@ class TestContainerMergePolicyIndex(ReplProbeTest): self.brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name, 'container') + def _get_object_patiently(self, policy_index): + # use proxy to access object (bad container info might be cached...) + timeout = time.time() + TIMEOUT + while time.time() < timeout: + try: + return client.get_object(self.url, self.token, + self.container_name, + self.object_name) + except ClientException as err: + if err.http_status != HTTP_NOT_FOUND: + raise + time.sleep(1) + else: + self.fail('could not HEAD /%s/%s/%s/ from policy %s ' + 'after %s seconds.' % ( + self.account, self.container_name, self.object_name, + int(policy_index), TIMEOUT)) + def test_merge_storage_policy_index(self): # generic split brain self.brain.stop_primary_half() @@ -53,7 +71,8 @@ class TestContainerMergePolicyIndex(ReplProbeTest): self.brain.start_primary_half() self.brain.stop_handoff_half() self.brain.put_container() - self.brain.put_object() + self.brain.put_object(headers={'x-object-meta-test': 'custom-meta'}, + contents='VERIFY') self.brain.start_handoff_half() # make sure we have some manner of split brain container_part, container_nodes = self.container_ring.get_nodes( @@ -127,24 +146,10 @@ class TestContainerMergePolicyIndex(ReplProbeTest): self.fail('Found /%s/%s/%s in %s' % ( self.account, self.container_name, self.object_name, orig_policy_index)) - # use proxy to access object (bad container info might be cached...) - timeout = time.time() + TIMEOUT - while time.time() < timeout: - try: - metadata = client.head_object(self.url, self.token, - self.container_name, - self.object_name) - except ClientException as err: - if err.http_status != HTTP_NOT_FOUND: - raise - time.sleep(1) - else: - break - else: - self.fail('could not HEAD /%s/%s/%s/ from policy %s ' - 'after %s seconds.' 
% ( - self.account, self.container_name, self.object_name, - expected_policy_index, TIMEOUT)) + # verify that the object data read by external client is correct + headers, data = self._get_object_patiently(expected_policy_index) + self.assertEqual('VERIFY', data) + self.assertEqual('custom-meta', headers['x-object-meta-test']) def test_reconcile_delete(self): # generic split brain @@ -399,17 +404,18 @@ class TestContainerMergePolicyIndex(ReplProbeTest): self.assertEqual(2, len(old_container_node_ids)) # hopefully memcache still has the new policy cached - self.brain.put_object() + self.brain.put_object(headers={'x-object-meta-test': 'custom-meta'}, + contents='VERIFY') # double-check object correctly written to new policy conf_files = [] for server in Manager(['container-reconciler']).servers: conf_files.extend(server.conf_files()) conf_file = conf_files[0] - client = InternalClient(conf_file, 'probe-test', 3) - client.get_object_metadata( + int_client = InternalClient(conf_file, 'probe-test', 3) + int_client.get_object_metadata( self.account, self.container_name, self.object_name, headers={'X-Backend-Storage-Policy-Index': int(new_policy)}) - client.get_object_metadata( + int_client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4,), headers={'X-Backend-Storage-Policy-Index': int(old_policy)}) @@ -423,9 +429,9 @@ class TestContainerMergePolicyIndex(ReplProbeTest): tuple(server.once(number=n + 1) for n in old_container_node_ids) # verify entry in the queue for the "misplaced" new_policy - for container in client.iter_containers('.misplaced_objects'): - for obj in client.iter_objects('.misplaced_objects', - container['name']): + for container in int_client.iter_containers('.misplaced_objects'): + for obj in int_client.iter_objects('.misplaced_objects', + container['name']): expected = '%d:/%s/%s/%s' % (new_policy, self.account, self.container_name, self.object_name) @@ -434,12 +440,12 @@ class TestContainerMergePolicyIndex(ReplProbeTest): Manager(['container-reconciler']).once() # verify object in old_policy - client.get_object_metadata( + int_client.get_object_metadata( self.account, self.container_name, self.object_name, headers={'X-Backend-Storage-Policy-Index': int(old_policy)}) # verify object is *not* in new_policy - client.get_object_metadata( + int_client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4,), headers={'X-Backend-Storage-Policy-Index': int(new_policy)}) @@ -447,10 +453,9 @@ class TestContainerMergePolicyIndex(ReplProbeTest): self.get_to_final_state() # verify entry in the queue - client = InternalClient(conf_file, 'probe-test', 3) - for container in client.iter_containers('.misplaced_objects'): - for obj in client.iter_objects('.misplaced_objects', - container['name']): + for container in int_client.iter_containers('.misplaced_objects'): + for obj in int_client.iter_objects('.misplaced_objects', + container['name']): expected = '%d:/%s/%s/%s' % (old_policy, self.account, self.container_name, self.object_name) @@ -459,21 +464,26 @@ class TestContainerMergePolicyIndex(ReplProbeTest): Manager(['container-reconciler']).once() # and now it flops back - client.get_object_metadata( + int_client.get_object_metadata( self.account, self.container_name, self.object_name, headers={'X-Backend-Storage-Policy-Index': int(new_policy)}) - client.get_object_metadata( + int_client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4,), 
headers={'X-Backend-Storage-Policy-Index': int(old_policy)}) # make sure the queue is settled self.get_to_final_state() - for container in client.iter_containers('.misplaced_objects'): - for obj in client.iter_objects('.misplaced_objects', - container['name']): + for container in int_client.iter_containers('.misplaced_objects'): + for obj in int_client.iter_objects('.misplaced_objects', + container['name']): self.fail('Found unexpected object %r in the queue' % obj) + # verify that the object data read by external client is correct + headers, data = self._get_object_patiently(int(new_policy)) + self.assertEqual('VERIFY', data) + self.assertEqual('custom-meta', headers['x-object-meta-test']) + if __name__ == "__main__": unittest.main() diff --git a/test/unit/common/middleware/helpers.py b/test/unit/common/middleware/helpers.py index bcd3c4c2ec..e542818967 100644 --- a/test/unit/common/middleware/helpers.py +++ b/test/unit/common/middleware/helpers.py @@ -168,3 +168,13 @@ class FakeSwift(object): def register_responses(self, method, path, responses): self._responses[(method, path)] = list(responses) + + +class FakeAppThatExcepts(object): + MESSAGE = "We take exception to that!" + + def __init__(self, exception_class=Exception): + self.exception_class = exception_class + + def __call__(self, env, start_response): + raise self.exception_class(self.MESSAGE) diff --git a/test/unit/common/middleware/test_proxy_logging.py b/test/unit/common/middleware/test_proxy_logging.py index 19866cb793..2282a9f1b7 100644 --- a/test/unit/common/middleware/test_proxy_logging.py +++ b/test/unit/common/middleware/test_proxy_logging.py @@ -27,6 +27,7 @@ from swift.common.swob import Request, Response from swift.common import constraints from swift.common.storage_policy import StoragePolicy from test.unit import patch_policies +from test.unit.common.middleware.helpers import FakeAppThatExcepts class FakeApp(object): @@ -59,12 +60,6 @@ class FakeApp(object): return self.body -class FakeAppThatExcepts(object): - - def __call__(self, env, start_response): - raise Exception("We take exception to that!") - - class FakeAppNoContentLengthNoTransferEncoding(object): def __init__(self, body=None): diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 14e826c908..446abfc1fa 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -3210,6 +3210,24 @@ cluster_dfw1 = http://dfw1.host/v1/ self.assertEqual(listing_dict['content_type'], 'text/plain;hello="world"') + def test_extract_swift_bytes(self): + scenarios = { + # maps input value -> expected returned tuple + '': ('', None), + 'text/plain': ('text/plain', None), + 'text/plain; other=thing': ('text/plain;other=thing', None), + 'text/plain; swift_bytes=123': ('text/plain', '123'), + 'text/plain; other=thing;swift_bytes=123': + ('text/plain;other=thing', '123'), + 'text/plain; swift_bytes=123; other=thing': + ('text/plain;other=thing', '123'), + 'text/plain; swift_bytes=123; swift_bytes=456': + ('text/plain', '456'), + 'text/plain; swift_bytes=123; other=thing;swift_bytes=456': + ('text/plain;other=thing', '456')} + for test_value, expected in scenarios.items(): + self.assertEqual(expected, utils.extract_swift_bytes(test_value)) + def test_clean_content_type(self): subtests = { '': '', 'text/plain': 'text/plain', diff --git a/test/unit/helpers.py b/test/unit/helpers.py new file mode 100644 index 0000000000..46f4b80b1e --- /dev/null +++ b/test/unit/helpers.py @@ -0,0 +1,271 @@ +# Copyright (c) 2010-2016 OpenStack Foundation 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Provides helper functions for unit tests.
+
+This cannot be in test/unit/__init__.py because that module is imported by the
+py34 unit test job and there are imports here that end up importing modules
+that are not yet ported to py34, such as wsgi.py which imports mimetools.
+"""
+import os
+from contextlib import closing
+from gzip import GzipFile
+from tempfile import mkdtemp
+import time
+
+
+from eventlet import listen, spawn, wsgi
+import mock
+from shutil import rmtree
+import six.moves.cPickle as pickle
+
+import swift
+from swift.account import server as account_server
+from swift.common import storage_policy
+from swift.common.ring import RingData
+from swift.common.storage_policy import StoragePolicy, ECStoragePolicy
+from swift.common.middleware import proxy_logging
+from swift.common import utils
+from swift.common.utils import mkdirs, normalize_timestamp, NullLogger
+from swift.container import server as container_server
+from swift.obj import server as object_server
+from swift.proxy import server as proxy_server
+import swift.proxy.controllers.obj
+
+from test.unit import write_fake_ring, DEFAULT_TEST_EC_TYPE, debug_logger, \
+    connect_tcp, readuntil2crlfs
+
+
+def setup_servers(the_object_server=object_server, extra_conf=None):
+    """
+    Set up proxy, account, container and object servers using a set of fake
+    rings and policies.
+
+    :param the_object_server: The object server module to use (optional,
+                              defaults to swift.obj.server)
+    :param extra_conf: A dict of config options that will update the basic
+                       config passed to all server instances.
+    :returns: A dict containing the following entries:
+                  orig_POLICIES: the value of storage_policy.POLICIES prior to
+                      it being patched with fake policies
+                  orig_SysLogHandler: the value of utils.SysLogHandler prior to
+                      it being patched
+                  testdir: root directory used for test files
+                  test_POLICIES: a StoragePolicyCollection of fake policies
+                  test_servers: a tuple of test server instances
+                  test_sockets: a tuple of sockets used by test servers
+                  test_coros: a tuple of greenthreads in which test servers are
+                      running
+    """
+    context = {
+        "orig_POLICIES": storage_policy._POLICIES,
+        "orig_SysLogHandler": utils.SysLogHandler}
+
+    utils.HASH_PATH_SUFFIX = 'endcap'
+    utils.SysLogHandler = mock.MagicMock()
+    # Since we're starting up a lot here, we're going to test more than
+    # just chunked puts; we're also going to test parts of
+    # proxy_server.Application we couldn't get to easily otherwise.
+ context["testdir"] = _testdir = \ + os.path.join(mkdtemp(), 'tmp_test_proxy_server_chunked') + mkdirs(_testdir) + rmtree(_testdir) + for drive in ('sda1', 'sdb1', 'sdc1', 'sdd1', 'sde1', + 'sdf1', 'sdg1', 'sdh1', 'sdi1'): + mkdirs(os.path.join(_testdir, drive, 'tmp')) + conf = {'devices': _testdir, 'swift_dir': _testdir, + 'mount_check': 'false', 'allowed_headers': + 'content-encoding, x-object-manifest, content-disposition, foo', + 'allow_versions': 't'} + if extra_conf: + conf.update(extra_conf) + prolis = listen(('localhost', 0)) + acc1lis = listen(('localhost', 0)) + acc2lis = listen(('localhost', 0)) + con1lis = listen(('localhost', 0)) + con2lis = listen(('localhost', 0)) + obj1lis = listen(('localhost', 0)) + obj2lis = listen(('localhost', 0)) + obj3lis = listen(('localhost', 0)) + objsocks = [obj1lis, obj2lis, obj3lis] + context["test_sockets"] = \ + (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, obj2lis, obj3lis) + account_ring_path = os.path.join(_testdir, 'account.ring.gz') + account_devs = [ + {'port': acc1lis.getsockname()[1]}, + {'port': acc2lis.getsockname()[1]}, + ] + write_fake_ring(account_ring_path, *account_devs) + container_ring_path = os.path.join(_testdir, 'container.ring.gz') + container_devs = [ + {'port': con1lis.getsockname()[1]}, + {'port': con2lis.getsockname()[1]}, + ] + write_fake_ring(container_ring_path, *container_devs) + storage_policy._POLICIES = storage_policy.StoragePolicyCollection([ + StoragePolicy(0, 'zero', True), + StoragePolicy(1, 'one', False), + StoragePolicy(2, 'two', False), + ECStoragePolicy(3, 'ec', ec_type=DEFAULT_TEST_EC_TYPE, + ec_ndata=2, ec_nparity=1, ec_segment_size=4096)]) + obj_rings = { + 0: ('sda1', 'sdb1'), + 1: ('sdc1', 'sdd1'), + 2: ('sde1', 'sdf1'), + # sdg1, sdh1, sdi1 taken by policy 3 (see below) + } + for policy_index, devices in obj_rings.items(): + policy = storage_policy.POLICIES[policy_index] + obj_ring_path = os.path.join(_testdir, policy.ring_name + '.ring.gz') + obj_devs = [ + {'port': objsock.getsockname()[1], 'device': dev} + for objsock, dev in zip(objsocks, devices)] + write_fake_ring(obj_ring_path, *obj_devs) + + # write_fake_ring can't handle a 3-element ring, and the EC policy needs + # at least 3 devs to work with, so we do it manually + devs = [{'id': 0, 'zone': 0, 'device': 'sdg1', 'ip': '127.0.0.1', + 'port': obj1lis.getsockname()[1]}, + {'id': 1, 'zone': 0, 'device': 'sdh1', 'ip': '127.0.0.1', + 'port': obj2lis.getsockname()[1]}, + {'id': 2, 'zone': 0, 'device': 'sdi1', 'ip': '127.0.0.1', + 'port': obj3lis.getsockname()[1]}] + pol3_replica2part2dev_id = [[0, 1, 2, 0], + [1, 2, 0, 1], + [2, 0, 1, 2]] + obj3_ring_path = os.path.join( + _testdir, storage_policy.POLICIES[3].ring_name + '.ring.gz') + part_shift = 30 + with closing(GzipFile(obj3_ring_path, 'wb')) as fh: + pickle.dump(RingData(pol3_replica2part2dev_id, devs, part_shift), fh) + + prosrv = proxy_server.Application(conf, logger=debug_logger('proxy')) + for policy in storage_policy.POLICIES: + # make sure all the rings are loaded + prosrv.get_object_ring(policy.idx) + # don't lose this one! 
+ context["test_POLICIES"] = storage_policy._POLICIES + acc1srv = account_server.AccountController( + conf, logger=debug_logger('acct1')) + acc2srv = account_server.AccountController( + conf, logger=debug_logger('acct2')) + con1srv = container_server.ContainerController( + conf, logger=debug_logger('cont1')) + con2srv = container_server.ContainerController( + conf, logger=debug_logger('cont2')) + obj1srv = the_object_server.ObjectController( + conf, logger=debug_logger('obj1')) + obj2srv = the_object_server.ObjectController( + conf, logger=debug_logger('obj2')) + obj3srv = the_object_server.ObjectController( + conf, logger=debug_logger('obj3')) + context["test_servers"] = \ + (prosrv, acc1srv, acc2srv, con1srv, con2srv, obj1srv, obj2srv, obj3srv) + nl = NullLogger() + logging_prosv = proxy_logging.ProxyLoggingMiddleware(prosrv, conf, + logger=prosrv.logger) + prospa = spawn(wsgi.server, prolis, logging_prosv, nl) + acc1spa = spawn(wsgi.server, acc1lis, acc1srv, nl) + acc2spa = spawn(wsgi.server, acc2lis, acc2srv, nl) + con1spa = spawn(wsgi.server, con1lis, con1srv, nl) + con2spa = spawn(wsgi.server, con2lis, con2srv, nl) + obj1spa = spawn(wsgi.server, obj1lis, obj1srv, nl) + obj2spa = spawn(wsgi.server, obj2lis, obj2srv, nl) + obj3spa = spawn(wsgi.server, obj3lis, obj3srv, nl) + context["test_coros"] = \ + (prospa, acc1spa, acc2spa, con1spa, con2spa, obj1spa, obj2spa, obj3spa) + # Create account + ts = normalize_timestamp(time.time()) + partition, nodes = prosrv.account_ring.get_nodes('a') + for node in nodes: + conn = swift.proxy.controllers.obj.http_connect(node['ip'], + node['port'], + node['device'], + partition, 'PUT', '/a', + {'X-Timestamp': ts, + 'x-trans-id': 'test'}) + resp = conn.getresponse() + assert(resp.status == 201) + # Create another account + # used for account-to-account tests + ts = normalize_timestamp(time.time()) + partition, nodes = prosrv.account_ring.get_nodes('a1') + for node in nodes: + conn = swift.proxy.controllers.obj.http_connect(node['ip'], + node['port'], + node['device'], + partition, 'PUT', + '/a1', + {'X-Timestamp': ts, + 'x-trans-id': 'test'}) + resp = conn.getresponse() + assert(resp.status == 201) + # Create containers, 1 per test policy + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('PUT /v1/a/c HTTP/1.1\r\nHost: localhost\r\n' + 'Connection: close\r\nX-Auth-Token: t\r\n' + 'Content-Length: 0\r\n\r\n') + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + assert headers[:len(exp)] == exp, "Expected '%s', encountered '%s'" % ( + exp, headers[:len(exp)]) + # Create container in other account + # used for account-to-account tests + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('PUT /v1/a1/c1 HTTP/1.1\r\nHost: localhost\r\n' + 'Connection: close\r\nX-Auth-Token: t\r\n' + 'Content-Length: 0\r\n\r\n') + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + assert headers[:len(exp)] == exp, "Expected '%s', encountered '%s'" % ( + exp, headers[:len(exp)]) + + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write( + 'PUT /v1/a/c1 HTTP/1.1\r\nHost: localhost\r\n' + 'Connection: close\r\nX-Auth-Token: t\r\nX-Storage-Policy: one\r\n' + 'Content-Length: 0\r\n\r\n') + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + assert headers[:len(exp)] == exp, \ + "Expected '%s', encountered '%s'" % (exp, headers[:len(exp)]) + + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = 
sock.makefile() + fd.write( + 'PUT /v1/a/c2 HTTP/1.1\r\nHost: localhost\r\n' + 'Connection: close\r\nX-Auth-Token: t\r\nX-Storage-Policy: two\r\n' + 'Content-Length: 0\r\n\r\n') + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + assert headers[:len(exp)] == exp, \ + "Expected '%s', encountered '%s'" % (exp, headers[:len(exp)]) + return context + + +def teardown_servers(context): + for server in context["test_coros"]: + server.kill() + rmtree(os.path.dirname(context["testdir"])) + utils.SysLogHandler = context["orig_SysLogHandler"] + storage_policy._POLICIES = context["orig_POLICIES"] diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 7aac742c19..6ae48bc605 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -20,12 +20,10 @@ import logging import json import math import os -import pickle import sys import traceback import unittest -from contextlib import closing, contextmanager -from gzip import GzipFile +from contextlib import contextmanager from shutil import rmtree import gc import time @@ -55,13 +53,11 @@ from swift.common.utils import hash_path, storage_directory, \ iter_multipart_mime_documents, public from test.unit import ( - connect_tcp, readuntil2crlfs, FakeLogger, fake_http_connect, FakeRing, + connect_tcp, readuntil2crlfs, FakeLogger, FakeRing, fake_http_connect, FakeMemcache, debug_logger, patch_policies, write_fake_ring, mocked_http_conn, DEFAULT_TEST_EC_TYPE) from swift.proxy import server as proxy_server from swift.proxy.controllers.obj import ReplicatedObjectController -from swift.account import server as account_server -from swift.container import server as container_server from swift.obj import server as object_server from swift.common.middleware import proxy_logging, versioned_writes, \ copy @@ -69,8 +65,7 @@ from swift.common.middleware.acl import parse_acl, format_acl from swift.common.exceptions import ChunkReadTimeout, DiskFileNotExist, \ APIVersionError, ChunkWriteTimeout from swift.common import utils, constraints -from swift.common.ring import RingData -from swift.common.utils import mkdirs, normalize_timestamp, NullLogger +from swift.common.utils import mkdirs, NullLogger from swift.common.wsgi import monkey_patch_mimetools, loadapp from swift.proxy.controllers import base as proxy_base from swift.proxy.controllers.base import get_cache_key, cors_validation, \ @@ -80,212 +75,31 @@ import swift.proxy.controllers.obj from swift.common.header_key_dict import HeaderKeyDict from swift.common.swob import Request, Response, HTTPUnauthorized, \ HTTPException, HTTPBadRequest -from swift.common import storage_policy -from swift.common.storage_policy import StoragePolicy, ECStoragePolicy, \ - StoragePolicyCollection, POLICIES +from swift.common.storage_policy import StoragePolicy, POLICIES import swift.common.request_helpers from swift.common.request_helpers import get_sys_meta_prefix +from test.unit.helpers import setup_servers, teardown_servers + # mocks logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) STATIC_TIME = time.time() -_test_coros = _test_servers = _test_sockets = _orig_container_listing_limit = \ - _testdir = _orig_SysLogHandler = _orig_POLICIES = _test_POLICIES = None +_test_context = _test_servers = _test_sockets = _testdir = \ + _test_POLICIES = None -def do_setup(the_object_server): - utils.HASH_PATH_SUFFIX = 'endcap' - global _testdir, _test_servers, _test_sockets, \ - _orig_container_listing_limit, _test_coros, _orig_SysLogHandler, \ - _orig_POLICIES, 
_test_POLICIES - _orig_POLICIES = storage_policy._POLICIES - _orig_SysLogHandler = utils.SysLogHandler - utils.SysLogHandler = mock.MagicMock() +def do_setup(object_server): + # setup test context and break out some globals for convenience + global _test_context, _testdir, _test_servers, _test_sockets, \ + _test_POLICIES monkey_patch_mimetools() - # Since we're starting up a lot here, we're going to test more than - # just chunked puts; we're also going to test parts of - # proxy_server.Application we couldn't get to easily otherwise. - _testdir = \ - os.path.join(mkdtemp(), 'tmp_test_proxy_server_chunked') - mkdirs(_testdir) - rmtree(_testdir) - for drive in ('sda1', 'sdb1', 'sdc1', 'sdd1', 'sde1', - 'sdf1', 'sdg1', 'sdh1', 'sdi1'): - mkdirs(os.path.join(_testdir, drive, 'tmp')) - conf = {'devices': _testdir, 'swift_dir': _testdir, - 'mount_check': 'false', 'allowed_headers': - 'content-encoding, x-object-manifest, content-disposition, foo', - 'allow_versions': 't'} - prolis = listen(('localhost', 0)) - acc1lis = listen(('localhost', 0)) - acc2lis = listen(('localhost', 0)) - con1lis = listen(('localhost', 0)) - con2lis = listen(('localhost', 0)) - obj1lis = listen(('localhost', 0)) - obj2lis = listen(('localhost', 0)) - obj3lis = listen(('localhost', 0)) - objsocks = [obj1lis, obj2lis, obj3lis] - _test_sockets = \ - (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, obj2lis, obj3lis) - account_ring_path = os.path.join(_testdir, 'account.ring.gz') - account_devs = [ - {'port': acc1lis.getsockname()[1]}, - {'port': acc2lis.getsockname()[1]}, - ] - write_fake_ring(account_ring_path, *account_devs) - container_ring_path = os.path.join(_testdir, 'container.ring.gz') - container_devs = [ - {'port': con1lis.getsockname()[1]}, - {'port': con2lis.getsockname()[1]}, - ] - write_fake_ring(container_ring_path, *container_devs) - storage_policy._POLICIES = StoragePolicyCollection([ - StoragePolicy(0, 'zero', True), - StoragePolicy(1, 'one', False), - StoragePolicy(2, 'two', False), - ECStoragePolicy(3, 'ec', ec_type=DEFAULT_TEST_EC_TYPE, - ec_ndata=2, ec_nparity=1, ec_segment_size=4096)]) - obj_rings = { - 0: ('sda1', 'sdb1'), - 1: ('sdc1', 'sdd1'), - 2: ('sde1', 'sdf1'), - # sdg1, sdh1, sdi1 taken by policy 3 (see below) - } - for policy_index, devices in obj_rings.items(): - policy = POLICIES[policy_index] - obj_ring_path = os.path.join(_testdir, policy.ring_name + '.ring.gz') - obj_devs = [ - {'port': objsock.getsockname()[1], 'device': dev} - for objsock, dev in zip(objsocks, devices)] - write_fake_ring(obj_ring_path, *obj_devs) - - # write_fake_ring can't handle a 3-element ring, and the EC policy needs - # at least 3 devs to work with, so we do it manually - devs = [{'id': 0, 'zone': 0, 'device': 'sdg1', 'ip': '127.0.0.1', - 'port': obj1lis.getsockname()[1]}, - {'id': 1, 'zone': 0, 'device': 'sdh1', 'ip': '127.0.0.1', - 'port': obj2lis.getsockname()[1]}, - {'id': 2, 'zone': 0, 'device': 'sdi1', 'ip': '127.0.0.1', - 'port': obj3lis.getsockname()[1]}] - pol3_replica2part2dev_id = [[0, 1, 2, 0], - [1, 2, 0, 1], - [2, 0, 1, 2]] - obj3_ring_path = os.path.join(_testdir, POLICIES[3].ring_name + '.ring.gz') - part_shift = 30 - with closing(GzipFile(obj3_ring_path, 'wb')) as fh: - pickle.dump(RingData(pol3_replica2part2dev_id, devs, part_shift), fh) - - prosrv = proxy_server.Application(conf, FakeMemcacheReturnsNone(), - logger=debug_logger('proxy')) - for policy in POLICIES: - # make sure all the rings are loaded - prosrv.get_object_ring(policy.idx) - # don't lose this one! 
- _test_POLICIES = storage_policy._POLICIES - acc1srv = account_server.AccountController( - conf, logger=debug_logger('acct1')) - acc2srv = account_server.AccountController( - conf, logger=debug_logger('acct2')) - con1srv = container_server.ContainerController( - conf, logger=debug_logger('cont1')) - con2srv = container_server.ContainerController( - conf, logger=debug_logger('cont2')) - obj1srv = the_object_server.ObjectController( - conf, logger=debug_logger('obj1')) - obj2srv = the_object_server.ObjectController( - conf, logger=debug_logger('obj2')) - obj3srv = the_object_server.ObjectController( - conf, logger=debug_logger('obj3')) - _test_servers = \ - (prosrv, acc1srv, acc2srv, con1srv, con2srv, obj1srv, obj2srv, obj3srv) - nl = NullLogger() - logging_prosv = proxy_logging.ProxyLoggingMiddleware(prosrv, conf, - logger=prosrv.logger) - prospa = spawn(wsgi.server, prolis, logging_prosv, nl) - acc1spa = spawn(wsgi.server, acc1lis, acc1srv, nl) - acc2spa = spawn(wsgi.server, acc2lis, acc2srv, nl) - con1spa = spawn(wsgi.server, con1lis, con1srv, nl) - con2spa = spawn(wsgi.server, con2lis, con2srv, nl) - obj1spa = spawn(wsgi.server, obj1lis, obj1srv, nl) - obj2spa = spawn(wsgi.server, obj2lis, obj2srv, nl) - obj3spa = spawn(wsgi.server, obj3lis, obj3srv, nl) - _test_coros = \ - (prospa, acc1spa, acc2spa, con1spa, con2spa, obj1spa, obj2spa, obj3spa) - # Create account - ts = normalize_timestamp(time.time()) - partition, nodes = prosrv.account_ring.get_nodes('a') - for node in nodes: - conn = swift.proxy.controllers.obj.http_connect(node['ip'], - node['port'], - node['device'], - partition, 'PUT', '/a', - {'X-Timestamp': ts, - 'x-trans-id': 'test'}) - resp = conn.getresponse() - assert(resp.status == 201) - # Create another account - # used for account-to-account tests - ts = normalize_timestamp(time.time()) - partition, nodes = prosrv.account_ring.get_nodes('a1') - for node in nodes: - conn = swift.proxy.controllers.obj.http_connect(node['ip'], - node['port'], - node['device'], - partition, 'PUT', - '/a1', - {'X-Timestamp': ts, - 'x-trans-id': 'test'}) - resp = conn.getresponse() - assert(resp.status == 201) - # Create containers, 1 per test policy - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT /v1/a/c HTTP/1.1\r\nHost: localhost\r\n' - 'Connection: close\r\nX-Auth-Token: t\r\n' - 'Content-Length: 0\r\n\r\n') - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - assert headers[:len(exp)] == exp, "Expected '%s', encountered '%s'" % ( - exp, headers[:len(exp)]) - # Create container in other account - # used for account-to-account tests - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT /v1/a1/c1 HTTP/1.1\r\nHost: localhost\r\n' - 'Connection: close\r\nX-Auth-Token: t\r\n' - 'Content-Length: 0\r\n\r\n') - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - assert headers[:len(exp)] == exp, "Expected '%s', encountered '%s'" % ( - exp, headers[:len(exp)]) - - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write( - 'PUT /v1/a/c1 HTTP/1.1\r\nHost: localhost\r\n' - 'Connection: close\r\nX-Auth-Token: t\r\nX-Storage-Policy: one\r\n' - 'Content-Length: 0\r\n\r\n') - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - assert headers[:len(exp)] == exp, \ - "Expected '%s', encountered '%s'" % (exp, headers[:len(exp)]) - - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write( - 
'PUT /v1/a/c2 HTTP/1.1\r\nHost: localhost\r\n' - 'Connection: close\r\nX-Auth-Token: t\r\nX-Storage-Policy: two\r\n' - 'Content-Length: 0\r\n\r\n') - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - assert headers[:len(exp)] == exp, \ - "Expected '%s', encountered '%s'" % (exp, headers[:len(exp)]) + _test_context = setup_servers(object_server) + _testdir = _test_context["testdir"] + _test_servers = _test_context["test_servers"] + _test_sockets = _test_context["test_sockets"] + _test_POLICIES = _test_context["test_POLICIES"] def unpatch_policies(f): @@ -308,11 +122,7 @@ def setup(): def teardown(): - for server in _test_coros: - server.kill() - rmtree(os.path.dirname(_testdir)) - utils.SysLogHandler = _orig_SysLogHandler - storage_policy._POLICIES = _orig_POLICIES + teardown_servers(_test_context) def sortHeaderNames(headerNames): From 03b762e80a9b3d33ce13b8222f4cd2b549171c51 Mon Sep 17 00:00:00 2001 From: Janie Richling Date: Mon, 6 Jun 2016 17:19:48 +0100 Subject: [PATCH 3/7] Support for http footers - Replication and EC Before this patch, the proxy ObjectController supported sending metadata from the proxy server to object servers in "footers" that trail the body of HTTP PUT requests, but this support was for EC policies only. The encryption feature requires that footers are sent with both EC and replicated policy requests in order to persist encryption specific sysmeta, and to override container update headers with an encrypted Etag value. This patch: - Moves most of the functionality of ECPutter into a generic Putter class that is used for replicated object PUTs without footers. - Creates a MIMEPutter subclass to support multipart and multiphase behaviour required for any replicated object PUT with footers and all EC PUTs. - Modifies ReplicatedObjectController to use Putter objects in place of raw connection objects. - Refactors the _get_put_connections method and _put_connect_node methods so that more code is in the BaseObjectController class and therefore shared by [EC|Replicated]ObjectController classes. - Adds support to call a callback that middleware may have placed in the environ, so the callback can set footers. The x-object-sysmeta-ec- namespace is reserved and any footer values set by middleware in that namespace will not be forwarded to object servers. In addition this patch enables more than one value to be added to the X-Backend-Etag-Is-At header. This header is used to point to an (optional) alternative sysmeta header whose value should be used when evaluating conditional requests with If-[None-]Match headers. This is already used with EC policies when the ECObjectController has calculated the actual body Etag and sent it using a footer (X-Object-Sysmeta-EC-Etag). X-Backend-Etag-Is-At is in that case set to X-Object-Sysmeta-Ec-Etag so as to point to the actual body Etag value rather than the EC fragment Etag. Encryption will also need to add a pointer to an encrypted Etag value. However, the referenced sysmeta may not exist, for example if the object was created before encryption was enabled. The X-Backend-Etag-Is-At value is therefore changed to support a list of possible locations for alternate Etag values. Encryption will place its expected alternative Etag location on this list, as will the ECObjectController, and the object server will look for the first object metadata to match an entry on the list when matching conditional requests. 
That way, if the object was not encrypted then the object server will fall through to using the EC Etag value, or in the case of a replicated policy will fall through to using the normal Etag metadata. If your proxy has a third-party middleware that uses X-Backend-Etag-Is-At and it upgrades before an object server it's talking to then conditional requests may be broken. UpgradeImpact Co-Authored-By: Alistair Coles Co-Authored-By: Thiago da Silva Co-Authored-By: Samuel Merritt Co-Authored-By: Kota Tsuyuzaki Closes-Bug: #1594739 Change-Id: I12a6e41150f90de746ce03623032b83ed1987ee1 --- swift/common/request_helpers.py | 66 +- swift/common/swob.py | 4 +- swift/obj/server.py | 12 +- swift/proxy/controllers/obj.py | 945 ++++++++++++----------- test/unit/__init__.py | 15 +- test/unit/common/middleware/helpers.py | 25 +- test/unit/common/test_request_helpers.py | 74 +- test/unit/obj/test_server.py | 34 + test/unit/proxy/controllers/test_obj.py | 480 +++++++++++- test/unit/proxy/test_server.py | 4 +- 10 files changed, 1165 insertions(+), 494 deletions(-) diff --git a/swift/common/request_helpers.py b/swift/common/request_helpers.py index 07e34d8b46..71a32106af 100644 --- a/swift/common/request_helpers.py +++ b/swift/common/request_helpers.py @@ -27,6 +27,7 @@ import time import six from six.moves.urllib.parse import unquote +from swift.common.header_key_dict import HeaderKeyDict from swift import gettext_ as _ from swift.common.storage_policy import POLICIES @@ -38,7 +39,7 @@ from swift.common.swob import HTTPBadRequest, HTTPNotAcceptable, \ from swift.common.utils import split_path, validate_device_partition, \ close_if_possible, maybe_multipart_byteranges_to_document_iters, \ multipart_byteranges_to_document_iters, parse_content_type, \ - parse_content_range + parse_content_range, csv_append, list_from_csv from swift.common.wsgi import make_subrequest @@ -544,3 +545,66 @@ def http_response_to_document_iters(response, read_chunk_size=4096): params = dict(params_list) return multipart_byteranges_to_document_iters( response, params['boundary'], read_chunk_size) + + +def update_etag_is_at_header(req, name): + """ + Helper function to update an X-Backend-Etag-Is-At header whose value is a + list of alternative header names at which the actual object etag may be + found. This informs the object server where to look for the actual object + etag when processing conditional requests. + + Since the proxy server and/or middleware may set alternative etag header + names, the value of X-Backend-Etag-Is-At is a comma separated list which + the object server inspects in order until it finds an etag value. + + :param req: a swob Request + :param name: name of a sysmeta where alternative etag may be found + """ + if ',' in name: + # HTTP header names should not have commas but we'll check anyway + raise ValueError('Header name must not contain commas') + existing = req.headers.get("X-Backend-Etag-Is-At") + req.headers["X-Backend-Etag-Is-At"] = csv_append( + existing, name) + + +def resolve_etag_is_at_header(req, metadata): + """ + Helper function to resolve an alternative etag value that may be stored in + metadata under an alternate name. + + The value of the request's X-Backend-Etag-Is-At header (if it exists) is a + comma separated list of alternate names in the metadata at which an + alternate etag value may be found. This list is processed in order until an + alternate etag is found. 
+
+    The left most value in X-Backend-Etag-Is-At will have been set by the left
+    most middleware, or if no middleware, by ECObjectController, if an EC
+    policy is in use. The left most middleware is assumed to be the authority
+    on what the etag value of the object content is.
+
+    The resolver will work from left to right in the list until it finds a
+    value that is a name in the given metadata. So the left most wins, IF it
+    exists in the metadata.
+
+    By way of example, assume the encrypter middleware is installed. If an
+    object is *not* encrypted then the resolver will not find the encrypter
+    middleware's alternate etag sysmeta (X-Object-Sysmeta-Crypto-Etag) but will
+    then find the EC alternate etag (if EC policy). But if the object *is*
+    encrypted then X-Object-Sysmeta-Crypto-Etag is found and used, which is
+    correct because it should be preferred over X-Object-Sysmeta-Ec-Etag.
+
+    :param req: a swob Request
+    :param metadata: a dict containing object metadata
+    :return: an alternate etag value if any is found, otherwise None
+    """
+    alternate_etag = None
+    metadata = HeaderKeyDict(metadata)
+    if "X-Backend-Etag-Is-At" in req.headers:
+        names = list_from_csv(req.headers["X-Backend-Etag-Is-At"])
+        for name in names:
+            if name in metadata:
+                alternate_etag = metadata[name]
+                break
+    return alternate_etag
diff --git a/swift/common/swob.py b/swift/common/swob.py
index 2ba5d5e6a4..aa11ec01f2 100644
--- a/swift/common/swob.py
+++ b/swift/common/swob.py
@@ -1140,8 +1140,8 @@ class Response(object):
         conditional requests.
 
         It's most effectively used with X-Backend-Etag-Is-At which would
-        define the additional Metadata key where the original ETag of the
-        clear-form client request data.
+        define the additional Metadata key(s) where the original ETag of the
+        clear-form client request data may be found.
""" if self._conditional_etag is not None: return self._conditional_etag diff --git a/swift/obj/server.py b/swift/obj/server.py index c3fde72525..99083800eb 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -46,7 +46,7 @@ from swift.common.http import is_success from swift.common.base_storage_server import BaseStorageServer from swift.common.header_key_dict import HeaderKeyDict from swift.common.request_helpers import get_name_and_placement, \ - is_user_meta, is_sys_or_user_meta + is_user_meta, is_sys_or_user_meta, resolve_etag_is_at_header from swift.common.swob import HTTPAccepted, HTTPBadRequest, HTTPCreated, \ HTTPInternalServerError, HTTPNoContent, HTTPNotFound, \ HTTPPreconditionFailed, HTTPRequestTimeout, HTTPUnprocessableEntity, \ @@ -832,10 +832,7 @@ class ObjectController(BaseStorageServer): keep_cache = (self.keep_cache_private or ('X-Auth-Token' not in request.headers and 'X-Storage-Token' not in request.headers)) - conditional_etag = None - if 'X-Backend-Etag-Is-At' in request.headers: - conditional_etag = metadata.get( - request.headers['X-Backend-Etag-Is-At']) + conditional_etag = resolve_etag_is_at_header(request, metadata) response = Response( app_iter=disk_file.reader(keep_cache=keep_cache), request=request, conditional_response=True, @@ -889,10 +886,7 @@ class ObjectController(BaseStorageServer): headers['X-Backend-Timestamp'] = e.timestamp.internal return HTTPNotFound(request=request, headers=headers, conditional_response=True) - conditional_etag = None - if 'X-Backend-Etag-Is-At' in request.headers: - conditional_etag = metadata.get( - request.headers['X-Backend-Etag-Is-At']) + conditional_etag = resolve_etag_is_at_header(request, metadata) response = Response(request=request, conditional_response=True, conditional_etag=conditional_etag) response.headers['Content-Type'] = metadata.get( diff --git a/swift/proxy/controllers/obj.py b/swift/proxy/controllers/obj.py index 6f8559063a..af6b9368d7 100644 --- a/swift/proxy/controllers/obj.py +++ b/swift/proxy/controllers/obj.py @@ -71,6 +71,8 @@ from swift.common.swob import HTTPAccepted, HTTPBadRequest, HTTPNotFound, \ HTTPServerError, HTTPServiceUnavailable, HTTPClientDisconnect, \ HTTPUnprocessableEntity, Response, HTTPException, \ HTTPRequestedRangeNotSatisfiable, Range, HTTPInternalServerError +from swift.common.request_helpers import update_etag_is_at_header, \ + resolve_etag_is_at_header def check_content_type(req): @@ -289,71 +291,111 @@ class BaseObjectController(Controller): return headers - def _await_response(self, conn, **kwargs): - with Timeout(self.app.node_timeout): - if conn.resp: - return conn.resp - else: - return conn.getresponse() - - def _get_conn_response(self, conn, req, logger_thread_locals, **kwargs): + def _get_conn_response(self, putter, path, logger_thread_locals, + final_phase, **kwargs): self.app.logger.thread_locals = logger_thread_locals try: - resp = self._await_response(conn, **kwargs) - return (conn, resp) + resp = putter.await_response( + self.app.node_timeout, not final_phase) except (Exception, Timeout): + resp = None + if final_phase: + status_type = 'final' + else: + status_type = 'commit' self.app.exception_occurred( - conn.node, _('Object'), - _('Trying to get final status of PUT to %s') % req.path) - return (None, None) + putter.node, _('Object'), + _('Trying to get %(status_type)s status of PUT to %(path)s') % + {'status_type': status_type, 'path': path}) + return (putter, resp) - def _get_put_responses(self, req, conns, nodes, **kwargs): + def 
_have_adequate_put_responses(self, statuses, num_nodes, min_responses): """ - Collect replicated object responses. + Test for sufficient PUT responses from backend nodes to proceed with + PUT handling. + + :param statuses: a list of response statuses. + :param num_nodes: number of backend nodes to which PUT requests may be + issued. + :param min_responses: (optional) minimum number of nodes required to + have responded with satisfactory status code. + :return: True if sufficient backend responses have returned a + satisfactory status code. + """ + raise NotImplementedError + + def _get_put_responses(self, req, putters, num_nodes, final_phase=True, + min_responses=None): + """ + Collect object responses to a PUT request and determine if a + satisfactory number of nodes have returned success. Returns + lists of accumulated status codes, reasons, bodies and etags. + + :param req: the request + :param putters: list of putters for the request + :param num_nodes: number of nodes involved + :param final_phase: boolean indicating if this is the last phase + :param min_responses: minimum needed when not requiring quorum + :return: a tuple of lists of status codes, reasons, bodies and etags. + The list of bodies and etags is only populated for the final + phase of a PUT transaction. """ statuses = [] reasons = [] bodies = [] etags = set() - pile = GreenAsyncPile(len(conns)) - for conn in conns: - pile.spawn(self._get_conn_response, conn, - req, self.app.logger.thread_locals) + pile = GreenAsyncPile(len(putters)) + for putter in putters: + if putter.failed: + continue + pile.spawn(self._get_conn_response, putter, req, + self.app.logger.thread_locals, final_phase=final_phase) - def _handle_response(conn, response): + def _handle_response(putter, response): statuses.append(response.status) reasons.append(response.reason) - bodies.append(response.read()) + if final_phase: + body = response.read() + else: + body = '' + bodies.append(body) if response.status == HTTP_INSUFFICIENT_STORAGE: - self.app.error_limit(conn.node, + putter.failed = True + self.app.error_limit(putter.node, _('ERROR Insufficient Storage')) elif response.status >= HTTP_INTERNAL_SERVER_ERROR: + putter.failed = True self.app.error_occurred( - conn.node, + putter.node, _('ERROR %(status)d %(body)s From Object Server ' 're: %(path)s') % {'status': response.status, - 'body': bodies[-1][:1024], 'path': req.path}) + 'body': body[:1024], 'path': req.path}) elif is_success(response.status): etags.add(response.getheader('etag').strip('"')) - for (conn, response) in pile: + for (putter, response) in pile: if response: - _handle_response(conn, response) - if self.have_quorum(statuses, len(nodes)): + _handle_response(putter, response) + if self._have_adequate_put_responses( + statuses, num_nodes, min_responses): break + else: + putter.failed = True # give any pending requests *some* chance to finish finished_quickly = pile.waitall(self.app.post_quorum_timeout) - for (conn, response) in finished_quickly: + for (putter, response) in finished_quickly: if response: - _handle_response(conn, response) + _handle_response(putter, response) + + if final_phase: + while len(statuses) < num_nodes: + statuses.append(HTTP_SERVICE_UNAVAILABLE) + reasons.append('') + bodies.append('') - while len(statuses) < len(nodes): - statuses.append(HTTP_SERVICE_UNAVAILABLE) - reasons.append('') - bodies.append('') return statuses, reasons, bodies, etags def _config_obj_expiration(self, req): @@ -406,12 +448,17 @@ class BaseObjectController(Controller): 
req.headers['X-Timestamp'] = Timestamp(time.time()).internal return None - def _check_failure_put_connections(self, conns, req, nodes, min_conns): + def _check_failure_put_connections(self, putters, req, min_conns): """ Identify any failed connections and check minimum connection count. + + :param putters: a list of Putter instances + :param req: request + :param min_conns: minimum number of putter connections required """ if req.if_none_match is not None and '*' in req.if_none_match: - statuses = [conn.resp.status for conn in conns if conn.resp] + statuses = [ + putter.resp.status for putter in putters if putter.resp] if HTTP_PRECONDITION_FAILED in statuses: # If we find any copy of the file, it shouldn't be uploaded self.app.logger.debug( @@ -419,14 +466,14 @@ class BaseObjectController(Controller): {'statuses': statuses}) raise HTTPPreconditionFailed(request=req) - if any(conn for conn in conns if conn.resp and - conn.resp.status == HTTP_CONFLICT): + if any(putter for putter in putters if putter.resp and + putter.resp.status == HTTP_CONFLICT): status_times = ['%(status)s (%(timestamp)s)' % { - 'status': conn.resp.status, + 'status': putter.resp.status, 'timestamp': HeaderKeyDict( - conn.resp.getheaders()).get( + putter.resp.getheaders()).get( 'X-Backend-Timestamp', 'unknown') - } for conn in conns if conn.resp] + } for putter in putters if putter.resp] self.app.logger.debug( _('Object PUT returning 202 for 409: ' '%(req_timestamp)s <= %(timestamps)r'), @@ -434,32 +481,61 @@ class BaseObjectController(Controller): 'timestamps': ', '.join(status_times)}) raise HTTPAccepted(request=req) - self._check_min_conn(req, conns, min_conns) + self._check_min_conn(req, putters, min_conns) - def _connect_put_node(self, nodes, part, path, headers, + def _make_putter(self, node, part, req, headers): + """ + Returns a putter object for handling streaming of object to object + servers. + + Subclasses must implement this method. + + :param node: a storage node + :param part: ring partition number + :param req: a swob Request + :param headers: request headers + :return: an instance of a Putter + """ + raise NotImplementedError + + def _connect_put_node(self, nodes, part, req, headers, logger_thread_locals): """ Make connection to storage nodes - Connects to the first working node that it finds in nodes iter - and sends over the request headers. Returns an HTTPConnection - object to handle the rest of the streaming. - - This method must be implemented by each policy ObjectController. + Connects to the first working node that it finds in nodes iter and + sends over the request headers. Returns a Putter to handle the rest of + the streaming, or None if no working nodes were found. :param nodes: an iterator of the target storage nodes - :param partition: ring partition number - :param path: the object path to send to the storage node + :param part: ring partition number + :param req: a swob Request :param headers: request headers :param logger_thread_locals: The thread local values to be set on the self.app.logger to retain transaction logging information. 
- :return: HTTPConnection object + :return: an instance of a Putter """ - raise NotImplementedError() + self.app.logger.thread_locals = logger_thread_locals + for node in nodes: + try: + putter = self._make_putter(node, part, req, headers) + self.app.set_node_timing(node, putter.connect_duration) + return putter + except InsufficientStorage: + self.app.error_limit(node, _('ERROR Insufficient Storage')) + except PutterConnectError as e: + self.app.error_occurred( + node, _('ERROR %(status)d Expect: 100-continue ' + 'From Object Server') % { + 'status': e.status}) + except (Exception, Timeout): + self.app.exception_occurred( + node, _('Object'), + _('Expect: 100-continue on %s') % req.swift_entity_path) def _get_put_connections(self, req, nodes, partition, outgoing_headers, - policy, expect): + policy): """ Establish connections to storage nodes for PUT request """ @@ -469,25 +545,32 @@ class BaseObjectController(Controller): pile = GreenPile(len(nodes)) for nheaders in outgoing_headers: - if expect: + # RFC2616:8.2.3 disallows 100-continue without a body + if (req.content_length > 0) or req.is_chunked: nheaders['Expect'] = '100-continue' pile.spawn(self._connect_put_node, node_iter, partition, - req.swift_entity_path, nheaders, - self.app.logger.thread_locals) + req, nheaders, self.app.logger.thread_locals) - conns = [conn for conn in pile if conn] + putters = [putter for putter in pile if putter] - return conns + return putters - def _check_min_conn(self, req, conns, min_conns, msg=None): - msg = msg or 'Object PUT returning 503, %(conns)s/%(nodes)s ' \ - 'required connections' + def _check_min_conn(self, req, putters, min_conns, msg=None): + msg = msg or _('Object PUT returning 503, %(conns)s/%(nodes)s ' + 'required connections') - if len(conns) < min_conns: + if len(putters) < min_conns: self.app.logger.error((msg), - {'conns': len(conns), 'nodes': min_conns}) + {'conns': len(putters), 'nodes': min_conns}) raise HTTPServiceUnavailable(request=req) + def _get_footers(self, req): + footers = HeaderKeyDict() + footer_callback = req.environ.get( + 'swift.callback.update_footers', lambda _footer: None) + footer_callback(footers) + return footers + def _store_object(self, req, data_source, nodes, partition, outgoing_headers): """ @@ -659,115 +742,81 @@ class ReplicatedObjectController(BaseObjectController): req.swift_entity_path, concurrency) return resp - def _connect_put_node(self, nodes, part, path, headers, - logger_thread_locals): - """ - Make a connection for a replicated object. + def _make_putter(self, node, part, req, headers): + if req.environ.get('swift.callback.update_footers'): + putter = MIMEPutter.connect( + node, part, req.swift_entity_path, headers, + conn_timeout=self.app.conn_timeout, + node_timeout=self.app.node_timeout, + logger=self.app.logger, + need_multiphase=False) + else: + putter = Putter.connect( + node, part, req.swift_entity_path, headers, + conn_timeout=self.app.conn_timeout, + node_timeout=self.app.node_timeout, + logger=self.app.logger, + chunked=req.is_chunked) + return putter - Connects to the first working node that it finds in node_iter - and sends over the request headers. Returns an HTTPConnection - object to handle the rest of the streaming. 
- """ - self.app.logger.thread_locals = logger_thread_locals - for node in nodes: - try: - start_time = time.time() - with ConnectionTimeout(self.app.conn_timeout): - conn = http_connect( - node['ip'], node['port'], node['device'], part, 'PUT', - path, headers) - self.app.set_node_timing(node, time.time() - start_time) - with Timeout(self.app.node_timeout): - resp = conn.getexpect() - if resp.status == HTTP_CONTINUE: - conn.resp = None - conn.node = node - return conn - elif (is_success(resp.status) - or resp.status in (HTTP_CONFLICT, - HTTP_UNPROCESSABLE_ENTITY)): - conn.resp = resp - conn.node = node - return conn - elif headers['If-None-Match'] is not None and \ - resp.status == HTTP_PRECONDITION_FAILED: - conn.resp = resp - conn.node = node - return conn - elif resp.status == HTTP_INSUFFICIENT_STORAGE: - self.app.error_limit(node, _('ERROR Insufficient Storage')) - elif is_server_error(resp.status): - self.app.error_occurred( - node, - _('ERROR %(status)d Expect: 100-continue ' - 'From Object Server') % { - 'status': resp.status}) - except (Exception, Timeout): - self.app.exception_occurred( - node, _('Object'), - _('Expect: 100-continue on %s') % path) - - def _send_file(self, conn, path): - """Method for a file PUT coro""" - while True: - chunk = conn.queue.get() - if not conn.failed: - try: - with ChunkWriteTimeout(self.app.node_timeout): - conn.send(chunk) - except (Exception, ChunkWriteTimeout): - conn.failed = True - self.app.exception_occurred( - conn.node, _('Object'), - _('Trying to write to %s') % path) - conn.queue.task_done() - - def _transfer_data(self, req, data_source, conns, nodes): + def _transfer_data(self, req, data_source, putters, nodes): """ Transfer data for a replicated object. This method was added in the PUT method extraction change """ - min_conns = quorum_size(len(nodes)) bytes_transferred = 0 + + def send_chunk(chunk): + for putter in list(putters): + if not putter.failed: + putter.send_chunk(chunk) + else: + putter.close() + putters.remove(putter) + self._check_min_conn( + req, putters, min_conns, + msg=_('Object PUT exceptions during send, ' + '%(conns)s/%(nodes)s required connections')) + + min_conns = quorum_size(len(nodes)) try: with ContextPool(len(nodes)) as pool: - for conn in conns: - conn.failed = False - conn.queue = Queue(self.app.put_queue_depth) - pool.spawn(self._send_file, conn, req.path) + for putter in putters: + putter.spawn_sender_greenthread( + pool, self.app.put_queue_depth, self.app.node_timeout, + self.app.exception_occurred) while True: with ChunkReadTimeout(self.app.client_timeout): try: chunk = next(data_source) except StopIteration: - if req.is_chunked: - for conn in conns: - conn.queue.put('0\r\n\r\n') break bytes_transferred += len(chunk) if bytes_transferred > constraints.MAX_FILE_SIZE: raise HTTPRequestEntityTooLarge(request=req) - for conn in list(conns): - if not conn.failed: - conn.queue.put( - '%x\r\n%s\r\n' % (len(chunk), chunk) - if req.is_chunked else chunk) - else: - conn.close() - conns.remove(conn) - self._check_min_conn( - req, conns, min_conns, - msg='Object PUT exceptions during' - ' send, %(conns)s/%(nodes)s required connections') - for conn in conns: - if conn.queue.unfinished_tasks: - conn.queue.join() - conns = [conn for conn in conns if not conn.failed] - self._check_min_conn( - req, conns, min_conns, - msg='Object PUT exceptions after last send, ' - '%(conns)s/%(nodes)s required connections') + + send_chunk(chunk) + + if req.content_length and ( + bytes_transferred < req.content_length): + 
req.client_disconnect = True + self.app.logger.warning( + _('Client disconnected without sending enough data')) + self.app.logger.increment('client_disconnects') + raise HTTPClientDisconnect(request=req) + + trail_md = self._get_footers(req) + for putter in putters: + # send any footers set by middleware + putter.end_of_object_data(footer_metadata=trail_md) + + for putter in putters: + putter.wait() + self._check_min_conn( + req, [p for p in putters if not p.failed], min_conns, + msg=_('Object PUT exceptions after last send, ' + '%(conns)s/%(nodes)s required connections')) except ChunkReadTimeout as err: self.app.logger.warning( _('ERROR Client read timeout (%ss)'), err.seconds) @@ -790,12 +839,9 @@ class ReplicatedObjectController(BaseObjectController): _('ERROR Exception transferring data to object servers %s'), {'path': req.path}) raise HTTPInternalServerError(request=req) - if req.content_length and bytes_transferred < req.content_length: - req.client_disconnect = True - self.app.logger.warning( - _('Client disconnected without sending enough data')) - self.app.logger.increment('client_disconnects') - raise HTTPClientDisconnect(request=req) + + def _have_adequate_put_responses(self, statuses, num_nodes, min_responses): + return self.have_quorum(statuses, num_nodes) def _store_object(self, req, data_source, nodes, partition, outgoing_headers): @@ -812,30 +858,25 @@ class ReplicatedObjectController(BaseObjectController): if not nodes: return HTTPNotFound() - # RFC2616:8.2.3 disallows 100-continue without a body - if (req.content_length > 0) or req.is_chunked: - expect = True - else: - expect = False - conns = self._get_put_connections(req, nodes, partition, - outgoing_headers, policy, expect) + putters = self._get_put_connections( + req, nodes, partition, outgoing_headers, policy) min_conns = quorum_size(len(nodes)) try: # check that a minimum number of connections were established and # meet all the correct conditions set in the request - self._check_failure_put_connections(conns, req, nodes, min_conns) + self._check_failure_put_connections(putters, req, min_conns) # transfer data - self._transfer_data(req, data_source, conns, nodes) + self._transfer_data(req, data_source, putters, nodes) # get responses - statuses, reasons, bodies, etags = self._get_put_responses( - req, conns, nodes) + statuses, reasons, bodies, etags = \ + self._get_put_responses(req, putters, len(nodes)) except HTTPException as resp: return resp finally: - for conn in conns: - conn.close() + for putter in putters: + putter.close() if len(etags) > 1: self.app.logger.error( @@ -1380,33 +1421,38 @@ DATA_ACKED = 4 COMMIT_SENT = 5 -class ECPutter(object): +class Putter(object): """ - This is here mostly to wrap up the fact that all EC PUTs are - chunked because of the mime boundary footer trick and the first - half of the two-phase PUT conversation handling. + Putter for backend PUT requests. - An HTTP PUT request that supports streaming. + Encapsulates all the actions required to establish a connection with a + storage node and stream data to that node. - Probably deserves more docs than this, but meh. 
+ :param conn: an HTTPConnection instance + :param node: dict describing storage node + :param resp: an HTTPResponse instance if connect() received final response + :param path: the object path to send to the storage node + :param connect_duration: time taken to initiate the HTTPConnection + :param logger: a Logger instance + :param chunked: boolean indicating if the request encoding is chunked """ - def __init__(self, conn, node, resp, path, connect_duration, - mime_boundary): + def __init__(self, conn, node, resp, path, connect_duration, logger, + chunked=False): # Note: you probably want to call Putter.connect() instead of # instantiating one of these directly. self.conn = conn self.node = node - self.resp = resp + self.resp = self.final_resp = resp self.path = path self.connect_duration = connect_duration # for handoff nodes node_index is None self.node_index = node.get('index') - self.mime_boundary = mime_boundary - self.chunk_hasher = md5() self.failed = False self.queue = None self.state = NO_DATA_SENT + self.chunked = chunked + self.logger = logger def await_response(self, timeout, informational=False): """ @@ -1419,16 +1465,20 @@ class ECPutter(object): a 100 Continue response and sent up the PUT request's body, then we'll actually read the 2xx-5xx response off the network here. + :param timeout: time to wait for a response + :param informational: if True then try to get a 100-continue response, + otherwise try to get a final response. :returns: HTTPResponse :raises: Timeout if the response took too long """ - conn = self.conn with Timeout(timeout): - if not conn.resp: + # don't do this update of self.resp if the Expect response during + # conenct() was actually a final response + if not self.final_resp: if informational: - self.resp = conn.getexpect() + self.resp = self.conn.getexpect() else: - self.resp = conn.getresponse() + self.resp = self.conn.getresponse() return self.resp def spawn_sender_greenthread(self, pool, queue_depth, write_timeout, @@ -1441,9 +1491,10 @@ class ECPutter(object): if self.queue.unfinished_tasks: self.queue.join() - def _start_mime_doc_object_body(self): - self.queue.put("--%s\r\nX-Document: object body\r\n\r\n" % - (self.mime_boundary,)) + def _start_object_data(self): + # Called immediately before the first chunk of object data is sent. + # Subclasses may implement custom behaviour + pass def send_chunk(self, chunk): if not chunk: @@ -1455,30 +1506,148 @@ class ECPutter(object): elif self.state == DATA_SENT: raise ValueError("called send_chunk after end_of_object_data") - if self.state == NO_DATA_SENT and self.mime_boundary: - # We're sending the object plus other stuff in the same request - # body, all wrapped up in multipart MIME, so we'd better start - # off the MIME document before sending any object data. - self._start_mime_doc_object_body() + if self.state == NO_DATA_SENT: + self._start_object_data() self.state = SENDING_DATA self.queue.put(chunk) - def end_of_object_data(self, footer_metadata): + def end_of_object_data(self, **kwargs): + """ + Call when there is no more data to send. + """ + if self.state == DATA_SENT: + raise ValueError("called end_of_object_data twice") + + self.queue.put('') + self.state = DATA_SENT + + def _send_file(self, write_timeout, exception_handler): + """ + Method for a file PUT coroutine. Takes chunks from a queue and sends + them down a socket. + + If something goes wrong, the "failed" attribute will be set to true + and the exception handler will be called. 
+ """ + while True: + chunk = self.queue.get() + if not self.failed: + if self.chunked: + to_send = "%x\r\n%s\r\n" % (len(chunk), chunk) + else: + to_send = chunk + try: + with ChunkWriteTimeout(write_timeout): + self.conn.send(to_send) + except (Exception, ChunkWriteTimeout): + self.failed = True + exception_handler(self.node, _('Object'), + _('Trying to write to %s') % self.path) + + self.queue.task_done() + + def close(self): + # release reference to response to ensure connection really does close, + # see bug https://bugs.launchpad.net/swift/+bug/1594739 + self.resp = self.final_resp = None + self.conn.close() + + @classmethod + def _make_connection(cls, node, part, path, headers, conn_timeout, + node_timeout): + start_time = time.time() + with ConnectionTimeout(conn_timeout): + conn = http_connect(node['ip'], node['port'], node['device'], + part, 'PUT', path, headers) + connect_duration = time.time() - start_time + + with ResponseTimeout(node_timeout): + resp = conn.getexpect() + + if resp.status == HTTP_INSUFFICIENT_STORAGE: + raise InsufficientStorage + + if is_server_error(resp.status): + raise PutterConnectError(resp.status) + + final_resp = None + if (is_success(resp.status) or + resp.status in (HTTP_CONFLICT, HTTP_UNPROCESSABLE_ENTITY) or + (headers.get('If-None-Match', None) is not None and + resp.status == HTTP_PRECONDITION_FAILED)): + final_resp = resp + + return conn, resp, final_resp, connect_duration + + @classmethod + def connect(cls, node, part, path, headers, conn_timeout, node_timeout, + logger=None, chunked=False, **kwargs): + """ + Connect to a backend node and send the headers. + + :returns: Putter instance + + :raises: ConnectionTimeout if initial connection timed out + :raises: ResponseTimeout if header retrieval timed out + :raises: InsufficientStorage on 507 response from node + :raises: PutterConnectError on non-507 server error response from node + """ + conn, expect_resp, final_resp, connect_duration = cls._make_connection( + node, part, path, headers, conn_timeout, node_timeout) + return cls(conn, node, final_resp, path, connect_duration, logger, + chunked=chunked) + + +class MIMEPutter(Putter): + """ + Putter for backend PUT requests that use MIME. + + This is here mostly to wrap up the fact that all multipart PUTs are + chunked because of the mime boundary footer trick and the first + half of the two-phase PUT conversation handling. + + An HTTP PUT request that supports streaming. + """ + def __init__(self, conn, node, resp, req, connect_duration, + logger, mime_boundary, multiphase=False): + super(MIMEPutter, self).__init__(conn, node, resp, req, + connect_duration, logger) + # Note: you probably want to call MimePutter.connect() instead of + # instantiating one of these directly. + self.chunked = True # MIME requests always send chunked body + self.mime_boundary = mime_boundary + self.multiphase = multiphase + + def _start_object_data(self): + # We're sending the object plus other stuff in the same request + # body, all wrapped up in multipart MIME, so we'd better start + # off the MIME document before sending any object data. + self.queue.put("--%s\r\nX-Document: object body\r\n\r\n" % + (self.mime_boundary,)) + + def end_of_object_data(self, footer_metadata=None): """ Call when there is no more data to send. + Overrides superclass implementation to send any footer metadata + after object data. + :param footer_metadata: dictionary of metadata items + to be sent as footers. 
""" if self.state == DATA_SENT: raise ValueError("called end_of_object_data twice") elif self.state == NO_DATA_SENT and self.mime_boundary: - self._start_mime_doc_object_body() + self._start_object_data() footer_body = json.dumps(footer_metadata) footer_md5 = md5(footer_body).hexdigest() tail_boundary = ("--%s" % (self.mime_boundary,)) + if not self.multiphase: + # this will be the last part sent + tail_boundary = tail_boundary + "--" message_parts = [ ("\r\n--%s\r\n" % self.mime_boundary), @@ -1498,6 +1667,9 @@ class ECPutter(object): Call when there are > quorum 2XX responses received. Send commit confirmations to all object nodes to finalize the PUT. """ + if not self.multiphase: + raise ValueError( + "called send_commit_confirmation but multiphase is False") if self.state == COMMIT_SENT: raise ValueError("called send_commit_confirmation twice") @@ -1517,79 +1689,49 @@ class ECPutter(object): self.queue.put('') self.state = COMMIT_SENT - def _send_file(self, write_timeout, exception_handler): - """ - Method for a file PUT coro. Takes chunks from a queue and sends them - down a socket. - - If something goes wrong, the "failed" attribute will be set to true - and the exception handler will be called. - """ - while True: - chunk = self.queue.get() - if not self.failed: - to_send = "%x\r\n%s\r\n" % (len(chunk), chunk) - try: - with ChunkWriteTimeout(write_timeout): - self.conn.send(to_send) - except (Exception, ChunkWriteTimeout): - self.failed = True - exception_handler(self.conn.node, _('Object'), - _('Trying to write to %s') % self.path) - self.queue.task_done() - @classmethod - def connect(cls, node, part, path, headers, conn_timeout, node_timeout, - chunked=False, expected_frag_archive_size=None): + def connect(cls, node, part, req, headers, conn_timeout, node_timeout, + logger=None, need_multiphase=True, **kwargs): """ Connect to a backend node and send the headers. - :returns: Putter instance + Override superclass method to notify object of need for support for + multipart body with footers and optionally multiphase commit, and + verify object server's capabilities. - :raises: ConnectionTimeout if initial connection timed out - :raises: ResponseTimeout if header retrieval timed out - :raises: InsufficientStorage on 507 response from node - :raises: PutterConnectError on non-507 server error response from node + :param need_multiphase: if True then multiphase support is required of + the object server :raises: FooterNotSupported if need_metadata_footer is set but backend node can't process footers - :raises: MultiphasePUTNotSupported if need_multiphase_support is - set but backend node can't handle multiphase PUT + :raises: MultiphasePUTNotSupported if need_multiphase is set but + backend node can't handle multiphase PUT """ mime_boundary = "%.64x" % random.randint(0, 16 ** 64) headers = HeaderKeyDict(headers) + # when using a multipart mime request to backend the actual + # content-length is not equal to the object content size, so move the + # object content size to X-Backend-Obj-Content-Length if that has not + # already been set by the EC PUT path. + headers.setdefault('X-Backend-Obj-Content-Length', + headers.pop('Content-Length', None)) # We're going to be adding some unknown amount of data to the # request, so we can't use an explicit content length, and thus # we must use chunked encoding. 
headers['Transfer-Encoding'] = 'chunked' headers['Expect'] = '100-continue' - # make sure this isn't there - headers.pop('Content-Length') - headers['X-Backend-Obj-Content-Length'] = expected_frag_archive_size - headers['X-Backend-Obj-Multipart-Mime-Boundary'] = mime_boundary headers['X-Backend-Obj-Metadata-Footer'] = 'yes' - headers['X-Backend-Obj-Multiphase-Commit'] = 'yes' + if need_multiphase: + headers['X-Backend-Obj-Multiphase-Commit'] = 'yes' - start_time = time.time() - with ConnectionTimeout(conn_timeout): - conn = http_connect(node['ip'], node['port'], node['device'], - part, 'PUT', path, headers) - connect_duration = time.time() - start_time + conn, expect_resp, final_resp, connect_duration = cls._make_connection( + node, part, req, headers, conn_timeout, node_timeout) - with ResponseTimeout(node_timeout): - resp = conn.getexpect() - - if resp.status == HTTP_INSUFFICIENT_STORAGE: - raise InsufficientStorage - - if is_server_error(resp.status): - raise PutterConnectError(resp.status) - - if is_informational(resp.status): - continue_headers = HeaderKeyDict(resp.getheaders()) + if is_informational(expect_resp.status): + continue_headers = HeaderKeyDict(expect_resp.getheaders()) can_send_metadata_footer = config_true_value( continue_headers.get('X-Obj-Metadata-Footer', 'no')) can_handle_multiphase_put = config_true_value( @@ -1598,18 +1740,11 @@ class ECPutter(object): if not can_send_metadata_footer: raise FooterNotSupported() - if not can_handle_multiphase_put: + if need_multiphase and not can_handle_multiphase_put: raise MultiphasePUTNotSupported() - conn.node = node - conn.resp = None - if is_success(resp.status) or resp.status == HTTP_CONFLICT: - conn.resp = resp - elif (headers.get('If-None-Match', None) is not None and - resp.status == HTTP_PRECONDITION_FAILED): - conn.resp = resp - - return cls(conn, node, resp, path, connect_duration, mime_boundary) + return cls(conn, node, final_resp, req, connect_duration, logger, + mime_boundary, multiphase=need_multiphase) def chunk_transformer(policy, nstreams): @@ -1674,7 +1809,7 @@ def chunk_transformer(policy, nstreams): def trailing_metadata(policy, client_obj_hasher, bytes_transferred_from_client, fragment_archive_index): - return { + return HeaderKeyDict({ # etag and size values are being added twice here. # The container override header is used to update the container db # with these values as they represent the correct etag and size for @@ -1692,7 +1827,7 @@ def trailing_metadata(policy, client_obj_hasher, # AKA "what is this thing?" 'X-Object-Sysmeta-EC-Scheme': policy.ec_scheme_description, 'X-Object-Sysmeta-EC-Segment-Size': str(policy.ec_segment_size), - } + }) @ObjectControllerRouter.register(EC_POLICY) @@ -1764,8 +1899,7 @@ class ECObjectController(BaseObjectController): return range_specs def _get_or_head_response(self, req, node_iter, partition, policy): - req.headers.setdefault("X-Backend-Etag-Is-At", - "X-Object-Sysmeta-Ec-Etag") + update_etag_is_at_header(req, "X-Object-Sysmeta-Ec-Etag") if req.method == 'HEAD': # no fancy EC decoding here, just one plain old HEAD request to @@ -1862,14 +1996,18 @@ class ECObjectController(BaseObjectController): resp = self.best_response( req, statuses, reasons, bodies, 'Object', headers=headers) - self._fix_response(resp) + self._fix_response(req, resp) return resp - def _fix_response(self, resp): + def _fix_response(self, req, resp): # EC fragment archives each have different bytes, hence different # etags. 
However, they all have the original object's etag stored in # sysmeta, so we copy that here (if it exists) so the client gets it. resp.headers['Etag'] = resp.headers.get('X-Object-Sysmeta-Ec-Etag') + # We're about to invoke conditional response checking so set the + # correct conditional etag from wherever X-Backend-Etag-Is-At points, + # if it exists at all. + resp._conditional_etag = resolve_etag_is_at_header(req, resp.headers) if (is_success(resp.status_int) or is_redirection(resp.status_int) or resp.status_int == HTTP_REQUESTED_RANGE_NOT_SATISFIABLE): resp.accept_ranges = 'bytes' @@ -1878,66 +2016,13 @@ class ECObjectController(BaseObjectController): 'X-Object-Sysmeta-Ec-Content-Length') resp.fix_conditional_response() - def _connect_put_node(self, node_iter, part, path, headers, - logger_thread_locals): - """ - Make a connection for a erasure encoded object. - - Connects to the first working node that it finds in node_iter and sends - over the request headers. Returns a Putter to handle the rest of the - streaming, or None if no working nodes were found. - """ - # the object server will get different bytes, so these - # values do not apply (Content-Length might, in general, but - # in the specific case of replication vs. EC, it doesn't). - client_cl = headers.pop('Content-Length', None) - headers.pop('Etag', None) - - expected_frag_size = None - if client_cl: - policy_index = int(headers.get('X-Backend-Storage-Policy-Index')) - policy = POLICIES.get_by_index(policy_index) - # TODO: PyECLib <= 1.2.0 looks to return the segment info - # different from the input for aligned data efficiency but - # Swift never does. So calculate the fragment length Swift - # will actually send to object sever by making two different - # get_segment_info calls (until PyECLib fixed). 
- # policy.fragment_size makes the call using segment size, - # and the next call is to get info for the last segment - - # get number of fragments except the tail - use truncation // - num_fragments = int(client_cl) // policy.ec_segment_size - expected_frag_size = policy.fragment_size * num_fragments - - # calculate the tail fragment_size by hand and add it to - # expected_frag_size - last_segment_size = int(client_cl) % policy.ec_segment_size - if last_segment_size: - last_info = policy.pyeclib_driver.get_segment_info( - last_segment_size, policy.ec_segment_size) - expected_frag_size += last_info['fragment_size'] - - self.app.logger.thread_locals = logger_thread_locals - for node in node_iter: - try: - putter = ECPutter.connect( - node, part, path, headers, - conn_timeout=self.app.conn_timeout, - node_timeout=self.app.node_timeout, - expected_frag_archive_size=expected_frag_size) - self.app.set_node_timing(node, putter.connect_duration) - return putter - except InsufficientStorage: - self.app.error_limit(node, _('ERROR Insufficient Storage')) - except PutterConnectError as e: - self.app.error_occurred( - node, _('ERROR %(status)d Expect: 100-continue ' - 'From Object Server') % { - 'status': e.status}) - except (Exception, Timeout): - self.app.exception_occurred( - node, _('Object'), - _('Expect: 100-continue on %s') % path) + def _make_putter(self, node, part, req, headers): + return MIMEPutter.connect( + node, part, req.swift_entity_path, headers, + conn_timeout=self.app.conn_timeout, + node_timeout=self.app.node_timeout, + logger=self.app.logger, + need_multiphase=True) def _determine_chunk_destinations(self, putters): """ @@ -1985,8 +2070,16 @@ class ECObjectController(BaseObjectController): bytes_transferred = 0 chunk_transform = chunk_transformer(policy, len(nodes)) chunk_transform.send(None) + chunk_hashers = collections.defaultdict(md5) def send_chunk(chunk): + # Note: there's two different hashers in here. etag_hasher is + # hashing the original object so that we can validate the ETag + # that the client sent (and etag_hasher is None if the client + # didn't send one). The hasher in chunk_hashers is hashing the + # fragment archive being sent to the client; this lets us guard + # against data corruption on the network between proxy and + # object server. 
if etag_hasher: etag_hasher.update(chunk) backend_chunks = chunk_transform.send(chunk) @@ -1996,15 +2089,18 @@ class ECObjectController(BaseObjectController): return for putter in list(putters): - backend_chunk = backend_chunks[chunk_index[putter]] + ci = chunk_index[putter] + backend_chunk = backend_chunks[ci] if not putter.failed: - putter.chunk_hasher.update(backend_chunk) + chunk_hashers[ci].update(backend_chunk) putter.send_chunk(backend_chunk) else: + putter.close() putters.remove(putter) self._check_min_conn( - req, putters, min_conns, msg='Object PUT exceptions during' - ' send, %(conns)s/%(nodes)s required connections') + req, putters, min_conns, + msg=_('Object PUT exceptions during send, ' + '%(conns)s/%(nodes)s required connections')) try: with ContextPool(len(putters)) as pool: @@ -2047,14 +2143,26 @@ class ECObjectController(BaseObjectController): send_chunk('') # flush out any buffered data + footers = self._get_footers(req) + received_etag = footers.get( + 'etag', '').strip('"') + if (computed_etag and received_etag and + computed_etag != received_etag): + raise HTTPUnprocessableEntity(request=req) + + # Remove any EC reserved metadata names from footers + footers = {(k, v) for k, v in footers.items() + if not k.lower().startswith('x-object-sysmeta-ec-')} for putter in putters: + ci = chunk_index[putter] + # Update any footers set by middleware with EC footers trail_md = trailing_metadata( policy, etag_hasher, - bytes_transferred, - chunk_index[putter]) - trail_md['Etag'] = \ - putter.chunk_hasher.hexdigest() - putter.end_of_object_data(trail_md) + bytes_transferred, ci) + trail_md.update(footers) + # Etag footer must always be hash of what we sent + trail_md['Etag'] = chunk_hashers[ci].hexdigest() + putter.end_of_object_data(footer_metadata=trail_md) for putter in putters: putter.wait() @@ -2065,12 +2173,12 @@ class ECObjectController(BaseObjectController): # object data and metadata commit and is a necessary # condition to be met before starting 2nd PUT phase final_phase = False - need_quorum = True - statuses, reasons, bodies, _junk, quorum = \ + statuses, reasons, bodies, _junk = \ self._get_put_responses( - req, putters, len(nodes), final_phase, - min_conns, need_quorum=need_quorum) - if not quorum: + req, putters, len(nodes), final_phase=final_phase, + min_responses=min_conns) + if not self.have_quorum( + statuses, len(nodes), quorum=min_conns): self.app.logger.error( _('Not enough object servers ack\'ed (got %d)'), statuses.count(HTTP_CONTINUE)) @@ -2153,109 +2261,15 @@ class ECObjectController(BaseObjectController): return self._have_adequate_responses( statuses, min_responses, is_informational) - def _await_response(self, conn, final_phase): - return conn.await_response( - self.app.node_timeout, not final_phase) - - def _get_conn_response(self, conn, req, logger_thread_locals, - final_phase, **kwargs): - self.app.logger.thread_locals = logger_thread_locals - try: - resp = self._await_response(conn, final_phase=final_phase, - **kwargs) - except (Exception, Timeout): - resp = None - if final_phase: - status_type = 'final' - else: - status_type = 'commit' - self.app.exception_occurred( - conn.node, _('Object'), - _('Trying to get %(status_type)s status of PUT to %(path)s') % - {'status_type': status_type, 'path': req.path}) - return (conn, resp) - - def _get_put_responses(self, req, putters, num_nodes, final_phase, - min_responses, need_quorum=True): - """ - Collect erasure coded object responses. 
- - Collect object responses to a PUT request and determine if - satisfactory number of nodes have returned success. Return - statuses, quorum result if indicated by 'need_quorum' and - etags if this is a final phase or a multiphase PUT transaction. - - :param req: the request - :param putters: list of putters for the request - :param num_nodes: number of nodes involved - :param final_phase: boolean indicating if this is the last phase - :param min_responses: minimum needed when not requiring quorum - :param need_quorum: boolean indicating if quorum is required - """ - statuses = [] - reasons = [] - bodies = [] - etags = set() - - pile = GreenAsyncPile(len(putters)) - for putter in putters: - if putter.failed: - continue - pile.spawn(self._get_conn_response, putter, req, - self.app.logger.thread_locals, final_phase=final_phase) - - def _handle_response(putter, response): - statuses.append(response.status) - reasons.append(response.reason) - if final_phase: - body = response.read() - else: - body = '' - bodies.append(body) - if response.status == HTTP_INSUFFICIENT_STORAGE: - putter.failed = True - self.app.error_limit(putter.node, - _('ERROR Insufficient Storage')) - elif response.status >= HTTP_INTERNAL_SERVER_ERROR: - putter.failed = True - self.app.error_occurred( - putter.node, - _('ERROR %(status)d %(body)s From Object Server ' - 're: %(path)s') % - {'status': response.status, - 'body': body[:1024], 'path': req.path}) - elif is_success(response.status): - etags.add(response.getheader('etag').strip('"')) - - quorum = False - for (putter, response) in pile: - if response: - _handle_response(putter, response) - if self._have_adequate_successes(statuses, min_responses): - break - else: - putter.failed = True - - # give any pending requests *some* chance to finish - finished_quickly = pile.waitall(self.app.post_quorum_timeout) - for (putter, response) in finished_quickly: - if response: - _handle_response(putter, response) - - if need_quorum: - if final_phase: - while len(statuses) < num_nodes: - statuses.append(HTTP_SERVICE_UNAVAILABLE) - reasons.append('') - bodies.append('') - else: - # intermediate response phase - set return value to true only - # if there are responses having same value of *any* status - # except 5xx - if self.have_quorum(statuses, num_nodes, quorum=min_responses): - quorum = True - - return statuses, reasons, bodies, etags, quorum + def _have_adequate_put_responses(self, statuses, num_nodes, min_responses): + # For an EC PUT we require a quorum of responses with success statuses + # in order to move on to next phase of PUT request handling without + # having to wait for *all* responses. + # TODO: this implies that in the first phase of the backend PUTs when + # we are actually expecting 1xx responses that we will end up waiting + # for *all* responses. That seems inefficient since we only need a + # quorum of 1xx responses to proceed. + return self._have_adequate_successes(statuses, min_responses) def _store_object(self, req, data_source, nodes, partition, outgoing_headers): @@ -2264,6 +2278,35 @@ class ECObjectController(BaseObjectController): """ policy_index = int(req.headers.get('X-Backend-Storage-Policy-Index')) policy = POLICIES.get_by_index(policy_index) + + expected_frag_size = None + if req.content_length: + # TODO: PyECLib <= 1.2.0 looks to return the segment info + # different from the input for aligned data efficiency but + # Swift never does. 
So calculate the fragment length Swift + # will actually send to object sever by making two different + # get_segment_info calls (until PyECLib fixed). + # policy.fragment_size makes the call using segment size, + # and the next call is to get info for the last segment + + # get number of fragments except the tail - use truncation // + num_fragments = req.content_length // policy.ec_segment_size + expected_frag_size = policy.fragment_size * num_fragments + + # calculate the tail fragment_size by hand and add it to + # expected_frag_size + last_segment_size = req.content_length % policy.ec_segment_size + if last_segment_size: + last_info = policy.pyeclib_driver.get_segment_info( + last_segment_size, policy.ec_segment_size) + expected_frag_size += last_info['fragment_size'] + for headers in outgoing_headers: + headers['X-Backend-Obj-Content-Length'] = expected_frag_size + # the object server will get different bytes, so these + # values do not apply. + headers.pop('Content-Length', None) + headers.pop('Etag', None) + # Since the request body sent from client -> proxy is not # the same as the request body sent proxy -> object, we # can't rely on the object-server to do the etag checking - @@ -2272,18 +2315,15 @@ class ECObjectController(BaseObjectController): min_conns = policy.quorum putters = self._get_put_connections( - req, nodes, partition, outgoing_headers, - policy, expect=True) + req, nodes, partition, outgoing_headers, policy) try: # check that a minimum number of connections were established and # meet all the correct conditions set in the request - self._check_failure_put_connections(putters, req, nodes, min_conns) + self._check_failure_put_connections(putters, req, min_conns) self._transfer_data(req, policy, data_source, putters, nodes, min_conns, etag_hasher) - final_phase = True - need_quorum = False # The .durable file will propagate in a replicated fashion; if # one exists, the reconstructor will spread it around. # In order to avoid successfully writing an object, but refusing @@ -2292,15 +2332,16 @@ class ECObjectController(BaseObjectController): # writes as quorum fragment writes. If object servers are in the # future able to serve their non-durable fragment archives we may # be able to reduce this quorum count if needed. 
- min_conns = policy.quorum - putters = [p for p in putters if not p.failed] - # ignore response etags, and quorum boolean - statuses, reasons, bodies, _etags, _quorum = \ + # ignore response etags + statuses, reasons, bodies, _etags = \ self._get_put_responses(req, putters, len(nodes), - final_phase, min_conns, - need_quorum=need_quorum) + final_phase=True, + min_responses=min_conns) except HTTPException as resp: return resp + finally: + for putter in putters: + putter.close() etag = etag_hasher.hexdigest() resp = self.best_response(req, statuses, reasons, bodies, diff --git a/test/unit/__init__.py b/test/unit/__init__.py index c4c833a79c..acc3c8612f 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -32,6 +32,8 @@ import eventlet from eventlet.green import socket from tempfile import mkdtemp from shutil import rmtree + + from swift.common.utils import Timestamp, NOTICE from test import get_config from swift.common import utils @@ -848,7 +850,7 @@ def fake_http_connect(*code_iter, **kwargs): def __init__(self, status, etag=None, body='', timestamp='1', headers=None, expect_headers=None, connection_id=None, - give_send=None): + give_send=None, give_expect=None): if not isinstance(status, FakeStatus): status = FakeStatus(status) self._status = status @@ -864,6 +866,8 @@ def fake_http_connect(*code_iter, **kwargs): self.timestamp = timestamp self.connection_id = connection_id self.give_send = give_send + self.give_expect = give_expect + self.closed = False if 'slow' in kwargs and isinstance(kwargs['slow'], list): try: self._next_sleep = kwargs['slow'].pop(0) @@ -884,6 +888,8 @@ def fake_http_connect(*code_iter, **kwargs): return self def getexpect(self): + if self.give_expect: + self.give_expect(self) expect_status = self._status.get_expect_status() headers = dict(self.expect_headers) if expect_status == 409: @@ -953,7 +959,7 @@ def fake_http_connect(*code_iter, **kwargs): def send(self, amt=None): if self.give_send: - self.give_send(self.connection_id, amt) + self.give_send(self, amt) am_slow, value = self.get_slow() if am_slow: if self.received < 4: @@ -964,7 +970,7 @@ def fake_http_connect(*code_iter, **kwargs): return HeaderKeyDict(self.getheaders()).get(name, default) def close(self): - pass + self.closed = True timestamps_iter = iter(kwargs.get('timestamps') or ['1'] * len(code_iter)) etag_iter = iter(kwargs.get('etags') or [None] * len(code_iter)) @@ -1017,7 +1023,8 @@ def fake_http_connect(*code_iter, **kwargs): body = next(body_iter) return FakeConn(status, etag, body=body, timestamp=timestamp, headers=headers, expect_headers=expect_headers, - connection_id=i, give_send=kwargs.get('give_send')) + connection_id=i, give_send=kwargs.get('give_send'), + give_expect=kwargs.get('give_expect')) connect.code_iter = code_iter diff --git a/test/unit/common/middleware/helpers.py b/test/unit/common/middleware/helpers.py index e542818967..8b8fff3b3d 100644 --- a/test/unit/common/middleware/helpers.py +++ b/test/unit/common/middleware/helpers.py @@ -16,7 +16,6 @@ # This stuff can't live in test/unit/__init__.py due to its swob dependency. 
from collections import defaultdict -from copy import deepcopy from hashlib import md5 from swift.common import swob from swift.common.header_key_dict import HeaderKeyDict @@ -113,24 +112,34 @@ class FakeSwift(object): raise KeyError("Didn't find %r in allowed responses" % ( (method, path),)) - self._calls.append((method, path, req_headers)) - # simulate object PUT if method == 'PUT' and obj: - input = env['wsgi.input'].read() + input = ''.join(iter(env['wsgi.input'].read, '')) + if 'swift.callback.update_footers' in env: + footers = HeaderKeyDict() + env['swift.callback.update_footers'](footers) + req_headers.update(footers) etag = md5(input).hexdigest() headers.setdefault('Etag', etag) headers.setdefault('Content-Length', len(input)) # keep it for subsequent GET requests later - self.uploaded[path] = (deepcopy(headers), input) + self.uploaded[path] = (dict(req_headers), input) if "CONTENT_TYPE" in env: self.uploaded[path][0]['Content-Type'] = env["CONTENT_TYPE"] - # range requests ought to work, which require conditional_response=True + self._calls.append((method, path, HeaderKeyDict(req_headers))) + + # range requests ought to work, hence conditional_response=True req = swob.Request(env) - resp = resp_class(req=req, headers=headers, body=body, - conditional_response=req.method in ('GET', 'HEAD')) + if isinstance(body, list): + resp = resp_class( + req=req, headers=headers, app_iter=body, + conditional_response=req.method in ('GET', 'HEAD')) + else: + resp = resp_class( + req=req, headers=headers, body=body, + conditional_response=req.method in ('GET', 'HEAD')) wsgi_iter = resp(env, start_response) self.mark_opened(path) return LeakTrackingIter(wsgi_iter, self, path) diff --git a/test/unit/common/test_request_helpers.py b/test/unit/common/test_request_helpers.py index c13bc03ca9..1c39e9f0af 100644 --- a/test/unit/common/test_request_helpers.py +++ b/test/unit/common/test_request_helpers.py @@ -21,7 +21,8 @@ from swift.common.storage_policy import POLICIES, EC_POLICY, REPL_POLICY from swift.common.request_helpers import is_sys_meta, is_user_meta, \ is_sys_or_user_meta, strip_sys_meta_prefix, strip_user_meta_prefix, \ remove_items, copy_header_subset, get_name_and_placement, \ - http_response_to_document_iters + http_response_to_document_iters, update_etag_is_at_header, \ + resolve_etag_is_at_header from test.unit import patch_policies from test.unit.common.test_utils import FakeResponse @@ -273,3 +274,74 @@ class TestHTTPResponseToDocumentIters(unittest.TestCase): self.assertEqual(body.read(), 'ches') self.assertRaises(StopIteration, next, doc_iters) + + def test_update_etag_is_at_header(self): + # start with no existing X-Backend-Etag-Is-At + req = Request.blank('/v/a/c/o') + update_etag_is_at_header(req, 'X-Object-Sysmeta-My-Etag') + self.assertEqual('X-Object-Sysmeta-My-Etag', + req.headers['X-Backend-Etag-Is-At']) + # add another alternate + update_etag_is_at_header(req, 'X-Object-Sysmeta-Ec-Etag') + self.assertEqual('X-Object-Sysmeta-My-Etag,X-Object-Sysmeta-Ec-Etag', + req.headers['X-Backend-Etag-Is-At']) + with self.assertRaises(ValueError) as cm: + update_etag_is_at_header(req, 'X-Object-Sysmeta-,-Bad') + self.assertEqual('Header name must not contain commas', + cm.exception.message) + + def test_resolve_etag_is_at_header(self): + def do_test(): + req = Request.blank('/v/a/c/o') + # ok to have no X-Backend-Etag-Is-At + self.assertIsNone(resolve_etag_is_at_header(req, metadata)) + + # ok to have no matching metadata + req.headers['X-Backend-Etag-Is-At'] = 'X-Not-There' + 
self.assertIsNone(resolve_etag_is_at_header(req, metadata)) + + # selects from metadata + req.headers['X-Backend-Etag-Is-At'] = 'X-Object-Sysmeta-Ec-Etag' + self.assertEqual('an etag value', + resolve_etag_is_at_header(req, metadata)) + req.headers['X-Backend-Etag-Is-At'] = 'X-Object-Sysmeta-My-Etag' + self.assertEqual('another etag value', + resolve_etag_is_at_header(req, metadata)) + + # first in list takes precedence + req.headers['X-Backend-Etag-Is-At'] = \ + 'X-Object-Sysmeta-My-Etag,X-Object-Sysmeta-Ec-Etag' + self.assertEqual('another etag value', + resolve_etag_is_at_header(req, metadata)) + + # non-existent alternates are passed over + req.headers['X-Backend-Etag-Is-At'] = \ + 'X-Bogus,X-Object-Sysmeta-My-Etag,X-Object-Sysmeta-Ec-Etag' + self.assertEqual('another etag value', + resolve_etag_is_at_header(req, metadata)) + + # spaces in list are ok + alts = 'X-Foo, X-Object-Sysmeta-My-Etag , X-Object-Sysmeta-Ec-Etag' + req.headers['X-Backend-Etag-Is-At'] = alts + self.assertEqual('another etag value', + resolve_etag_is_at_header(req, metadata)) + + # lower case in list is ok + alts = alts.lower() + req.headers['X-Backend-Etag-Is-At'] = alts + self.assertEqual('another etag value', + resolve_etag_is_at_header(req, metadata)) + + # upper case in list is ok + alts = alts.upper() + req.headers['X-Backend-Etag-Is-At'] = alts + self.assertEqual('another etag value', + resolve_etag_is_at_header(req, metadata)) + + metadata = {'X-Object-Sysmeta-Ec-Etag': 'an etag value', + 'X-Object-Sysmeta-My-Etag': 'another etag value'} + do_test() + metadata = dict((k.lower(), v) for k, v in metadata.items()) + do_test() + metadata = dict((k.upper(), v) for k, v in metadata.items()) + do_test() diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index 24eba9956a..b85230f395 100755 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -2385,6 +2385,7 @@ class TestObjectController(unittest.TestCase): 'X-Timestamp': utils.Timestamp(time()).internal, 'Content-Type': 'application/octet-stream', 'X-Object-Meta-Xtag': 'madeup', + 'X-Object-Sysmeta-Xtag': 'alternate madeup', } req = Request.blank('/sda1/p/a/c/o', method='PUT', headers=headers) @@ -2400,6 +2401,39 @@ class TestObjectController(unittest.TestCase): resp = req.get_response(self.object_controller) self.assertEqual(resp.status_int, 200) + # match x-backend-etag-is-at, using first in list of alternates + req = Request.blank('/sda1/p/a/c/o', headers={ + 'If-Match': 'madeup', + 'X-Backend-Etag-Is-At': + 'X-Object-Meta-Xtag,X-Object-Sysmeta-Z'}) + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 200) + + # match x-backend-etag-is-at, using second in list of alternates + alts = 'X-Object-Sysmeta-Y,X-Object-Meta-Xtag,X-Object-Sysmeta-Z' + req = Request.blank('/sda1/p/a/c/o', headers={ + 'If-Match': 'madeup', + 'X-Backend-Etag-Is-At': alts}) + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 200) + + # match x-backend-etag-is-at, choosing first of multiple alternates + alts = 'X-Object-Sysmeta-Y,X-Object-Meta-Xtag,X-Object-Sysmeta-Xtag' + req = Request.blank('/sda1/p/a/c/o', headers={ + 'If-Match': 'madeup', + 'X-Backend-Etag-Is-At': alts}) + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 200) + + # match x-backend-etag-is-at, choosing first of multiple alternates + # (switches order of second two alternates from previous assertion) + alts = 'X-Object-Sysmeta-Y,X-Object-Sysmeta-Xtag,X-Object-Meta-Xtag' + req = 
Request.blank('/sda1/p/a/c/o', headers={ + 'If-Match': 'alternate madeup', + 'X-Backend-Etag-Is-At': alts}) + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 200) + # no match x-backend-etag-is-at req = Request.blank('/sda1/p/a/c/o', headers={ 'If-Match': real_etag, diff --git a/test/unit/proxy/controllers/test_obj.py b/test/unit/proxy/controllers/test_obj.py index be0893dbb2..4495fb0c68 100755 --- a/test/unit/proxy/controllers/test_obj.py +++ b/test/unit/proxy/controllers/test_obj.py @@ -122,6 +122,27 @@ class PatchedObjControllerApp(proxy_server.Application): PatchedObjControllerApp, self).__call__(*args, **kwargs) +def make_footers_callback(body=None): + # helper method to create a footers callback that will generate some fake + # footer metadata + cont_etag = 'container update etag may differ' + crypto_etag = '20242af0cd21dd7195a10483eb7472c9' + etag_crypto_meta = \ + '{"cipher": "AES_CTR_256", "iv": "sD+PSw/DfqYwpsVGSo0GEw=="}' + etag = md5(body).hexdigest() if body is not None else None + footers_to_add = { + 'X-Object-Sysmeta-Container-Update-Override-Etag': cont_etag, + 'X-Object-Sysmeta-Crypto-Etag': crypto_etag, + 'X-Object-Sysmeta-Crypto-Meta-Etag': etag_crypto_meta, + 'X-I-Feel-Lucky': 'Not blocked', + 'Etag': etag} + + def footers_callback(footers): + footers.update(footers_to_add) + + return footers_callback + + class BaseObjectControllerMixin(object): container_info = { 'status': 200, @@ -253,10 +274,11 @@ class BaseObjectControllerMixin(object): def test_connect_put_node_timeout(self): controller = self.controller_cls( self.app, 'a', 'c', 'o') + req = swift.common.swob.Request.blank('/v1/a/c/o') self.app.conn_timeout = 0.05 with set_http_connect(slow_connect=True): nodes = [dict(ip='', port='', device='')] - res = controller._connect_put_node(nodes, '', '', {}, ('', '')) + res = controller._connect_put_node(nodes, '', req, {}, ('', '')) self.assertTrue(res is None) def test_DELETE_simple(self): @@ -564,6 +586,163 @@ class TestReplicatedObjController(BaseObjectControllerMixin, resp = req.get_response(self.app) self.assertEqual(resp.status_int, 201) + def test_PUT_error_with_footers(self): + footers_callback = make_footers_callback('') + env = {'swift.callback.update_footers': footers_callback} + req = swift.common.swob.Request.blank('/v1/a/c/o', method='PUT', + environ=env) + req.headers['content-length'] = '0' + codes = [503] * self.replicas() + expect_headers = { + 'X-Obj-Metadata-Footer': 'yes' + } + + with set_http_connect(*codes, expect_headers=expect_headers): + resp = req.get_response(self.app) + self.assertEqual(resp.status_int, 503) + + def _test_PUT_with_no_footers(self, test_body='', chunked=False): + # verify that when no footers are required then the PUT uses a regular + # single part body + req = swift.common.swob.Request.blank('/v1/a/c/o', method='PUT', + body=test_body) + if chunked: + req.headers['Transfer-Encoding'] = 'chunked' + etag = md5(test_body).hexdigest() + req.headers['Etag'] = etag + + put_requests = defaultdict( + lambda: {'headers': None, 'chunks': [], 'connection': None}) + + def capture_body(conn, chunk): + put_requests[conn.connection_id]['chunks'].append(chunk) + put_requests[conn.connection_id]['connection'] = conn + + def capture_headers(ip, port, device, part, method, path, headers, + **kwargs): + conn_id = kwargs['connection_id'] + put_requests[conn_id]['headers'] = headers + + codes = [201] * self.replicas() + expect_headers = {'X-Obj-Metadata-Footer': 'yes'} + with set_http_connect(*codes, 
expect_headers=expect_headers, + give_send=capture_body, + give_connect=capture_headers): + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 201) + for connection_id, info in put_requests.items(): + body = ''.join(info['chunks']) + headers = info['headers'] + if chunked: + body = unchunk_body(body) + self.assertEqual('100-continue', headers['Expect']) + self.assertEqual('chunked', headers['Transfer-Encoding']) + else: + self.assertNotIn('Transfer-Encoding', headers) + if body: + self.assertEqual('100-continue', headers['Expect']) + else: + self.assertNotIn('Expect', headers) + self.assertNotIn('X-Backend-Obj-Multipart-Mime-Boundary', headers) + self.assertNotIn('X-Backend-Obj-Metadata-Footer', headers) + self.assertNotIn('X-Backend-Obj-Multiphase-Commit', headers) + self.assertEqual(etag, headers['Etag']) + + self.assertEqual(test_body, body) + self.assertTrue(info['connection'].closed) + + def test_PUT_with_chunked_body_and_no_footers(self): + self._test_PUT_with_no_footers(test_body='asdf', chunked=True) + + def test_PUT_with_body_and_no_footers(self): + self._test_PUT_with_no_footers(test_body='asdf', chunked=False) + + def test_PUT_with_no_body_and_no_footers(self): + self._test_PUT_with_no_footers(test_body='', chunked=False) + + def _test_PUT_with_footers(self, test_body=''): + # verify that when footers are required the PUT body is multipart + # and the footers are appended + footers_callback = make_footers_callback(test_body) + env = {'swift.callback.update_footers': footers_callback} + req = swift.common.swob.Request.blank('/v1/a/c/o', method='PUT', + environ=env) + req.body = test_body + # send bogus Etag header to differentiate from footer value + req.headers['Etag'] = 'header_etag' + codes = [201] * self.replicas() + expect_headers = { + 'X-Obj-Metadata-Footer': 'yes' + } + + put_requests = defaultdict( + lambda: {'headers': None, 'chunks': [], 'connection': None}) + + def capture_body(conn, chunk): + put_requests[conn.connection_id]['chunks'].append(chunk) + put_requests[conn.connection_id]['connection'] = conn + + def capture_headers(ip, port, device, part, method, path, headers, + **kwargs): + conn_id = kwargs['connection_id'] + put_requests[conn_id]['headers'] = headers + + with set_http_connect(*codes, expect_headers=expect_headers, + give_send=capture_body, + give_connect=capture_headers): + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 201) + for connection_id, info in put_requests.items(): + body = unchunk_body(''.join(info['chunks'])) + headers = info['headers'] + boundary = headers['X-Backend-Obj-Multipart-Mime-Boundary'] + self.assertTrue(boundary is not None, + "didn't get boundary for conn %r" % ( + connection_id,)) + self.assertEqual('chunked', headers['Transfer-Encoding']) + self.assertEqual('100-continue', headers['Expect']) + self.assertEqual('yes', headers['X-Backend-Obj-Metadata-Footer']) + self.assertNotIn('X-Backend-Obj-Multiphase-Commit', headers) + self.assertEqual('header_etag', headers['Etag']) + + # email.parser.FeedParser doesn't know how to take a multipart + # message and boundary together and parse it; it only knows how + # to take a string, parse the headers, and figure out the + # boundary on its own. 
+ parser = email.parser.FeedParser() + parser.feed( + "Content-Type: multipart/nobodycares; boundary=%s\r\n\r\n" % + boundary) + parser.feed(body) + message = parser.close() + + self.assertTrue(message.is_multipart()) # sanity check + mime_parts = message.get_payload() + # notice, no commit confirmation + self.assertEqual(len(mime_parts), 2) + obj_part, footer_part = mime_parts + + self.assertEqual(obj_part['X-Document'], 'object body') + self.assertEqual(test_body, obj_part.get_payload()) + + # validate footer metadata + self.assertEqual(footer_part['X-Document'], 'object metadata') + footer_metadata = json.loads(footer_part.get_payload()) + self.assertTrue(footer_metadata) + expected = {} + footers_callback(expected) + self.assertDictEqual(expected, footer_metadata) + + self.assertTrue(info['connection'].closed) + + def test_PUT_with_body_and_footers(self): + self._test_PUT_with_footers(test_body='asdf') + + def test_PUT_with_no_body_and_footers(self): + self._test_PUT_with_footers() + def test_txn_id_logging_on_PUT(self): req = swift.common.swob.Request.blank('/v1/a/c/o', method='PUT') self.app.logger.txn_id = req.environ['swift.trans_id'] = 'test-txn-id' @@ -585,11 +764,15 @@ class TestReplicatedObjController(BaseObjectControllerMixin, req.headers['Content-Length'] = '0' req.headers['Etag'] = '"catbus"' - # The 2-tuple here makes getexpect() return 422, not 100. For - # objects that are >0 bytes, you get a 100 Continue and then a 422 - # Unprocessable Entity after sending the body. For zero-byte - # objects, though, you get the 422 right away. - codes = [FakeStatus((422, 422)) + # The 2-tuple here makes getexpect() return 422, not 100. For objects + # that are >0 bytes, you get a 100 Continue and then a 422 + # Unprocessable Entity after sending the body. For zero-byte objects, + # though, you get the 422 right away because no Expect header is sent + # with zero-byte PUT. The second status in the tuple should not be + # consumed, it's just there to make the FakeStatus treat the first as + # an expect status, but we'll make it something other than a 422 so + # that if it is consumed then the test should fail. 
+ codes = [FakeStatus((422, 200)) for _junk in range(self.replicas())] with set_http_connect(*codes): @@ -707,16 +890,24 @@ class TestReplicatedObjController(BaseObjectControllerMixin, class FakeReader(object): def read(self, size): raise Timeout() + conns = [] + + def capture_expect(conn): + # stash connections so that we can verify they all get closed + conns.append(conn) req = swob.Request.blank('/v1/a/c/o.jpg', method='PUT', body='test body') req.environ['wsgi.input'] = FakeReader() req.headers['content-length'] = '6' - with set_http_connect(201, 201, 201): + with set_http_connect(201, 201, 201, give_expect=capture_expect): resp = req.get_response(self.app) self.assertEqual(resp.status_int, 499) + self.assertEqual(self.replicas(), len(conns)) + for conn in conns: + self.assertTrue(conn.closed) def test_PUT_exception_during_transfer_data(self): class FakeReader(object): @@ -1131,6 +1322,108 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): self.assertEqual(resp.status_int, 200) self.assertIn('Accept-Ranges', resp.headers) + def _test_if_match(self, method): + num_responses = self.policy.ec_ndata if method == 'GET' else 1 + + def _do_test(match_value, backend_status, + etag_is_at='X-Object-Sysmeta-Does-Not-Exist'): + req = swift.common.swob.Request.blank( + '/v1/a/c/o', method=method, + headers={'If-Match': match_value, + 'X-Backend-Etag-Is-At': etag_is_at}) + get_resp = [backend_status] * num_responses + resp_headers = {'Etag': 'frag_etag', + 'X-Object-Sysmeta-Ec-Etag': 'data_etag', + 'X-Object-Sysmeta-Alternate-Etag': 'alt_etag'} + with set_http_connect(*get_resp, headers=resp_headers): + resp = req.get_response(self.app) + self.assertEqual('data_etag', resp.headers['Etag']) + return resp + + # wildcard + resp = _do_test('*', 200) + self.assertEqual(resp.status_int, 200) + + # match + resp = _do_test('"data_etag"', 200) + self.assertEqual(resp.status_int, 200) + + # no match + resp = _do_test('"frag_etag"', 412) + self.assertEqual(resp.status_int, 412) + + # match wildcard against an alternate etag + resp = _do_test('*', 200, + etag_is_at='X-Object-Sysmeta-Alternate-Etag') + self.assertEqual(resp.status_int, 200) + + # match against an alternate etag + resp = _do_test('"alt_etag"', 200, + etag_is_at='X-Object-Sysmeta-Alternate-Etag') + self.assertEqual(resp.status_int, 200) + + # no match against an alternate etag + resp = _do_test('"data_etag"', 412, + etag_is_at='X-Object-Sysmeta-Alternate-Etag') + self.assertEqual(resp.status_int, 412) + + def test_GET_if_match(self): + self._test_if_match('GET') + + def test_HEAD_if_match(self): + self._test_if_match('HEAD') + + def _test_if_none_match(self, method): + num_responses = self.policy.ec_ndata if method == 'GET' else 1 + + def _do_test(match_value, backend_status, + etag_is_at='X-Object-Sysmeta-Does-Not-Exist'): + req = swift.common.swob.Request.blank( + '/v1/a/c/o', method=method, + headers={'If-None-Match': match_value, + 'X-Backend-Etag-Is-At': etag_is_at}) + get_resp = [backend_status] * num_responses + resp_headers = {'Etag': 'frag_etag', + 'X-Object-Sysmeta-Ec-Etag': 'data_etag', + 'X-Object-Sysmeta-Alternate-Etag': 'alt_etag'} + with set_http_connect(*get_resp, headers=resp_headers): + resp = req.get_response(self.app) + self.assertEqual('data_etag', resp.headers['Etag']) + return resp + + # wildcard + resp = _do_test('*', 304) + self.assertEqual(resp.status_int, 304) + + # match + resp = _do_test('"data_etag"', 304) + self.assertEqual(resp.status_int, 304) + + # no match + resp = _do_test('"frag_etag"', 
200) + self.assertEqual(resp.status_int, 200) + + # match wildcard against an alternate etag + resp = _do_test('*', 304, + etag_is_at='X-Object-Sysmeta-Alternate-Etag') + self.assertEqual(resp.status_int, 304) + + # match against an alternate etag + resp = _do_test('"alt_etag"', 304, + etag_is_at='X-Object-Sysmeta-Alternate-Etag') + self.assertEqual(resp.status_int, 304) + + # no match against an alternate etag + resp = _do_test('"data_etag"', 200, + etag_is_at='X-Object-Sysmeta-Alternate-Etag') + self.assertEqual(resp.status_int, 200) + + def test_GET_if_none_match(self): + self._test_if_none_match('GET') + + def test_HEAD_if_none_match(self): + self._test_if_none_match('HEAD') + def test_GET_simple_x_newest(self): req = swift.common.swob.Request.blank('/v1/a/c/o', headers={'X-Newest': 'true'}) @@ -1194,6 +1487,42 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): resp = req.get_response(self.app) self.assertEqual(resp.status_int, 201) + def test_PUT_with_body_and_bad_etag(self): + segment_size = self.policy.ec_segment_size + test_body = ('asdf' * segment_size)[:-10] + codes = [201] * self.replicas() + expect_headers = { + 'X-Obj-Metadata-Footer': 'yes', + 'X-Obj-Multiphase-Commit': 'yes' + } + conns = [] + + def capture_expect(conn): + # stash the backend connection so we can verify that it is closed + # (no data will be sent) + conns.append(conn) + + # send a bad etag in the request headers + headers = {'Etag': 'bad etag'} + req = swift.common.swob.Request.blank( + '/v1/a/c/o', method='PUT', headers=headers, body=test_body) + with set_http_connect(*codes, expect_headers=expect_headers, + give_expect=capture_expect): + resp = req.get_response(self.app) + self.assertEqual(422, resp.status_int) + self.assertEqual(self.replicas(), len(conns)) + for conn in conns: + self.assertTrue(conn.closed) + + # make the footers callback send a bad Etag footer + footers_callback = make_footers_callback('not the test body') + env = {'swift.callback.update_footers': footers_callback} + req = swift.common.swob.Request.blank( + '/v1/a/c/o', method='PUT', environ=env, body=test_body) + with set_http_connect(*codes, expect_headers=expect_headers): + resp = req.get_response(self.app) + self.assertEqual(422, resp.status_int) + def test_txn_id_logging_ECPUT(self): req = swift.common.swob.Request.blank('/v1/a/c/o', method='PUT', body='') @@ -1399,9 +1728,15 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): self.assertEqual(resp.status_int, 500) def test_PUT_with_body(self): - req = swift.common.swob.Request.blank('/v1/a/c/o', method='PUT') segment_size = self.policy.ec_segment_size test_body = ('asdf' * segment_size)[:-10] + # make the footers callback not include Etag footer so that we can + # verify that the correct EC-calculated Etag is included in footers + # sent to backend + footers_callback = make_footers_callback() + env = {'swift.callback.update_footers': footers_callback} + req = swift.common.swob.Request.blank( + '/v1/a/c/o', method='PUT', environ=env) etag = md5(test_body).hexdigest() size = len(test_body) req.body = test_body @@ -1413,8 +1748,8 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): put_requests = defaultdict(lambda: {'boundary': None, 'chunks': []}) - def capture_body(conn_id, chunk): - put_requests[conn_id]['chunks'].append(chunk) + def capture_body(conn, chunk): + put_requests[conn.connection_id]['chunks'].append(chunk) def capture_headers(ip, port, device, part, method, path, headers, **kwargs): @@ -1471,13 
+1806,16 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): self.assertEqual(footer_part['X-Document'], 'object metadata') footer_metadata = json.loads(footer_part.get_payload()) self.assertTrue(footer_metadata) - expected = { - 'X-Object-Sysmeta-EC-Content-Length': str(size), + expected = {} + # update expected with footers from the callback... + footers_callback(expected) + expected.update({ + 'X-Object-Sysmeta-Ec-Content-Length': str(size), 'X-Backend-Container-Update-Override-Size': str(size), - 'X-Object-Sysmeta-EC-Etag': etag, + 'X-Object-Sysmeta-Ec-Etag': etag, 'X-Backend-Container-Update-Override-Etag': etag, - 'X-Object-Sysmeta-EC-Segment-Size': str(segment_size), - } + 'X-Object-Sysmeta-Ec-Segment-Size': str(segment_size), + 'Etag': md5(obj_part.get_payload()).hexdigest()}) for header, value in expected.items(): self.assertEqual(footer_metadata[header], value) @@ -1504,6 +1842,118 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): self.assertEqual(len(test_body), len(expected_body)) self.assertEqual(test_body, expected_body) + def test_PUT_with_footers(self): + # verify footers supplied by a footers callback being added to + # trailing metadata + segment_size = self.policy.ec_segment_size + test_body = ('asdf' * segment_size)[:-10] + etag = md5(test_body).hexdigest() + size = len(test_body) + codes = [201] * self.replicas() + expect_headers = { + 'X-Obj-Metadata-Footer': 'yes', + 'X-Obj-Multiphase-Commit': 'yes' + } + + def do_test(footers_to_add, expect_added): + put_requests = defaultdict( + lambda: {'boundary': None, 'chunks': []}) + + def capture_body(conn, chunk): + put_requests[conn.connection_id]['chunks'].append(chunk) + + def capture_headers(ip, port, device, part, method, path, headers, + **kwargs): + conn_id = kwargs['connection_id'] + put_requests[conn_id]['boundary'] = headers[ + 'X-Backend-Obj-Multipart-Mime-Boundary'] + + def footers_callback(footers): + footers.update(footers_to_add) + env = {'swift.callback.update_footers': footers_callback} + req = swift.common.swob.Request.blank( + '/v1/a/c/o', method='PUT', environ=env, body=test_body) + + with set_http_connect(*codes, expect_headers=expect_headers, + give_send=capture_body, + give_connect=capture_headers): + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 201) + for connection_id, info in put_requests.items(): + body = unchunk_body(''.join(info['chunks'])) + # email.parser.FeedParser doesn't know how to take a multipart + # message and boundary together and parse it; it only knows how + # to take a string, parse the headers, and figure out the + # boundary on its own. 
+ parser = email.parser.FeedParser() + parser.feed( + "Content-Type: multipart/nobodycares; boundary=%s\r\n\r\n" + % info['boundary']) + parser.feed(body) + message = parser.close() + + self.assertTrue(message.is_multipart()) # sanity check + mime_parts = message.get_payload() + self.assertEqual(len(mime_parts), 3) + obj_part, footer_part, commit_part = mime_parts + + # validate EC footer metadata - should always be present + self.assertEqual(footer_part['X-Document'], 'object metadata') + footer_metadata = json.loads(footer_part.get_payload()) + self.assertIsNotNone( + footer_metadata.pop('X-Object-Sysmeta-Ec-Frag-Index')) + expected = { + 'X-Object-Sysmeta-Ec-Scheme': + self.policy.ec_scheme_description, + 'X-Object-Sysmeta-Ec-Content-Length': str(size), + 'X-Object-Sysmeta-Ec-Etag': etag, + 'X-Object-Sysmeta-Ec-Segment-Size': str(segment_size), + 'Etag': md5(obj_part.get_payload()).hexdigest()} + expected.update(expect_added) + for header, value in expected.items(): + self.assertIn(header, footer_metadata) + self.assertEqual(value, footer_metadata[header]) + footer_metadata.pop(header) + self.assertFalse(footer_metadata) + + # sanity check - middleware sets no footer, expect EC overrides + footers_to_add = {} + expect_added = { + 'X-Backend-Container-Update-Override-Size': str(size), + 'X-Backend-Container-Update-Override-Etag': etag} + do_test(footers_to_add, expect_added) + + # middleware cannot overwrite any EC sysmeta + footers_to_add = { + 'X-Object-Sysmeta-Ec-Content-Length': str(size + 1), + 'X-Object-Sysmeta-Ec-Etag': 'other etag', + 'X-Object-Sysmeta-Ec-Segment-Size': str(segment_size + 1), + 'X-Object-Sysmeta-Ec-Unused-But-Reserved': 'ignored'} + do_test(footers_to_add, expect_added) + + # middleware can add x-object-sysmeta- headers including + # x-object-sysmeta-container-update-override headers + footers_to_add = { + 'X-Object-Sysmeta-Foo': 'bar', + 'X-Object-Sysmeta-Container-Update-Override-Size': + str(size + 1), + 'X-Object-Sysmeta-Container-Update-Override-Etag': 'other etag', + 'X-Object-Sysmeta-Container-Update-Override-Ping': 'pong' + } + expect_added.update(footers_to_add) + do_test(footers_to_add, expect_added) + + # middleware can also overwrite x-backend-container-update-override + # headers + override_footers = { + 'X-Backend-Container-Update-Override-Wham': 'bam', + 'X-Backend-Container-Update-Override-Size': str(size + 2), + 'X-Backend-Container-Update-Override-Etag': 'another etag'} + footers_to_add.update(override_footers) + expect_added.update(override_footers) + do_test(footers_to_add, expect_added) + def test_PUT_old_obj_server(self): req = swift.common.swob.Request.blank('/v1/a/c/o', method='PUT', body='') diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 6ae48bc605..f43ca5778e 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -2011,7 +2011,7 @@ class TestObjectController(unittest.TestCase): call_count[0] += 1 commit_confirmation = \ - 'swift.proxy.controllers.obj.ECPutter.send_commit_confirmation' + 'swift.proxy.controllers.obj.MIMEPutter.send_commit_confirmation' with mock.patch('swift.obj.server.md5', busted_md5_constructor), \ mock.patch(commit_confirmation, mock_committer): @@ -2062,7 +2062,7 @@ class TestObjectController(unittest.TestCase): read_footer = \ 'swift.obj.server.ObjectController._read_metadata_footer' commit_confirmation = \ - 'swift.proxy.controllers.obj.ECPutter.send_commit_confirmation' + 'swift.proxy.controllers.obj.MIMEPutter.send_commit_confirmation' with 
mock.patch(read_footer) as read_footer_call, \ mock.patch(commit_confirmation, mock_committer): From fa7d80029b53391a7877aeb6438c98a45bab42a7 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Mon, 6 Jun 2016 18:16:11 +0100 Subject: [PATCH 4/7] Make container update override headers persistent Whatever container update override etag is sent to the object server with a PUT must be used in container updates for subsequent POSTs. Unfortunately the current container update override headers (x-backend-container-update-override-*) are not persisted with the object metadata so are not available when handling a POST. For EC there is an ugly hack in the object server to use the x-object-sysmeta-ec-[etag,size] values when doing a container update for a POST. With crypto, the encryption middleware needs to override the etag (possibly overriding the already overridden EC etag value) with an encrypted etag value. We therefore have a similar problem that this override value is not persisted at the object server. This patch introduces a new namespace for container override headers, x-object-sysmeta-container-update-override-*, which uses object sysmeta so that override values are persisted. This allows a general mechanism in the object server to apply the override values (if any have been set) from object sysmeta when constructing a container update for a PUT or a POST. Middleware should use the x-object-sysmeta-container-update-override-* namespace when setting container update overrides. Middleware should be aware that other middleware may have already set container override headers, in which case consideration should be given to whether any existing value should take precedence. For backwards compatibility the existing x-backend-container-update-override-* style headers are still supported in the object server for EC override values, and the ugly hack for EC etag/size override in POST updates remains in the object server. That allows an older proxy server to be used with an upgraded object server. The proxy server continues to use the x-backend-container-update-override-* style headers for EC values so that an older object server will continue to work with an upgraded proxy server. x-object-sysmeta-container-update-override-* headers take precedence over x-backend-container-update-override-* headers and the use of x-backend-container-update-override-* headers by middleware is deprecated. Existing third party middleware that is using x-backend-container-update-override-* headers should be modified to use x-object-sysmeta-container-update-override-* headers in order to be compatible with other middleware such as encryption and to ensure that container updates during POST requests carry correct values. If targeting multiple versions of Swift object servers it may be necessary to send headers from both namespaces. However, in general it is recommended to upgrade all backend servers, then upgrade proxy servers before finally upgrading third party middleware. 
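For illustration, a minimal sketch (the middleware name and the override values are hypothetical, not part of this change) of how a piece of middleware might set the new-style persisted override headers on an object PUT:

    from swift.common.swob import Request


    class EtagOverrideMiddleware(object):
        """Hypothetical middleware that overrides the etag and size recorded
        in container listings, using the persisted sysmeta namespace."""

        def __init__(self, app):
            self.app = app

        def __call__(self, env, start_response):
            req = Request(env)
            if req.method == 'PUT':
                # stored as object sysmeta by the object server, so the same
                # values remain available for container updates generated by
                # subsequent POSTs
                prefix = 'X-Object-Sysmeta-Container-Update-Override-'
                req.headers[prefix + 'Etag'] = 'ciphertext-etag'
                req.headers[prefix + 'Size'] = '12'
            return self.app(env, start_response)


    def filter_factory(global_conf, **local_conf):
        def factory(app):
            return EtagOverrideMiddleware(app)
        return factory
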
Co-Authored-By: Tim Burke UpgradeImpact Change-Id: Ib80b4db57dfc2d37ea8ed3745084a3981d082784 --- swift/common/middleware/copy.py | 21 ++- swift/obj/server.py | 31 +++- swift/proxy/controllers/obj.py | 5 + test/probe/test_object_async_update.py | 104 +++++++++--- test/unit/common/middleware/helpers.py | 2 + test/unit/common/middleware/test_copy.py | 188 +++++++++++++++++++++- test/unit/obj/test_server.py | 196 ++++++++++++++++++----- 7 files changed, 471 insertions(+), 76 deletions(-) diff --git a/swift/common/middleware/copy.py b/swift/common/middleware/copy.py index b446b1b7b3..a5fc44ca2d 100644 --- a/swift/common/middleware/copy.py +++ b/swift/common/middleware/copy.py @@ -142,7 +142,7 @@ from swift.common.utils import get_logger, \ from swift.common.swob import Request, HTTPPreconditionFailed, \ HTTPRequestEntityTooLarge, HTTPBadRequest from swift.common.http import HTTP_MULTIPLE_CHOICES, HTTP_CREATED, \ - is_success + is_success, HTTP_OK from swift.common.constraints import check_account_format, MAX_FILE_SIZE from swift.common.request_helpers import copy_header_subset, remove_items, \ is_sys_meta, is_sys_or_user_meta @@ -474,7 +474,24 @@ class ServerSideCopyMiddleware(object): # Set data source, content length and etag for the PUT request sink_req.environ['wsgi.input'] = FileLikeIter(source_resp.app_iter) sink_req.content_length = source_resp.content_length - sink_req.etag = source_resp.etag + if (source_resp.status_int == HTTP_OK and + 'X-Static-Large-Object' not in source_resp.headers and + ('X-Object-Manifest' not in source_resp.headers or + req.params.get('multipart-manifest') == 'get')): + # copy source etag so that copied content is verified, unless: + # - not a 200 OK response: source etag may not match the actual + # content, for example with a 206 Partial Content response to a + # ranged request + # - SLO manifest: etag cannot be specified in manifest PUT; SLO + # generates its own etag value which may differ from source + # - SLO: etag in SLO response is not hash of actual content + # - DLO: etag in DLO response is not hash of actual content + sink_req.headers['Etag'] = source_resp.etag + else: + # since we're not copying the source etag, make sure that any + # container update override values are not copied. + remove_items(source_resp.headers, lambda k: k.startswith( + 'X-Object-Sysmeta-Container-Update-Override-')) # We no longer need these headers sink_req.headers.pop('X-Copy-From', None) diff --git a/swift/obj/server.py b/swift/obj/server.py index 99083800eb..7193b73e70 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -447,11 +447,32 @@ class ObjectController(BaseStorageServer): raise HTTPBadRequest("invalid JSON for footer doc") def _check_container_override(self, update_headers, metadata): - for key, val in metadata.items(): - override_prefix = 'x-backend-container-update-override-' - if key.lower().startswith(override_prefix): - override = key.lower().replace(override_prefix, 'x-') - update_headers[override] = val + """ + Applies any overrides to the container update headers. + + Overrides may be in the x-object-sysmeta-container-update- namespace or + the x-backend-container-update-override- namespace. The former is + preferred and is used by proxy middlewares. 
The latter is historical + but is still used with EC policy PUT requests; for backwards + compatibility the header names used with EC policy requests have not + been changed to the sysmeta namespace - that way the EC PUT path of a + newer proxy will remain compatible with an object server that pre-dates + the introduction of the x-object-sysmeta-container-update- namespace + and vice-versa. + + :param update_headers: a dict of headers used in the container update + :param metadata: a dict that may contain override items + """ + # the order of this list is significant: + # x-object-sysmeta-container-update-override-* headers take precedence + # over x-backend-container-update-override-* headers + override_prefixes = ['x-backend-container-update-override-', + 'x-object-sysmeta-container-update-override-'] + for override_prefix in override_prefixes: + for key, val in metadata.items(): + if key.lower().startswith(override_prefix): + override = key.lower().replace(override_prefix, 'x-') + update_headers[override] = val def _preserve_slo_manifest(self, update_metadata, orig_metadata): if 'X-Static-Large-Object' in orig_metadata: diff --git a/swift/proxy/controllers/obj.py b/swift/proxy/controllers/obj.py index af6b9368d7..962cf1bec6 100644 --- a/swift/proxy/controllers/obj.py +++ b/swift/proxy/controllers/obj.py @@ -1818,6 +1818,11 @@ def trailing_metadata(policy, client_obj_hasher, 'X-Object-Sysmeta-EC-Etag': client_obj_hasher.hexdigest(), 'X-Object-Sysmeta-EC-Content-Length': str(bytes_transferred_from_client), + # older style x-backend-container-update-override-* headers are used + # here (rather than x-object-sysmeta-container-update-override-* + # headers) for backwards compatibility: the request may be to an object + # server that has not yet been upgraded to accept the newer style + # x-object-sysmeta-container-update-override- headers. 'X-Backend-Container-Update-Override-Etag': client_obj_hasher.hexdigest(), 'X-Backend-Container-Update-Override-Size': diff --git a/test/probe/test_object_async_update.py b/test/probe/test_object_async_update.py index b831bbeb72..bab7286424 100755 --- a/test/probe/test_object_async_update.py +++ b/test/probe/test_object_async_update.py @@ -62,7 +62,7 @@ class TestObjectAsyncUpdate(ReplProbeTest): class TestUpdateOverrides(ReplProbeTest): """ Use an internal client to PUT an object to proxy server, - bypassing gatekeeper so that X-Backend- headers can be included. + bypassing gatekeeper so that X-Object-Sysmeta- headers can be included. Verify that the update override headers take effect and override values propagate to the container server.
""" @@ -71,10 +71,10 @@ class TestUpdateOverrides(ReplProbeTest): int_client = self.make_internal_client() headers = { 'Content-Type': 'text/plain', - 'X-Backend-Container-Update-Override-Etag': 'override-etag', - 'X-Backend-Container-Update-Override-Content-Type': + 'X-Object-Sysmeta-Container-Update-Override-Etag': 'override-etag', + 'X-Object-Sysmeta-Container-Update-Override-Content-Type': 'override-type', - 'X-Backend-Container-Update-Override-Size': '1999' + 'X-Object-Sysmeta-Container-Update-Override-Size': '1999' } client.put_container(self.url, self.token, 'c1', headers={'X-Storage-Policy': @@ -117,7 +117,8 @@ class TestUpdateOverridesEC(ECProbeTest): # an async update to it kill_server((cnodes[0]['ip'], cnodes[0]['port']), self.ipport2server) content = u'stuff' - client.put_object(self.url, self.token, 'c1', 'o1', contents=content) + client.put_object(self.url, self.token, 'c1', 'o1', contents=content, + content_type='test/ctype') meta = client.head_object(self.url, self.token, 'c1', 'o1') # re-start the container server and assert that it does not yet know @@ -129,11 +130,26 @@ class TestUpdateOverridesEC(ECProbeTest): # Run the object-updaters to be sure updates are done Manager(['object-updater']).once() - # check the re-started container server has update with override values - obj = direct_client.direct_get_container( - cnodes[0], cpart, self.account, 'c1')[1][0] - self.assertEqual(meta['etag'], obj['hash']) - self.assertEqual(len(content), obj['bytes']) + # check the re-started container server got same update as others. + # we cannot assert the actual etag value because it may be encrypted + listing_etags = set() + for cnode in cnodes: + listing = direct_client.direct_get_container( + cnode, cpart, self.account, 'c1')[1] + self.assertEqual(1, len(listing)) + self.assertEqual(len(content), listing[0]['bytes']) + self.assertEqual('test/ctype', listing[0]['content_type']) + listing_etags.add(listing[0]['hash']) + self.assertEqual(1, len(listing_etags)) + + # check that listing meta returned to client is consistent with object + # meta returned to client + hdrs, listing = client.get_container(self.url, self.token, 'c1') + self.assertEqual(1, len(listing)) + self.assertEqual('o1', listing[0]['name']) + self.assertEqual(len(content), listing[0]['bytes']) + self.assertEqual(meta['etag'], listing[0]['hash']) + self.assertEqual('test/ctype', listing[0]['content_type']) def test_update_during_POST_only(self): # verify correct update values when PUT update is missed but then a @@ -147,7 +163,8 @@ class TestUpdateOverridesEC(ECProbeTest): # an async update to it kill_server((cnodes[0]['ip'], cnodes[0]['port']), self.ipport2server) content = u'stuff' - client.put_object(self.url, self.token, 'c1', 'o1', contents=content) + client.put_object(self.url, self.token, 'c1', 'o1', contents=content, + content_type='test/ctype') meta = client.head_object(self.url, self.token, 'c1', 'o1') # re-start the container server and assert that it does not yet know @@ -165,20 +182,39 @@ class TestUpdateOverridesEC(ECProbeTest): int_client.get_object_metadata(self.account, 'c1', 'o1') ['x-object-meta-fruit']) # sanity - # check the re-started container server has update with override values - obj = direct_client.direct_get_container( - cnodes[0], cpart, self.account, 'c1')[1][0] - self.assertEqual(meta['etag'], obj['hash']) - self.assertEqual(len(content), obj['bytes']) + # check the re-started container server got same update as others. 
+ # we cannot assert the actual etag value because it may be encrypted + listing_etags = set() + for cnode in cnodes: + listing = direct_client.direct_get_container( + cnode, cpart, self.account, 'c1')[1] + self.assertEqual(1, len(listing)) + self.assertEqual(len(content), listing[0]['bytes']) + self.assertEqual('test/ctype', listing[0]['content_type']) + listing_etags.add(listing[0]['hash']) + self.assertEqual(1, len(listing_etags)) + + # check that listing meta returned to client is consistent with object + # meta returned to client + hdrs, listing = client.get_container(self.url, self.token, 'c1') + self.assertEqual(1, len(listing)) + self.assertEqual('o1', listing[0]['name']) + self.assertEqual(len(content), listing[0]['bytes']) + self.assertEqual(meta['etag'], listing[0]['hash']) + self.assertEqual('test/ctype', listing[0]['content_type']) # Run the object-updaters to send the async pending from the PUT Manager(['object-updater']).once() # check container listing metadata is still correct - obj = direct_client.direct_get_container( - cnodes[0], cpart, self.account, 'c1')[1][0] - self.assertEqual(meta['etag'], obj['hash']) - self.assertEqual(len(content), obj['bytes']) + for cnode in cnodes: + listing = direct_client.direct_get_container( + cnode, cpart, self.account, 'c1')[1] + self.assertEqual(1, len(listing)) + self.assertEqual(len(content), listing[0]['bytes']) + self.assertEqual('test/ctype', listing[0]['content_type']) + listing_etags.add(listing[0]['hash']) + self.assertEqual(1, len(listing_etags)) def test_async_updates_after_PUT_and_POST(self): # verify correct update values when PUT update and POST updates are @@ -192,7 +228,8 @@ class TestUpdateOverridesEC(ECProbeTest): # we force async updates to it kill_server((cnodes[0]['ip'], cnodes[0]['port']), self.ipport2server) content = u'stuff' - client.put_object(self.url, self.token, 'c1', 'o1', contents=content) + client.put_object(self.url, self.token, 'c1', 'o1', contents=content, + content_type='test/ctype') meta = client.head_object(self.url, self.token, 'c1', 'o1') # use internal client for POST so we can force fast-post mode @@ -213,11 +250,26 @@ class TestUpdateOverridesEC(ECProbeTest): # Run the object-updaters to send the async pendings Manager(['object-updater']).once() - # check container listing metadata is still correct - obj = direct_client.direct_get_container( - cnodes[0], cpart, self.account, 'c1')[1][0] - self.assertEqual(meta['etag'], obj['hash']) - self.assertEqual(len(content), obj['bytes']) + # check the re-started container server got same update as others. 
+ # we cannot assert the actual etag value because it may be encrypted + listing_etags = set() + for cnode in cnodes: + listing = direct_client.direct_get_container( + cnode, cpart, self.account, 'c1')[1] + self.assertEqual(1, len(listing)) + self.assertEqual(len(content), listing[0]['bytes']) + self.assertEqual('test/ctype', listing[0]['content_type']) + listing_etags.add(listing[0]['hash']) + self.assertEqual(1, len(listing_etags)) + + # check that listing meta returned to client is consistent with object + # meta returned to client + hdrs, listing = client.get_container(self.url, self.token, 'c1') + self.assertEqual(1, len(listing)) + self.assertEqual('o1', listing[0]['name']) + self.assertEqual(len(content), listing[0]['bytes']) + self.assertEqual(meta['etag'], listing[0]['hash']) + self.assertEqual('test/ctype', listing[0]['content_type']) if __name__ == '__main__': diff --git a/test/unit/common/middleware/helpers.py b/test/unit/common/middleware/helpers.py index 8b8fff3b3d..c295ee4768 100644 --- a/test/unit/common/middleware/helpers.py +++ b/test/unit/common/middleware/helpers.py @@ -128,6 +128,8 @@ class FakeSwift(object): if "CONTENT_TYPE" in env: self.uploaded[path][0]['Content-Type'] = env["CONTENT_TYPE"] + # note: tests may assume this copy of req_headers is case insensitive + # so we deliberately use a HeaderKeyDict self._calls.append((method, path, HeaderKeyDict(req_headers))) # range requests ought to work, hence conditional_response=True diff --git a/test/unit/common/middleware/test_copy.py b/test/unit/common/middleware/test_copy.py index 254203e630..3f024d4395 100644 --- a/test/unit/common/middleware/test_copy.py +++ b/test/unit/common/middleware/test_copy.py @@ -20,6 +20,7 @@ import shutil import tempfile import unittest from hashlib import md5 +from six.moves import urllib from textwrap import dedent from swift.common import swob @@ -224,9 +225,10 @@ class TestServerSideCopyMiddleware(unittest.TestCase): self.assertEqual('PUT', self.authorized[1].method) self.assertEqual('/v1/a/c/o2', self.authorized[1].path) - def test_static_large_object(self): + def test_static_large_object_manifest(self): self.app.register('GET', '/v1/a/c/o', swob.HTTPOk, - {'X-Static-Large-Object': 'True'}, 'passed') + {'X-Static-Large-Object': 'True', + 'Etag': 'should not be sent'}, 'passed') self.app.register('PUT', '/v1/a/c/o2?multipart-manifest=put', swob.HTTPCreated, {}) req = Request.blank('/v1/a/c/o2?multipart-manifest=get', @@ -236,11 +238,43 @@ class TestServerSideCopyMiddleware(unittest.TestCase): status, headers, body = self.call_ssc(req) self.assertEqual(status, '201 Created') self.assertTrue(('X-Copied-From', 'c/o') in headers) - calls = self.app.calls_with_headers - method, path, req_headers = calls[1] - self.assertEqual('PUT', method) - self.assertEqual('/v1/a/c/o2?multipart-manifest=put', path) + self.assertEqual(2, len(self.app.calls)) + self.assertEqual('GET', self.app.calls[0][0]) + get_path, qs = self.app.calls[0][1].split('?') + params = urllib.parse.parse_qs(qs) + self.assertDictEqual( + {'format': ['raw'], 'multipart-manifest': ['get']}, params) + self.assertEqual(get_path, '/v1/a/c/o') + self.assertEqual(self.app.calls[1], + ('PUT', '/v1/a/c/o2?multipart-manifest=put')) + req_headers = self.app.headers[1] self.assertNotIn('X-Static-Large-Object', req_headers) + self.assertNotIn('Etag', req_headers) + self.assertEqual(len(self.authorized), 2) + self.assertEqual('GET', self.authorized[0].method) + self.assertEqual('/v1/a/c/o', self.authorized[0].path) + self.assertEqual('PUT', 
self.authorized[1].method) + self.assertEqual('/v1/a/c/o2', self.authorized[1].path) + + def test_static_large_object(self): + self.app.register('GET', '/v1/a/c/o', swob.HTTPOk, + {'X-Static-Large-Object': 'True', + 'Etag': 'should not be sent'}, 'passed') + self.app.register('PUT', '/v1/a/c/o2', + swob.HTTPCreated, {}) + req = Request.blank('/v1/a/c/o2', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Content-Length': '0', + 'X-Copy-From': 'c/o'}) + status, headers, body = self.call_ssc(req) + self.assertEqual(status, '201 Created') + self.assertTrue(('X-Copied-From', 'c/o') in headers) + self.assertEqual(self.app.calls, [ + ('GET', '/v1/a/c/o'), + ('PUT', '/v1/a/c/o2')]) + req_headers = self.app.headers[1] + self.assertNotIn('X-Static-Large-Object', req_headers) + self.assertNotIn('Etag', req_headers) self.assertEqual(len(self.authorized), 2) self.assertEqual('GET', self.authorized[0].method) self.assertEqual('/v1/a/c/o', self.authorized[0].path) @@ -587,7 +621,8 @@ class TestServerSideCopyMiddleware(unittest.TestCase): self.assertEqual('/v1/a/c/o', self.authorized[0].path) def test_basic_COPY(self): - self.app.register('GET', '/v1/a/c/o', swob.HTTPOk, {}, 'passed') + self.app.register('GET', '/v1/a/c/o', swob.HTTPOk, { + 'etag': 'is sent'}, 'passed') self.app.register('PUT', '/v1/a/c/o-copy', swob.HTTPCreated, {}) req = Request.blank( '/v1/a/c/o', method='COPY', @@ -601,6 +636,145 @@ class TestServerSideCopyMiddleware(unittest.TestCase): self.assertEqual('/v1/a/c/o', self.authorized[0].path) self.assertEqual('PUT', self.authorized[1].method) self.assertEqual('/v1/a/c/o-copy', self.authorized[1].path) + self.assertEqual(self.app.calls, [ + ('GET', '/v1/a/c/o'), + ('PUT', '/v1/a/c/o-copy')]) + self.assertIn('etag', self.app.headers[1]) + self.assertEqual(self.app.headers[1]['etag'], 'is sent') + + def test_basic_DLO(self): + self.app.register('GET', '/v1/a/c/o', swob.HTTPOk, { + 'x-object-manifest': 'some/path', + 'etag': 'is not sent'}, 'passed') + self.app.register('PUT', '/v1/a/c/o-copy', swob.HTTPCreated, {}) + req = Request.blank( + '/v1/a/c/o', method='COPY', + headers={'Content-Length': 0, + 'Destination': 'c/o-copy'}) + status, headers, body = self.call_ssc(req) + self.assertEqual(status, '201 Created') + self.assertTrue(('X-Copied-From', 'c/o') in headers) + self.assertEqual(self.app.calls, [ + ('GET', '/v1/a/c/o'), + ('PUT', '/v1/a/c/o-copy')]) + self.assertNotIn('x-object-manifest', self.app.headers[1]) + self.assertNotIn('etag', self.app.headers[1]) + + def test_basic_DLO_manifest(self): + self.app.register('GET', '/v1/a/c/o', swob.HTTPOk, { + 'x-object-manifest': 'some/path', + 'etag': 'is sent'}, 'passed') + self.app.register('PUT', '/v1/a/c/o-copy', swob.HTTPCreated, {}) + req = Request.blank( + '/v1/a/c/o?multipart-manifest=get', method='COPY', + headers={'Content-Length': 0, + 'Destination': 'c/o-copy'}) + status, headers, body = self.call_ssc(req) + self.assertEqual(status, '201 Created') + self.assertTrue(('X-Copied-From', 'c/o') in headers) + self.assertEqual(2, len(self.app.calls)) + self.assertEqual('GET', self.app.calls[0][0]) + get_path, qs = self.app.calls[0][1].split('?') + params = urllib.parse.parse_qs(qs) + self.assertDictEqual( + {'format': ['raw'], 'multipart-manifest': ['get']}, params) + self.assertEqual(get_path, '/v1/a/c/o') + self.assertEqual(self.app.calls[1], ('PUT', '/v1/a/c/o-copy')) + self.assertIn('x-object-manifest', self.app.headers[1]) + self.assertEqual(self.app.headers[1]['x-object-manifest'], 'some/path') + self.assertIn('etag', 
self.app.headers[1]) + self.assertEqual(self.app.headers[1]['etag'], 'is sent') + + def test_COPY_source_metadata(self): + source_headers = { + 'x-object-sysmeta-test1': 'copy me', + 'x-object-meta-test2': 'copy me too', + 'x-object-sysmeta-container-update-override-etag': 'etag val', + 'x-object-sysmeta-container-update-override-size': 'size val', + 'x-object-sysmeta-container-update-override-foo': 'bar'} + + get_resp_headers = source_headers.copy() + get_resp_headers['etag'] = 'source etag' + self.app.register( + 'GET', '/v1/a/c/o', swob.HTTPOk, + headers=get_resp_headers, body='passed') + + def verify_headers(expected_headers, unexpected_headers, + actual_headers): + for k, v in actual_headers: + if k.lower() in expected_headers: + expected_val = expected_headers.pop(k.lower()) + self.assertEqual(expected_val, v) + self.assertNotIn(k.lower(), unexpected_headers) + self.assertFalse(expected_headers) + + # use a COPY request + self.app.register('PUT', '/v1/a/c/o-copy0', swob.HTTPCreated, {}) + req = Request.blank('/v1/a/c/o', method='COPY', + headers={'Content-Length': 0, + 'Destination': 'c/o-copy0'}) + status, headers, body = self.call_ssc(req) + self.assertEqual('201 Created', status) + verify_headers(source_headers.copy(), [], headers) + method, path, headers = self.app.calls_with_headers[-1] + self.assertEqual('PUT', method) + self.assertEqual('/v1/a/c/o-copy0', path) + verify_headers(source_headers.copy(), [], headers.items()) + self.assertIn('etag', headers) + self.assertEqual(headers['etag'], 'source etag') + + req = Request.blank('/v1/a/c/o-copy0', method='GET') + status, headers, body = self.call_ssc(req) + self.assertEqual('200 OK', status) + verify_headers(source_headers.copy(), [], headers) + + # use a COPY request with a Range header + self.app.register('PUT', '/v1/a/c/o-copy1', swob.HTTPCreated, {}) + req = Request.blank('/v1/a/c/o', method='COPY', + headers={'Content-Length': 0, + 'Destination': 'c/o-copy1', + 'Range': 'bytes=1-2'}) + status, headers, body = self.call_ssc(req) + expected_headers = source_headers.copy() + unexpected_headers = ( + 'x-object-sysmeta-container-update-override-etag', + 'x-object-sysmeta-container-update-override-size', + 'x-object-sysmeta-container-update-override-foo') + for h in unexpected_headers: + expected_headers.pop(h) + self.assertEqual('201 Created', status) + verify_headers(expected_headers, unexpected_headers, headers) + method, path, headers = self.app.calls_with_headers[-1] + self.assertEqual('PUT', method) + self.assertEqual('/v1/a/c/o-copy1', path) + verify_headers(expected_headers, unexpected_headers, headers.items()) + # etag should not be copied with a Range request + self.assertNotIn('etag', headers) + + req = Request.blank('/v1/a/c/o-copy1', method='GET') + status, headers, body = self.call_ssc(req) + self.assertEqual('200 OK', status) + verify_headers(expected_headers, unexpected_headers, headers) + + # use a PUT with x-copy-from + self.app.register('PUT', '/v1/a/c/o-copy2', swob.HTTPCreated, {}) + req = Request.blank('/v1/a/c/o-copy2', method='PUT', + headers={'Content-Length': 0, + 'X-Copy-From': 'c/o'}) + status, headers, body = self.call_ssc(req) + self.assertEqual('201 Created', status) + verify_headers(source_headers.copy(), [], headers) + method, path, headers = self.app.calls_with_headers[-1] + self.assertEqual('PUT', method) + self.assertEqual('/v1/a/c/o-copy2', path) + verify_headers(source_headers.copy(), [], headers.items()) + self.assertIn('etag', headers) + self.assertEqual(headers['etag'], 'source etag') + + 
req = Request.blank('/v1/a/c/o-copy2', method='GET') + status, headers, body = self.call_ssc(req) + self.assertEqual('200 OK', status) + verify_headers(source_headers.copy(), [], headers) def test_COPY_no_destination_header(self): req = Request.blank( diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index b85230f395..a40d75c5a2 100755 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -710,6 +710,102 @@ class TestObjectController(unittest.TestCase): self._test_POST_container_updates( POLICIES[1], update_etag='override_etag') + def test_POST_container_updates_precedence(self): + # Verify correct etag and size being sent with container updates for a + # PUT and for a subsequent POST. + ts_iter = make_timestamp_iter() + + def do_test(body, headers, policy): + def mock_container_update(ctlr, op, account, container, obj, req, + headers_out, objdevice, policy): + calls_made.append((headers_out, policy)) + calls_made = [] + ts_put = next(ts_iter) + + # make PUT with given headers and verify correct etag is sent in + # container update + headers.update({ + 'Content-Type': + 'application/octet-stream;swift_bytes=123456789', + 'X-Backend-Storage-Policy-Index': int(policy), + 'X-Object-Sysmeta-Ec-Frag-Index': 2, + 'X-Timestamp': ts_put.internal, + 'Content-Length': len(body)}) + + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'PUT'}, + headers=headers, body=body) + + with mock.patch( + 'swift.obj.server.ObjectController.container_update', + mock_container_update): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 201) + self.assertEqual(1, len(calls_made)) + expected_headers = HeaderKeyDict({ + 'x-size': '4', + 'x-content-type': + 'application/octet-stream;swift_bytes=123456789', + 'x-timestamp': ts_put.internal, + 'x-etag': 'expected'}) + self.assertDictEqual(expected_headers, calls_made[0][0]) + self.assertEqual(policy, calls_made[0][1]) + + # make a POST and verify container update has the same etag + calls_made = [] + ts_post = next(ts_iter) + req = Request.blank( + '/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'POST'}, + headers={'X-Timestamp': ts_post.internal, + 'X-Backend-Storage-Policy-Index': int(policy)}) + + with mock.patch( + 'swift.obj.server.ObjectController.container_update', + mock_container_update): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 202) + self.assertEqual(1, len(calls_made)) + expected_headers.update({ + 'x-content-type-timestamp': ts_put.internal, + 'x-meta-timestamp': ts_post.internal}) + self.assertDictEqual(expected_headers, calls_made[0][0]) + self.assertEqual(policy, calls_made[0][1]) + + # sanity check - EC headers are ok + headers = { + 'X-Backend-Container-Update-Override-Etag': 'expected', + 'X-Backend-Container-Update-Override-Size': '4', + 'X-Object-Sysmeta-Ec-Etag': 'expected', + 'X-Object-Sysmeta-Ec-Content-Length': '4'} + do_test('test ec frag longer than 4', headers, POLICIES[1]) + + # middleware overrides take precedence over EC/older overrides + headers = { + 'X-Backend-Container-Update-Override-Etag': 'unexpected', + 'X-Backend-Container-Update-Override-Size': '3', + 'X-Object-Sysmeta-Ec-Etag': 'unexpected', + 'X-Object-Sysmeta-Ec-Content-Length': '3', + 'X-Object-Sysmeta-Container-Update-Override-Etag': 'expected', + 'X-Object-Sysmeta-Container-Update-Override-Size': '4'} + do_test('test ec frag longer than 4', headers, POLICIES[1]) + + # overrides with replication policy + headers = { + 
'X-Object-Sysmeta-Container-Update-Override-Etag': 'expected', + 'X-Object-Sysmeta-Container-Update-Override-Size': '4'} + do_test('longer than 4', headers, POLICIES[0]) + + # middleware overrides take precedence over EC/older overrides with + # replication policy + headers = { + 'X-Backend-Container-Update-Override-Etag': 'unexpected', + 'X-Backend-Container-Update-Override-Size': '3', + 'X-Object-Sysmeta-Container-Update-Override-Etag': 'expected', + 'X-Object-Sysmeta-Container-Update-Override-Size': '4'} + do_test('longer than 4', headers, POLICIES[0]) + def _test_PUT_then_POST_async_pendings(self, policy, update_etag=None): # Test that PUT and POST requests result in distinct async pending # files when sync container update fails. @@ -4310,47 +4406,75 @@ class TestObjectController(unittest.TestCase): 'x-trans-id': '123', 'referer': 'PUT http://localhost/sda1/0/a/c/o'})) - def test_container_update_overrides(self): - container_updates = [] + def test_PUT_container_update_overrides(self): + ts_iter = make_timestamp_iter() - def capture_updates(ip, port, method, path, headers, *args, **kwargs): - container_updates.append((ip, port, method, path, headers)) + def do_test(override_headers): + container_updates = [] - headers = { - 'X-Timestamp': 1, - 'X-Trans-Id': '123', - 'X-Container-Host': 'chost:cport', - 'X-Container-Partition': 'cpartition', - 'X-Container-Device': 'cdevice', - 'Content-Type': 'text/plain', + def capture_updates( + ip, port, method, path, headers, *args, **kwargs): + container_updates.append((ip, port, method, path, headers)) + + ts_put = next(ts_iter) + headers = { + 'X-Timestamp': ts_put.internal, + 'X-Trans-Id': '123', + 'X-Container-Host': 'chost:cport', + 'X-Container-Partition': 'cpartition', + 'X-Container-Device': 'cdevice', + 'Content-Type': 'text/plain', + } + headers.update(override_headers) + req = Request.blank('/sda1/0/a/c/o', method='PUT', + headers=headers, body='') + with mocked_http_conn( + 200, give_connect=capture_updates) as fake_conn: + with fake_spawn(): + resp = req.get_response(self.object_controller) + self.assertRaises(StopIteration, fake_conn.code_iter.next) + self.assertEqual(resp.status_int, 201) + self.assertEqual(len(container_updates), 1) + ip, port, method, path, headers = container_updates[0] + self.assertEqual(ip, 'chost') + self.assertEqual(port, 'cport') + self.assertEqual(method, 'PUT') + self.assertEqual(path, '/cdevice/cpartition/a/c/o') + self.assertEqual(headers, HeaderKeyDict({ + 'user-agent': 'object-server %s' % os.getpid(), + 'x-size': '0', + 'x-etag': 'override_etag', + 'x-content-type': 'override_val', + 'x-timestamp': ts_put.internal, + 'X-Backend-Storage-Policy-Index': '0', # default + 'x-trans-id': '123', + 'referer': 'PUT http://localhost/sda1/0/a/c/o', + 'x-foo': 'bar'})) + + # EC policy override headers + do_test({ 'X-Backend-Container-Update-Override-Etag': 'override_etag', 'X-Backend-Container-Update-Override-Content-Type': 'override_val', 'X-Backend-Container-Update-Override-Foo': 'bar', - 'X-Backend-Container-Ignored': 'ignored' - } - req = Request.blank('/sda1/0/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, - headers=headers, body='') - with mocked_http_conn(200, give_connect=capture_updates) as fake_conn: - with fake_spawn(): - resp = req.get_response(self.object_controller) - self.assertRaises(StopIteration, fake_conn.code_iter.next) - self.assertEqual(resp.status_int, 201) - self.assertEqual(len(container_updates), 1) - ip, port, method, path, headers = container_updates[0] - self.assertEqual(ip, 'chost') - 
self.assertEqual(port, 'cport') - self.assertEqual(method, 'PUT') - self.assertEqual(path, '/cdevice/cpartition/a/c/o') - self.assertEqual(headers, HeaderKeyDict({ - 'user-agent': 'object-server %s' % os.getpid(), - 'x-size': '0', - 'x-etag': 'override_etag', - 'x-content-type': 'override_val', - 'x-timestamp': utils.Timestamp(1).internal, - 'X-Backend-Storage-Policy-Index': '0', # default when not given - 'x-trans-id': '123', - 'referer': 'PUT http://localhost/sda1/0/a/c/o', - 'x-foo': 'bar'})) + 'X-Backend-Container-Ignored': 'ignored'}) + + # middleware override headers + do_test({ + 'X-Object-Sysmeta-Container-Update-Override-Etag': 'override_etag', + 'X-Object-Sysmeta-Container-Update-Override-Content-Type': + 'override_val', + 'X-Object-Sysmeta-Container-Update-Override-Foo': 'bar', + 'X-Object-Sysmeta-Ignored': 'ignored'}) + + # middleware override headers take precedence over EC policy headers + do_test({ + 'X-Object-Sysmeta-Container-Update-Override-Etag': 'override_etag', + 'X-Object-Sysmeta-Container-Update-Override-Content-Type': + 'override_val', + 'X-Object-Sysmeta-Container-Update-Override-Foo': 'bar', + 'X-Backend-Container-Update-Override-Etag': 'ignored', + 'X-Backend-Container-Update-Override-Content-Type': 'ignored', + 'X-Backend-Container-Update-Override-Foo': 'ignored'}) def test_container_update_async(self): policy = random.choice(list(POLICIES)) From 3ad003cf51151f8ce6dfc6c2c529206eda5f7b60 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Mon, 6 Jun 2016 18:38:50 +0100 Subject: [PATCH 5/7] Enable middleware to set metadata on object POST Adds a new form of system metadata for objects. Sysmeta cannot be updated by an object POST because that would cause all existing sysmeta to be deleted. Crypto middleware will want to add 'system' metadata to object metadata on PUTs and POSTs, but it is ok for this metadata to be replaced en-masse on every POST. This patch introduces x-object-transient-sysmeta-* that is persisted by object servers and returned in GET and HEAD responses, just like user metadata, without polluting the x-object-meta-* namespace. All headers in this namespace will be filtered inbound and outbound by the gatekeeper, so cannot be set or read by clients. Co-Authored-By: Clay Gerrard Co-Authored-By: Janie Richling Change-Id: I5075493329935ba6790543fc82ea6e039704811d --- doc/source/development_middleware.rst | 67 ++++++++++- swift/common/middleware/copy.py | 30 ++--- swift/common/middleware/gatekeeper.py | 9 +- swift/common/request_helpers.py | 41 +++++++ swift/obj/server.py | 22 ++-- swift/proxy/controllers/base.py | 9 +- .../probe/test_object_metadata_replication.py | 25 ++-- test/unit/common/middleware/helpers.py | 33 ++++-- test/unit/common/middleware/test_copy.py | 66 +++++++---- .../unit/common/middleware/test_gatekeeper.py | 9 +- test/unit/common/test_request_helpers.py | 12 +- test/unit/obj/test_diskfile.py | 5 + test/unit/obj/test_server.py | 81 ++++++++++++- test/unit/proxy/controllers/test_base.py | 12 +- test/unit/proxy/test_server.py | 2 +- test/unit/proxy/test_sysmeta.py | 107 ++++++++++++++++++ 16 files changed, 450 insertions(+), 80 deletions(-) diff --git a/doc/source/development_middleware.rst b/doc/source/development_middleware.rst index 14bfcddb5b..b6dac83289 100644 --- a/doc/source/development_middleware.rst +++ b/doc/source/development_middleware.rst @@ -200,6 +200,8 @@ core swift features which predate sysmeta have added exceptions for custom non-user metadata headers (e.g. :ref:`acls`, :ref:`large-objects`) +.. 
_usermeta: + +^^^^^^^^^^^^^ User Metadata ^^^^^^^^^^^^^ @@ -209,7 +211,7 @@ User metadata takes the form of ``X-<type>-Meta-<key>: <value>``, where ``<type>`` depends on the resource type (i.e. Account, Container, Object) and ``<key>`` and ``<value>`` are set by the client. User metadata should generally be reserved for use by the client or -client applications. An perfect example use-case for user metadata is +client applications. A perfect example use-case for user metadata is `python-swiftclient`_'s ``X-Object-Meta-Mtime`` which it stores on an object it uploads to implement its ``--changed`` option which will only upload files that have changed since the last upload. @@ -223,6 +225,20 @@ borrows the user metadata namespace is :ref:`tempurl`. An example of middleware which uses custom non-user metadata to avoid the user metadata namespace is :ref:`slo-doc`. +User metadata that is stored by a PUT or POST request to a container or account +resource persists until it is explicitly removed by a subsequent PUT or POST +request that includes a header ``X-<type>-Meta-<key>`` with no value or a +header ``X-Remove-<type>-Meta-<key>: <ignored-value>``. In the latter case the +``<ignored-value>`` is not stored. All user metadata stored with an account or +container resource is deleted when the account or container is deleted. + +User metadata that is stored with an object resource has a different semantic; +object user metadata persists until any subsequent PUT or POST request is made +to the same object, at which point all user metadata stored with that object is +deleted en-masse and replaced with any user metadata included with the PUT or +POST request. As a result, it is not possible to update a subset of the user +metadata items stored with an object while leaving some items unchanged. + .. _sysmeta: ^^^^^^^^^^^^^^^ @@ -237,7 +253,7 @@ Swift WSGI Server. All headers on client requests in the form of ``X-<type>-Sysmeta-<key>`` will be dropped from the request before being processed by any middleware. All headers on responses from back-end systems in the form -of ``X-<type>-Sysmeta-<key>`` will be removed after all middleware has +of ``X-<type>-Sysmeta-<key>`` will be removed after all middlewares have processed the response but before the response is sent to the client. See :ref:`gatekeeper` middleware for more information. @@ -249,3 +265,50 @@ modified directly by client requests, and the outgoing filter ensures that removing middleware that uses a specific system metadata key renders it benign. New middleware should take advantage of system metadata. + +System metadata may be set on accounts and containers by including headers with +a PUT or POST request. Where a header name matches the name of an existing item +of system metadata, the value of the existing item will be updated. Otherwise +existing items are preserved. A system metadata header with an empty value will +cause any existing item with the same name to be deleted. + +System metadata may be set on objects using only PUT requests. All items of +existing system metadata will be deleted and replaced en-masse by any system +metadata headers included with the PUT request. System metadata is neither +updated nor deleted by a POST request: updating individual items of system +metadata with a POST request is not yet supported in the same way that updating +individual items of user metadata is not supported. In cases where middleware +needs to store its own metadata with a POST request, it may use Object Transient +Sysmeta.
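As an illustration only, a minimal sketch of how middleware code might build headers in each namespace using the helpers in swift.common.request_helpers (get_object_transient_sysmeta is added later in this patch; the ``crypto-meta`` key and the values are invented for the example):

    from swift.common.request_helpers import (
        get_object_transient_sysmeta, get_sys_meta_prefix)

    # durable object sysmeta: may only be set by a PUT, and is then replaced
    # en-masse by the sysmeta sent with the next PUT
    put_headers = {
        get_sys_meta_prefix('object') + 'crypto-meta': 'persisted value'}

    # transient sysmeta: may be sent with a PUT or a POST, but is replaced
    # en-masse (together with user metadata) on every subsequent POST
    post_headers = {
        get_object_transient_sysmeta('crypto-meta'): 'replaced on next POST'}
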
+ +^^^^^^^^^^^^^^^^^^^^^^^^ +Object Transient-Sysmeta +^^^^^^^^^^^^^^^^^^^^^^^^ + +If middleware needs to store object metadata with a POST request it may do so +using headers of the form ``X-Object-Transient-Sysmeta-<key>: <value>``. + +All headers on client requests in the form of +``X-Object-Transient-Sysmeta-<key>`` will be dropped from the request before +being processed by any middleware. All headers on responses from back-end +systems in the form of ``X-Object-Transient-Sysmeta-<key>`` will be removed +after all middlewares have processed the response but before the response is +sent to the client. See :ref:`gatekeeper` middleware for more information. + +Transient-sysmeta updates on an object have the same semantic as user +metadata updates on an object (see :ref:`usermeta`) i.e. whenever any PUT or +POST request is made to an object, all existing items of transient-sysmeta are +deleted en-masse and replaced with any transient-sysmeta included with the PUT +or POST request. Transient-sysmeta set by a middleware is therefore prone to +deletion by a subsequent client-generated POST request unless the middleware is +careful to include its transient-sysmeta with every POST. Likewise, user +metadata set by a client is prone to deletion by a subsequent +middleware-generated POST request, and for that reason middleware should avoid +generating POST requests that are independent of any client request. + +Transient-sysmeta deliberately uses a different header prefix to user metadata +so that middlewares can avoid potential conflict with user metadata keys. + +Transient-sysmeta deliberately uses a different header prefix to system +metadata to emphasize the fact that the data is only persisted until a +subsequent POST. diff --git a/swift/common/middleware/copy.py b/swift/common/middleware/copy.py index a5fc44ca2d..1daadfe90c 100644 --- a/swift/common/middleware/copy.py +++ b/swift/common/middleware/copy.py @@ -145,7 +145,7 @@ from swift.common.http import HTTP_MULTIPLE_CHOICES, HTTP_CREATED, \ is_success, HTTP_OK from swift.common.constraints import check_account_format, MAX_FILE_SIZE from swift.common.request_helpers import copy_header_subset, remove_items, \ - is_sys_meta, is_sys_or_user_meta + is_sys_meta, is_sys_or_user_meta, is_object_transient_sysmeta from swift.common.wsgi import WSGIContext, make_subrequest @@ -206,16 +206,18 @@ def _check_destination_header(req): '/') -def _copy_headers_into(from_r, to_r): +def _copy_headers(src, dest): """ - Will copy desired headers from from_r to to_r - :params from_r: a swob Request or Response - :params to_r: a swob Request or Response + Will copy desired headers from src to dest. + + :params src: an instance of collections.Mapping + :params dest: an instance of collections.Mapping """ - pass_headers = ['x-delete-at'] - for k, v in from_r.headers.items(): - if is_sys_or_user_meta('object', k) or k.lower() in pass_headers: - to_r.headers[k] = v + for k, v in src.items(): + if (is_sys_or_user_meta('object', k) or + is_object_transient_sysmeta(k) or + k.lower() == 'x-delete-at'): + dest[k] = v class ServerSideCopyWebContext(WSGIContext): @@ -422,9 +424,7 @@ class ServerSideCopyMiddleware(object): source_resp.headers['last-modified'] # Existing sys and user meta of source object is added to response # headers in addition to the new ones.
- for k, v in sink_req.headers.items(): - if is_sys_or_user_meta('object', k) or k.lower() == 'x-delete-at': - resp_headers[k] = v + _copy_headers(sink_req.headers, resp_headers) return resp_headers def handle_PUT(self, req, start_response): @@ -511,10 +511,10 @@ class ServerSideCopyMiddleware(object): remove_items(sink_req.headers, condition) copy_header_subset(source_resp, sink_req, condition) else: - # Copy/update existing sysmeta and user meta - _copy_headers_into(source_resp, sink_req) + # Copy/update existing sysmeta, transient-sysmeta and user meta + _copy_headers(source_resp.headers, sink_req.headers) # Copy/update new metadata provided in request if any - _copy_headers_into(req, sink_req) + _copy_headers(req.headers, sink_req.headers) # Create response headers for PUT response resp_headers = self._create_response_headers(source_path, diff --git a/swift/common/middleware/gatekeeper.py b/swift/common/middleware/gatekeeper.py index c5c1066505..e5df5bf44c 100644 --- a/swift/common/middleware/gatekeeper.py +++ b/swift/common/middleware/gatekeeper.py @@ -33,22 +33,25 @@ automatically inserted close to the start of the pipeline by the proxy server. from swift.common.swob import Request from swift.common.utils import get_logger, config_true_value -from swift.common.request_helpers import remove_items, get_sys_meta_prefix +from swift.common.request_helpers import ( + remove_items, get_sys_meta_prefix, OBJECT_TRANSIENT_SYSMETA_PREFIX +) import re #: A list of python regular expressions that will be used to #: match against inbound request headers. Matching headers will #: be removed from the request. # Exclude headers starting with a sysmeta prefix. +# Exclude headers starting with object transient system metadata prefix. +# Exclude headers starting with an internal backend header prefix. # If adding to this list, note that these are regex patterns, # so use a trailing $ to constrain to an exact header match # rather than prefix match. inbound_exclusions = [get_sys_meta_prefix('account'), get_sys_meta_prefix('container'), get_sys_meta_prefix('object'), + OBJECT_TRANSIENT_SYSMETA_PREFIX, 'x-backend'] -# 'x-object-sysmeta' is reserved in anticipation of future support -# for system metadata being applied to objects #: A list of python regular expressions that will be used to diff --git a/swift/common/request_helpers.py b/swift/common/request_helpers.py index 71a32106af..65f21bebce 100644 --- a/swift/common/request_helpers.py +++ b/swift/common/request_helpers.py @@ -44,6 +44,9 @@ from swift.common.utils import split_path, validate_device_partition, \ from swift.common.wsgi import make_subrequest +OBJECT_TRANSIENT_SYSMETA_PREFIX = 'x-object-transient-sysmeta-' + + def get_param(req, name, default=None): """ Get parameters from an HTTP request ensuring proper handling UTF-8 @@ -175,6 +178,19 @@ def is_sys_or_user_meta(server_type, key): return is_user_meta(server_type, key) or is_sys_meta(server_type, key) +def is_object_transient_sysmeta(key): + """ + Tests if a header key starts with and is longer than the prefix for object + transient system metadata. 
+ + :param key: header key + :returns: True if the key satisfies the test, False otherwise + """ + if len(key) <= len(OBJECT_TRANSIENT_SYSMETA_PREFIX): + return False + return key.lower().startswith(OBJECT_TRANSIENT_SYSMETA_PREFIX) + + def strip_user_meta_prefix(server_type, key): """ Removes the user metadata prefix for a given server type from the start @@ -199,6 +215,17 @@ def strip_sys_meta_prefix(server_type, key): return key[len(get_sys_meta_prefix(server_type)):] +def strip_object_transient_sysmeta_prefix(key): + """ + Removes the object transient system metadata prefix from the start of a + header key. + + :param key: header key + :returns: stripped header key + """ + return key[len(OBJECT_TRANSIENT_SYSMETA_PREFIX):] + + def get_user_meta_prefix(server_type): """ Returns the prefix for user metadata headers for given server type. @@ -225,6 +252,20 @@ def get_sys_meta_prefix(server_type): return 'x-%s-%s-' % (server_type.lower(), 'sysmeta') +def get_object_transient_sysmeta(key): + """ + Returns the Object Transient System Metadata header for key. + The Object Transient System Metadata namespace will be persisted by + backend object servers. These headers are treated in the same way as + object user metadata i.e. all headers in this namespace will be + replaced on every POST request. + + :param key: metadata key + :returns: the entire object transient system metadata header for key + """ + return '%s%s' % (OBJECT_TRANSIENT_SYSMETA_PREFIX, key) + + def remove_items(headers, condition): """ Removes items from a dict whose keys satisfy diff --git a/swift/obj/server.py b/swift/obj/server.py index 7193b73e70..1edefb8cd4 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -46,7 +46,8 @@ from swift.common.http import is_success from swift.common.base_storage_server import BaseStorageServer from swift.common.header_key_dict import HeaderKeyDict from swift.common.request_helpers import get_name_and_placement, \ - is_user_meta, is_sys_or_user_meta, resolve_etag_is_at_header + is_user_meta, is_sys_or_user_meta, is_object_transient_sysmeta, \ + resolve_etag_is_at_header from swift.common.swob import HTTPAccepted, HTTPBadRequest, HTTPCreated, \ HTTPInternalServerError, HTTPNoContent, HTTPNotFound, \ HTTPPreconditionFailed, HTTPRequestTimeout, HTTPUnprocessableEntity, \ @@ -520,7 +521,8 @@ class ObjectController(BaseStorageServer): metadata = {'X-Timestamp': req_timestamp.internal} self._preserve_slo_manifest(metadata, orig_metadata) metadata.update(val for val in request.headers.items() - if is_user_meta('object', val[0])) + if (is_user_meta('object', val[0]) or + is_object_transient_sysmeta(val[0]))) headers_to_copy = ( request.headers.get( 'X-Backend-Replication-Headers', '').split() + @@ -767,9 +769,11 @@ class ObjectController(BaseStorageServer): 'Content-Length': str(upload_size), } metadata.update(val for val in request.headers.items() - if is_sys_or_user_meta('object', val[0])) + if (is_sys_or_user_meta('object', val[0]) or + is_object_transient_sysmeta(val[0]))) metadata.update(val for val in footer_meta.items() - if is_sys_or_user_meta('object', val[0])) + if (is_sys_or_user_meta('object', val[0]) or + is_object_transient_sysmeta(val[0]))) headers_to_copy = ( request.headers.get( 'X-Backend-Replication-Headers', '').split() + @@ -861,8 +865,9 @@ class ObjectController(BaseStorageServer): response.headers['Content-Type'] = metadata.get( 'Content-Type', 'application/octet-stream') for key, value in metadata.items(): - if is_sys_or_user_meta('object', key) or \ - key.lower() in 
self.allowed_headers: + if (is_sys_or_user_meta('object', key) or + is_object_transient_sysmeta(key) or + key.lower() in self.allowed_headers): response.headers[key] = value response.etag = metadata['ETag'] response.last_modified = math.ceil(float(file_x_ts)) @@ -913,8 +918,9 @@ class ObjectController(BaseStorageServer): response.headers['Content-Type'] = metadata.get( 'Content-Type', 'application/octet-stream') for key, value in metadata.items(): - if is_sys_or_user_meta('object', key) or \ - key.lower() in self.allowed_headers: + if (is_sys_or_user_meta('object', key) or + is_object_transient_sysmeta(key) or + key.lower() in self.allowed_headers): response.headers[key] = value response.etag = metadata['ETag'] ts = Timestamp(metadata['X-Timestamp']) diff --git a/swift/proxy/controllers/base.py b/swift/proxy/controllers/base.py index 407a7aed93..c1a909dad5 100644 --- a/swift/proxy/controllers/base.py +++ b/swift/proxy/controllers/base.py @@ -58,7 +58,8 @@ from swift.common.swob import Request, Response, Range, \ status_map from swift.common.request_helpers import strip_sys_meta_prefix, \ strip_user_meta_prefix, is_user_meta, is_sys_meta, is_sys_or_user_meta, \ - http_response_to_document_iters + http_response_to_document_iters, is_object_transient_sysmeta, \ + strip_object_transient_sysmeta_prefix from swift.common.storage_policy import POLICIES @@ -180,12 +181,18 @@ def headers_to_object_info(headers, status_int=HTTP_OK): Construct a cacheable dict of object info based on response headers. """ headers, meta, sysmeta = _prep_headers_to_info(headers, 'object') + transient_sysmeta = {} + for key, val in headers.iteritems(): + if is_object_transient_sysmeta(key): + key = strip_object_transient_sysmeta_prefix(key.lower()) + transient_sysmeta[key] = val info = {'status': status_int, 'length': headers.get('content-length'), 'type': headers.get('content-type'), 'etag': headers.get('etag'), 'meta': meta, 'sysmeta': sysmeta, + 'transient_sysmeta': transient_sysmeta } return info diff --git a/test/probe/test_object_metadata_replication.py b/test/probe/test_object_metadata_replication.py index 4759d5dfc3..57ef8e455e 100644 --- a/test/probe/test_object_metadata_replication.py +++ b/test/probe/test_object_metadata_replication.py @@ -339,6 +339,8 @@ class Test(ReplProbeTest): def test_sysmeta_after_replication_with_subsequent_post(self): sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'} usermeta = {'x-object-meta-bar': 'meta-bar'} + transient_sysmeta = { + 'x-object-transient-sysmeta-bar': 'transient-sysmeta-bar'} self.brain.put_container(policy_index=int(self.policy)) # put object self._put_object() @@ -356,11 +358,13 @@ class Test(ReplProbeTest): # post some user meta to second server subset self.brain.stop_handoff_half() self.container_brain.stop_handoff_half() - self._post_object(usermeta) + user_and_transient_sysmeta = dict(usermeta) + user_and_transient_sysmeta.update(transient_sysmeta) + self._post_object(user_and_transient_sysmeta) metadata = self._get_object_metadata() - for key in usermeta: + for key in user_and_transient_sysmeta: self.assertTrue(key in metadata) - self.assertEqual(metadata[key], usermeta[key]) + self.assertEqual(metadata[key], user_and_transient_sysmeta[key]) for key in sysmeta: self.assertFalse(key in metadata) self.brain.start_handoff_half() @@ -376,6 +380,7 @@ class Test(ReplProbeTest): metadata = self._get_object_metadata() expected = dict(sysmeta) expected.update(usermeta) + expected.update(transient_sysmeta) for key in expected.keys(): self.assertTrue(key in metadata, 
key) self.assertEqual(metadata[key], expected[key]) @@ -399,6 +404,8 @@ class Test(ReplProbeTest): def test_sysmeta_after_replication_with_prior_post(self): sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'} usermeta = {'x-object-meta-bar': 'meta-bar'} + transient_sysmeta = { + 'x-object-transient-sysmeta-bar': 'transient-sysmeta-bar'} self.brain.put_container(policy_index=int(self.policy)) # put object self._put_object() @@ -406,11 +413,13 @@ class Test(ReplProbeTest): # put user meta to first server subset self.brain.stop_handoff_half() self.container_brain.stop_handoff_half() - self._post_object(headers=usermeta) + user_and_transient_sysmeta = dict(usermeta) + user_and_transient_sysmeta.update(transient_sysmeta) + self._post_object(user_and_transient_sysmeta) metadata = self._get_object_metadata() - for key in usermeta: + for key in user_and_transient_sysmeta: self.assertTrue(key in metadata) - self.assertEqual(metadata[key], usermeta[key]) + self.assertEqual(metadata[key], user_and_transient_sysmeta[key]) self.brain.start_handoff_half() self.container_brain.start_handoff_half() @@ -436,7 +445,7 @@ class Test(ReplProbeTest): for key in sysmeta: self.assertTrue(key in metadata) self.assertEqual(metadata[key], sysmeta[key]) - for key in usermeta: + for key in user_and_transient_sysmeta: self.assertFalse(key in metadata) self.brain.start_primary_half() self.container_brain.start_primary_half() @@ -449,7 +458,7 @@ class Test(ReplProbeTest): for key in sysmeta: self.assertTrue(key in metadata) self.assertEqual(metadata[key], sysmeta[key]) - for key in usermeta: + for key in user_and_transient_sysmeta: self.assertFalse(key in metadata) self.brain.start_handoff_half() self.container_brain.start_handoff_half() diff --git a/test/unit/common/middleware/helpers.py b/test/unit/common/middleware/helpers.py index c295ee4768..1e31362f0d 100644 --- a/test/unit/common/middleware/helpers.py +++ b/test/unit/common/middleware/helpers.py @@ -19,6 +19,8 @@ from collections import defaultdict from hashlib import md5 from swift.common import swob from swift.common.header_key_dict import HeaderKeyDict +from swift.common.request_helpers import is_user_meta, \ + is_object_transient_sysmeta from swift.common.swob import HTTPNotImplemented from swift.common.utils import split_path @@ -87,7 +89,7 @@ class FakeSwift(object): if resp: return resp(env, start_response) - req_headers = swob.Request(env).headers + req = swob.Request(env) self.swift_sources.append(env.get('swift.source')) self.txn_ids.append(env.get('swift.trans_id')) @@ -114,26 +116,41 @@ class FakeSwift(object): # simulate object PUT if method == 'PUT' and obj: - input = ''.join(iter(env['wsgi.input'].read, '')) + put_body = ''.join(iter(env['wsgi.input'].read, '')) if 'swift.callback.update_footers' in env: footers = HeaderKeyDict() env['swift.callback.update_footers'](footers) - req_headers.update(footers) - etag = md5(input).hexdigest() + req.headers.update(footers) + etag = md5(put_body).hexdigest() headers.setdefault('Etag', etag) - headers.setdefault('Content-Length', len(input)) + headers.setdefault('Content-Length', len(put_body)) # keep it for subsequent GET requests later - self.uploaded[path] = (dict(req_headers), input) + self.uploaded[path] = (dict(req.headers), put_body) if "CONTENT_TYPE" in env: self.uploaded[path][0]['Content-Type'] = env["CONTENT_TYPE"] + # simulate object POST + elif method == 'POST' and obj: + metadata, data = self.uploaded.get(path, ({}, None)) + # select items to keep from existing... 
+ new_metadata = dict( + (k, v) for k, v in metadata.items() + if (not is_user_meta('object', k) and not + is_object_transient_sysmeta(k))) + # apply from new + new_metadata.update( + dict((k, v) for k, v in req.headers.items() + if (is_user_meta('object', k) or + is_object_transient_sysmeta(k) or + k.lower == 'content-type'))) + self.uploaded[path] = new_metadata, data + # note: tests may assume this copy of req_headers is case insensitive # so we deliberately use a HeaderKeyDict - self._calls.append((method, path, HeaderKeyDict(req_headers))) + self._calls.append((method, path, HeaderKeyDict(req.headers))) # range requests ought to work, hence conditional_response=True - req = swob.Request(env) if isinstance(body, list): resp = resp_class( req=req, headers=headers, app_iter=body, diff --git a/test/unit/common/middleware/test_copy.py b/test/unit/common/middleware/test_copy.py index 3f024d4395..3a6663db00 100644 --- a/test/unit/common/middleware/test_copy.py +++ b/test/unit/common/middleware/test_copy.py @@ -689,9 +689,11 @@ class TestServerSideCopyMiddleware(unittest.TestCase): source_headers = { 'x-object-sysmeta-test1': 'copy me', 'x-object-meta-test2': 'copy me too', + 'x-object-transient-sysmeta-test3': 'ditto', 'x-object-sysmeta-container-update-override-etag': 'etag val', 'x-object-sysmeta-container-update-override-size': 'size val', - 'x-object-sysmeta-container-update-override-foo': 'bar'} + 'x-object-sysmeta-container-update-override-foo': 'bar', + 'x-delete-at': 'delete-at-time'} get_resp_headers = source_headers.copy() get_resp_headers['etag'] = 'source etag' @@ -713,20 +715,20 @@ class TestServerSideCopyMiddleware(unittest.TestCase): req = Request.blank('/v1/a/c/o', method='COPY', headers={'Content-Length': 0, 'Destination': 'c/o-copy0'}) - status, headers, body = self.call_ssc(req) + status, resp_headers, body = self.call_ssc(req) self.assertEqual('201 Created', status) - verify_headers(source_headers.copy(), [], headers) - method, path, headers = self.app.calls_with_headers[-1] + verify_headers(source_headers.copy(), [], resp_headers) + method, path, put_headers = self.app.calls_with_headers[-1] self.assertEqual('PUT', method) self.assertEqual('/v1/a/c/o-copy0', path) - verify_headers(source_headers.copy(), [], headers.items()) - self.assertIn('etag', headers) - self.assertEqual(headers['etag'], 'source etag') + verify_headers(source_headers.copy(), [], put_headers.items()) + self.assertIn('etag', put_headers) + self.assertEqual(put_headers['etag'], 'source etag') req = Request.blank('/v1/a/c/o-copy0', method='GET') - status, headers, body = self.call_ssc(req) + status, resp_headers, body = self.call_ssc(req) self.assertEqual('200 OK', status) - verify_headers(source_headers.copy(), [], headers) + verify_headers(source_headers.copy(), [], resp_headers) # use a COPY request with a Range header self.app.register('PUT', '/v1/a/c/o-copy1', swob.HTTPCreated, {}) @@ -734,7 +736,7 @@ class TestServerSideCopyMiddleware(unittest.TestCase): headers={'Content-Length': 0, 'Destination': 'c/o-copy1', 'Range': 'bytes=1-2'}) - status, headers, body = self.call_ssc(req) + status, resp_headers, body = self.call_ssc(req) expected_headers = source_headers.copy() unexpected_headers = ( 'x-object-sysmeta-container-update-override-etag', @@ -743,38 +745,54 @@ class TestServerSideCopyMiddleware(unittest.TestCase): for h in unexpected_headers: expected_headers.pop(h) self.assertEqual('201 Created', status) - verify_headers(expected_headers, unexpected_headers, headers) - method, path, headers = 
self.app.calls_with_headers[-1] + verify_headers(expected_headers, unexpected_headers, resp_headers) + method, path, put_headers = self.app.calls_with_headers[-1] self.assertEqual('PUT', method) self.assertEqual('/v1/a/c/o-copy1', path) - verify_headers(expected_headers, unexpected_headers, headers.items()) + verify_headers( + expected_headers, unexpected_headers, put_headers.items()) # etag should not be copied with a Range request - self.assertNotIn('etag', headers) + self.assertNotIn('etag', put_headers) req = Request.blank('/v1/a/c/o-copy1', method='GET') - status, headers, body = self.call_ssc(req) + status, resp_headers, body = self.call_ssc(req) self.assertEqual('200 OK', status) - verify_headers(expected_headers, unexpected_headers, headers) + verify_headers(expected_headers, unexpected_headers, resp_headers) # use a PUT with x-copy-from self.app.register('PUT', '/v1/a/c/o-copy2', swob.HTTPCreated, {}) req = Request.blank('/v1/a/c/o-copy2', method='PUT', headers={'Content-Length': 0, 'X-Copy-From': 'c/o'}) - status, headers, body = self.call_ssc(req) + status, resp_headers, body = self.call_ssc(req) self.assertEqual('201 Created', status) - verify_headers(source_headers.copy(), [], headers) - method, path, headers = self.app.calls_with_headers[-1] + verify_headers(source_headers.copy(), [], resp_headers) + method, path, put_headers = self.app.calls_with_headers[-1] self.assertEqual('PUT', method) self.assertEqual('/v1/a/c/o-copy2', path) - verify_headers(source_headers.copy(), [], headers.items()) - self.assertIn('etag', headers) - self.assertEqual(headers['etag'], 'source etag') + verify_headers(source_headers.copy(), [], put_headers.items()) + self.assertIn('etag', put_headers) + self.assertEqual(put_headers['etag'], 'source etag') req = Request.blank('/v1/a/c/o-copy2', method='GET') - status, headers, body = self.call_ssc(req) + status, resp_headers, body = self.call_ssc(req) self.assertEqual('200 OK', status) - verify_headers(source_headers.copy(), [], headers) + verify_headers(source_headers.copy(), [], resp_headers) + + # copy to same path as source + self.app.register('PUT', '/v1/a/c/o', swob.HTTPCreated, {}) + req = Request.blank('/v1/a/c/o', method='PUT', + headers={'Content-Length': 0, + 'X-Copy-From': 'c/o'}) + status, resp_headers, body = self.call_ssc(req) + self.assertEqual('201 Created', status) + verify_headers(source_headers.copy(), [], resp_headers) + method, path, put_headers = self.app.calls_with_headers[-1] + self.assertEqual('PUT', method) + self.assertEqual('/v1/a/c/o', path) + verify_headers(source_headers.copy(), [], put_headers.items()) + self.assertIn('etag', put_headers) + self.assertEqual(put_headers['etag'], 'source etag') def test_COPY_no_destination_header(self): req = Request.blank( diff --git a/test/unit/common/middleware/test_gatekeeper.py b/test/unit/common/middleware/test_gatekeeper.py index a01d45cbb1..5f4e87b5a2 100644 --- a/test/unit/common/middleware/test_gatekeeper.py +++ b/test/unit/common/middleware/test_gatekeeper.py @@ -74,12 +74,17 @@ class TestGatekeeper(unittest.TestCase): x_backend_headers = {'X-Backend-Replication': 'true', 'X-Backend-Replication-Headers': 'stuff'} + object_transient_sysmeta_headers = { + 'x-object-transient-sysmeta-': 'value', + 'x-object-transient-sysmeta-foo': 'value'} x_timestamp_headers = {'X-Timestamp': '1455952805.719739'} forbidden_headers_out = dict(sysmeta_headers.items() + - x_backend_headers.items()) + x_backend_headers.items() + + object_transient_sysmeta_headers.items()) forbidden_headers_in = 
dict(sysmeta_headers.items() + - x_backend_headers.items()) + x_backend_headers.items() + + object_transient_sysmeta_headers.items()) shunted_headers_in = dict(x_timestamp_headers.items()) def _assertHeadersEqual(self, expected, actual): diff --git a/test/unit/common/test_request_helpers.py b/test/unit/common/test_request_helpers.py index 1c39e9f0af..e451174516 100644 --- a/test/unit/common/test_request_helpers.py +++ b/test/unit/common/test_request_helpers.py @@ -21,8 +21,8 @@ from swift.common.storage_policy import POLICIES, EC_POLICY, REPL_POLICY from swift.common.request_helpers import is_sys_meta, is_user_meta, \ is_sys_or_user_meta, strip_sys_meta_prefix, strip_user_meta_prefix, \ remove_items, copy_header_subset, get_name_and_placement, \ - http_response_to_document_iters, update_etag_is_at_header, \ - resolve_etag_is_at_header + http_response_to_document_iters, is_object_transient_sysmeta, \ + update_etag_is_at_header, resolve_etag_is_at_header from test.unit import patch_policies from test.unit.common.test_utils import FakeResponse @@ -69,6 +69,14 @@ class TestRequestHelpers(unittest.TestCase): self.assertEqual(strip_user_meta_prefix(st, 'x-%s-%s-a' % (st, mt)), 'a') + def test_is_object_transient_sysmeta(self): + self.assertTrue(is_object_transient_sysmeta( + 'x-object-transient-sysmeta-foo')) + self.assertFalse(is_object_transient_sysmeta( + 'x-object-transient-sysmeta-')) + self.assertFalse(is_object_transient_sysmeta( + 'x-object-meatmeta-foo')) + def test_remove_items(self): src = {'a': 'b', 'c': 'd'} diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index 2a18478087..0a92f184f2 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -2374,6 +2374,7 @@ class DiskFileMixin(BaseDiskFileTestMixin): def test_disk_file_default_disallowed_metadata(self): # build an object with some meta (at t0+1s) orig_metadata = {'X-Object-Meta-Key1': 'Value1', + 'X-Object-Transient-Sysmeta-KeyA': 'ValueA', 'Content-Type': 'text/garbage'} df = self._get_open_disk_file(ts=self.ts().internal, extra_metadata=orig_metadata) @@ -2382,6 +2383,7 @@ class DiskFileMixin(BaseDiskFileTestMixin): # write some new metadata (fast POST, don't send orig meta, at t0+1) df = self._simple_get_diskfile() df.write_metadata({'X-Timestamp': self.ts().internal, + 'X-Object-Transient-Sysmeta-KeyB': 'ValueB', 'X-Object-Meta-Key2': 'Value2'}) df = self._simple_get_diskfile() with df.open(): @@ -2389,8 +2391,11 @@ class DiskFileMixin(BaseDiskFileTestMixin): self.assertEqual('text/garbage', df._metadata['Content-Type']) # original fast-post updateable keys are removed self.assertNotIn('X-Object-Meta-Key1', df._metadata) + self.assertNotIn('X-Object-Transient-Sysmeta-KeyA', df._metadata) # new fast-post updateable keys are added self.assertEqual('Value2', df._metadata['X-Object-Meta-Key2']) + self.assertEqual('ValueB', + df._metadata['X-Object-Transient-Sysmeta-KeyB']) def test_disk_file_preserves_sysmeta(self): # build an object with some meta (at t0) diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index a40d75c5a2..79fc1b32f4 100755 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -1683,7 +1683,8 @@ class TestObjectController(unittest.TestCase): 'ETag': '1000d172764c9dbc3a5798a67ec5bb76', 'X-Object-Meta-1': 'One', 'X-Object-Sysmeta-1': 'One', - 'X-Object-Sysmeta-Two': 'Two'}) + 'X-Object-Sysmeta-Two': 'Two', + 'X-Object-Transient-Sysmeta-Foo': 'Bar'}) req.body = 'VERIFY SYSMETA' resp = 
req.get_response(self.object_controller) self.assertEqual(resp.status_int, 201) @@ -1702,7 +1703,8 @@ class TestObjectController(unittest.TestCase): 'name': '/a/c/o', 'X-Object-Meta-1': 'One', 'X-Object-Sysmeta-1': 'One', - 'X-Object-Sysmeta-Two': 'Two'}) + 'X-Object-Sysmeta-Two': 'Two', + 'X-Object-Transient-Sysmeta-Foo': 'Bar'}) def test_PUT_succeeds_with_later_POST(self): ts_iter = make_timestamp_iter() @@ -1875,6 +1877,62 @@ class TestObjectController(unittest.TestCase): resp = req.get_response(self.object_controller) check_response(resp) + def test_POST_transient_sysmeta(self): + # check that diskfile transient system meta is changed by a POST + timestamp1 = normalize_timestamp(time()) + req = Request.blank( + '/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, + headers={'X-Timestamp': timestamp1, + 'Content-Type': 'text/plain', + 'ETag': '1000d172764c9dbc3a5798a67ec5bb76', + 'X-Object-Meta-1': 'One', + 'X-Object-Sysmeta-1': 'One', + 'X-Object-Transient-Sysmeta-Foo': 'Bar'}) + req.body = 'VERIFY SYSMETA' + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 201) + + timestamp2 = normalize_timestamp(time()) + req = Request.blank( + '/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'POST'}, + headers={'X-Timestamp': timestamp2, + 'X-Object-Meta-1': 'Not One', + 'X-Object-Sysmeta-1': 'Not One', + 'X-Object-Transient-Sysmeta-Foo': 'Not Bar'}) + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 202) + + # original .data file metadata should be unchanged + objfile = os.path.join( + self.testdir, 'sda1', + storage_directory(diskfile.get_data_dir(0), 'p', + hash_path('a', 'c', 'o')), + timestamp1 + '.data') + self.assertTrue(os.path.isfile(objfile)) + self.assertEqual(open(objfile).read(), 'VERIFY SYSMETA') + self.assertDictEqual(diskfile.read_metadata(objfile), + {'X-Timestamp': timestamp1, + 'Content-Length': '14', + 'Content-Type': 'text/plain', + 'ETag': '1000d172764c9dbc3a5798a67ec5bb76', + 'name': '/a/c/o', + 'X-Object-Meta-1': 'One', + 'X-Object-Sysmeta-1': 'One', + 'X-Object-Transient-Sysmeta-Foo': 'Bar'}) + + # .meta file metadata should have only user meta items + metafile = os.path.join( + self.testdir, 'sda1', + storage_directory(diskfile.get_data_dir(0), 'p', + hash_path('a', 'c', 'o')), + timestamp2 + '.meta') + self.assertTrue(os.path.isfile(metafile)) + self.assertDictEqual(diskfile.read_metadata(metafile), + {'X-Timestamp': timestamp2, + 'name': '/a/c/o', + 'X-Object-Meta-1': 'Not One', + 'X-Object-Transient-Sysmeta-Foo': 'Not Bar'}) + def test_PUT_then_fetch_system_metadata(self): timestamp = normalize_timestamp(time()) req = Request.blank( @@ -1884,7 +1942,8 @@ class TestObjectController(unittest.TestCase): 'ETag': '1000d172764c9dbc3a5798a67ec5bb76', 'X-Object-Meta-1': 'One', 'X-Object-Sysmeta-1': 'One', - 'X-Object-Sysmeta-Two': 'Two'}) + 'X-Object-Sysmeta-Two': 'Two', + 'X-Object-Transient-Sysmeta-Foo': 'Bar'}) req.body = 'VERIFY SYSMETA' resp = req.get_response(self.object_controller) self.assertEqual(resp.status_int, 201) @@ -1903,6 +1962,8 @@ class TestObjectController(unittest.TestCase): self.assertEqual(resp.headers['x-object-meta-1'], 'One') self.assertEqual(resp.headers['x-object-sysmeta-1'], 'One') self.assertEqual(resp.headers['x-object-sysmeta-two'], 'Two') + self.assertEqual(resp.headers['x-object-transient-sysmeta-foo'], + 'Bar') req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'HEAD'}) @@ -1921,9 +1982,13 @@ class TestObjectController(unittest.TestCase): headers={'X-Timestamp': 
timestamp, 'Content-Type': 'text/plain', 'ETag': '1000d172764c9dbc3a5798a67ec5bb76', + 'X-Object-Meta-0': 'deleted by post', + 'X-Object-Sysmeta-0': 'Zero', + 'X-Object-Transient-Sysmeta-0': 'deleted by post', 'X-Object-Meta-1': 'One', 'X-Object-Sysmeta-1': 'One', - 'X-Object-Sysmeta-Two': 'Two'}) + 'X-Object-Sysmeta-Two': 'Two', + 'X-Object-Transient-Sysmeta-Foo': 'Bar'}) req.body = 'VERIFY SYSMETA' resp = req.get_response(self.object_controller) self.assertEqual(resp.status_int, 201) @@ -1934,7 +1999,8 @@ class TestObjectController(unittest.TestCase): headers={'X-Timestamp': timestamp2, 'X-Object-Meta-1': 'Not One', 'X-Object-Sysmeta-1': 'Not One', - 'X-Object-Sysmeta-Two': 'Not Two'}) + 'X-Object-Sysmeta-Two': 'Not Two', + 'X-Object-Transient-Sysmeta-Foo': 'Not Bar'}) resp = req.get_response(self.object_controller) self.assertEqual(resp.status_int, 202) @@ -1951,8 +2017,13 @@ class TestObjectController(unittest.TestCase): self.assertEqual(resp.headers['etag'], '"1000d172764c9dbc3a5798a67ec5bb76"') self.assertEqual(resp.headers['x-object-meta-1'], 'Not One') + self.assertEqual(resp.headers['x-object-sysmeta-0'], 'Zero') self.assertEqual(resp.headers['x-object-sysmeta-1'], 'One') self.assertEqual(resp.headers['x-object-sysmeta-two'], 'Two') + self.assertEqual(resp.headers['x-object-transient-sysmeta-foo'], + 'Not Bar') + self.assertNotIn('x-object-meta-0', resp.headers) + self.assertNotIn('x-object-transient-sysmeta-0', resp.headers) req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'HEAD'}) diff --git a/test/unit/proxy/controllers/test_base.py b/test/unit/proxy/controllers/test_base.py index 689c6c88a8..55214f6d03 100644 --- a/test/unit/proxy/controllers/test_base.py +++ b/test/unit/proxy/controllers/test_base.py @@ -29,7 +29,9 @@ from swift.common.http import is_success from swift.common.storage_policy import StoragePolicy from test.unit import fake_http_connect, FakeRing, FakeMemcache from swift.proxy import server as proxy_server -from swift.common.request_helpers import get_sys_meta_prefix +from swift.common.request_helpers import ( + get_sys_meta_prefix, get_object_transient_sysmeta +) from test.unit import patch_policies @@ -537,6 +539,14 @@ class TestFuncs(unittest.TestCase): self.assertEqual(resp['sysmeta']['whatevs'], 14) self.assertEqual(resp['sysmeta']['somethingelse'], 0) + def test_headers_to_object_info_transient_sysmeta(self): + headers = {get_object_transient_sysmeta('Whatevs'): 14, + get_object_transient_sysmeta('somethingelse'): 0} + resp = headers_to_object_info(headers.items(), 200) + self.assertEqual(len(resp['transient_sysmeta']), 2) + self.assertEqual(resp['transient_sysmeta']['whatevs'], 14) + self.assertEqual(resp['transient_sysmeta']['somethingelse'], 0) + def test_headers_to_object_info_values(self): headers = { 'content-length': '1024', diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index f43ca5778e..6452fb5b0c 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -53,7 +53,7 @@ from swift.common.utils import hash_path, storage_directory, \ iter_multipart_mime_documents, public from test.unit import ( - connect_tcp, readuntil2crlfs, FakeLogger, FakeRing, fake_http_connect, + connect_tcp, readuntil2crlfs, FakeLogger, fake_http_connect, FakeRing, FakeMemcache, debug_logger, patch_policies, write_fake_ring, mocked_http_conn, DEFAULT_TEST_EC_TYPE) from swift.proxy import server as proxy_server diff --git a/test/unit/proxy/test_sysmeta.py b/test/unit/proxy/test_sysmeta.py index 
1a7f82334e..eb58523e39 100644 --- a/test/unit/proxy/test_sysmeta.py +++ b/test/unit/proxy/test_sysmeta.py @@ -28,6 +28,7 @@ from swift.common.wsgi import monkey_patch_mimetools, WSGIContext from swift.obj import server as object_server from swift.proxy import server as proxy import swift.proxy.controllers +from swift.proxy.controllers.base import get_object_info from test.unit import FakeMemcache, debug_logger, FakeRing, \ fake_http_connect, patch_policies @@ -172,6 +173,17 @@ class TestObjectSysmeta(unittest.TestCase): 'x-object-meta-test1': 'meta1 changed'} new_meta_headers = {'x-object-meta-test3': 'meta3'} bad_headers = {'x-account-sysmeta-test1': 'bad1'} + # these transient_sysmeta headers get changed... + original_transient_sysmeta_headers_1 = \ + {'x-object-transient-sysmeta-testA': 'A'} + # these transient_sysmeta headers get deleted... + original_transient_sysmeta_headers_2 = \ + {'x-object-transient-sysmeta-testB': 'B'} + # these are replacement transient_sysmeta headers + changed_transient_sysmeta_headers = \ + {'x-object-transient-sysmeta-testA': 'changed_A'} + new_transient_sysmeta_headers_1 = {'x-object-transient-sysmeta-testC': 'C'} + new_transient_sysmeta_headers_2 = {'x-object-transient-sysmeta-testD': 'D'} def test_PUT_sysmeta_then_GET(self): path = '/v1/a/c/o' @@ -180,6 +192,7 @@ class TestObjectSysmeta(unittest.TestCase): hdrs = dict(self.original_sysmeta_headers_1) hdrs.update(self.original_meta_headers_1) hdrs.update(self.bad_headers) + hdrs.update(self.original_transient_sysmeta_headers_1) req = Request.blank(path, environ=env, headers=hdrs, body='x') resp = req.get_response(self.app) self._assertStatus(resp, 201) @@ -189,6 +202,7 @@ class TestObjectSysmeta(unittest.TestCase): self._assertStatus(resp, 200) self._assertInHeaders(resp, self.original_sysmeta_headers_1) self._assertInHeaders(resp, self.original_meta_headers_1) + self._assertInHeaders(resp, self.original_transient_sysmeta_headers_1) self._assertNotInHeaders(resp, self.bad_headers) def test_PUT_sysmeta_then_HEAD(self): @@ -198,6 +212,7 @@ class TestObjectSysmeta(unittest.TestCase): hdrs = dict(self.original_sysmeta_headers_1) hdrs.update(self.original_meta_headers_1) hdrs.update(self.bad_headers) + hdrs.update(self.original_transient_sysmeta_headers_1) req = Request.blank(path, environ=env, headers=hdrs, body='x') resp = req.get_response(self.app) self._assertStatus(resp, 201) @@ -208,6 +223,7 @@ class TestObjectSysmeta(unittest.TestCase): self._assertStatus(resp, 200) self._assertInHeaders(resp, self.original_sysmeta_headers_1) self._assertInHeaders(resp, self.original_meta_headers_1) + self._assertInHeaders(resp, self.original_transient_sysmeta_headers_1) self._assertNotInHeaders(resp, self.bad_headers) def test_sysmeta_replaced_by_PUT(self): @@ -306,6 +322,8 @@ class TestObjectSysmeta(unittest.TestCase): hdrs.update(self.original_sysmeta_headers_2) hdrs.update(self.original_meta_headers_1) hdrs.update(self.original_meta_headers_2) + hdrs.update(self.original_transient_sysmeta_headers_1) + hdrs.update(self.original_transient_sysmeta_headers_2) req = Request.blank(path, environ=env, headers=hdrs, body='x') resp = req.get_response(self.copy_app) self._assertStatus(resp, 201) @@ -315,6 +333,8 @@ class TestObjectSysmeta(unittest.TestCase): hdrs.update(self.new_sysmeta_headers) hdrs.update(self.changed_meta_headers) hdrs.update(self.new_meta_headers) + hdrs.update(self.changed_transient_sysmeta_headers) + hdrs.update(self.new_transient_sysmeta_headers_1) hdrs.update(self.bad_headers) 
hdrs.update({'Destination': dest}) req = Request.blank(path, environ=env, headers=hdrs) @@ -326,6 +346,9 @@ class TestObjectSysmeta(unittest.TestCase): self._assertInHeaders(resp, self.changed_meta_headers) self._assertInHeaders(resp, self.new_meta_headers) self._assertInHeaders(resp, self.original_meta_headers_2) + self._assertInHeaders(resp, self.changed_transient_sysmeta_headers) + self._assertInHeaders(resp, self.new_transient_sysmeta_headers_1) + self._assertInHeaders(resp, self.original_transient_sysmeta_headers_2) self._assertNotInHeaders(resp, self.bad_headers) req = Request.blank('/v1/a/c/o2', environ={}) @@ -337,6 +360,9 @@ class TestObjectSysmeta(unittest.TestCase): self._assertInHeaders(resp, self.changed_meta_headers) self._assertInHeaders(resp, self.new_meta_headers) self._assertInHeaders(resp, self.original_meta_headers_2) + self._assertInHeaders(resp, self.changed_transient_sysmeta_headers) + self._assertInHeaders(resp, self.new_transient_sysmeta_headers_1) + self._assertInHeaders(resp, self.original_transient_sysmeta_headers_2) self._assertNotInHeaders(resp, self.bad_headers) def test_sysmeta_updated_by_COPY_from(self): @@ -380,3 +406,84 @@ class TestObjectSysmeta(unittest.TestCase): self._assertInHeaders(resp, self.new_meta_headers) self._assertInHeaders(resp, self.original_meta_headers_2) self._assertNotInHeaders(resp, self.bad_headers) + + def _test_transient_sysmeta_replaced_by_PUT_or_POST(self, app): + # check transient_sysmeta is replaced en-masse by a POST + path = '/v1/a/c/o' + + env = {'REQUEST_METHOD': 'PUT'} + hdrs = dict(self.original_transient_sysmeta_headers_1) + hdrs.update(self.original_transient_sysmeta_headers_2) + hdrs.update(self.original_meta_headers_1) + req = Request.blank(path, environ=env, headers=hdrs, body='x') + resp = req.get_response(app) + self._assertStatus(resp, 201) + + req = Request.blank(path, environ={}) + resp = req.get_response(app) + self._assertStatus(resp, 200) + self._assertInHeaders(resp, self.original_transient_sysmeta_headers_1) + self._assertInHeaders(resp, self.original_transient_sysmeta_headers_2) + self._assertInHeaders(resp, self.original_meta_headers_1) + + info = get_object_info(req.environ, app) + self.assertEqual(2, len(info.get('transient_sysmeta', ()))) + self.assertEqual({'testa': 'A', 'testb': 'B'}, + info['transient_sysmeta']) + + # POST will replace all existing transient_sysmeta and usermeta values + env = {'REQUEST_METHOD': 'POST'} + hdrs = dict(self.changed_transient_sysmeta_headers) + hdrs.update(self.new_transient_sysmeta_headers_1) + req = Request.blank(path, environ=env, headers=hdrs) + resp = req.get_response(app) + self._assertStatus(resp, 202) + + req = Request.blank(path, environ={}) + resp = req.get_response(app) + self._assertStatus(resp, 200) + self._assertInHeaders(resp, self.changed_transient_sysmeta_headers) + self._assertInHeaders(resp, self.new_transient_sysmeta_headers_1) + self._assertNotInHeaders(resp, self.original_meta_headers_1) + self._assertNotInHeaders(resp, + self.original_transient_sysmeta_headers_2) + + info = get_object_info(req.environ, app) + self.assertEqual(2, len(info.get('transient_sysmeta', ()))) + self.assertEqual({'testa': 'changed_A', 'testc': 'C'}, + info['transient_sysmeta']) + + # subsequent PUT replaces all transient_sysmeta and usermeta values + env = {'REQUEST_METHOD': 'PUT'} + hdrs = dict(self.new_transient_sysmeta_headers_2) + hdrs.update(self.original_meta_headers_2) + req = Request.blank(path, environ=env, headers=hdrs, body='x') + resp = req.get_response(app) + 
self._assertStatus(resp, 201) + + req = Request.blank(path, environ={}) + resp = req.get_response(app) + self._assertStatus(resp, 200) + self._assertInHeaders(resp, self.original_meta_headers_2) + self._assertInHeaders(resp, self.new_transient_sysmeta_headers_2) + # meta from previous POST should have gone away... + self._assertNotInHeaders(resp, self.changed_transient_sysmeta_headers) + self._assertNotInHeaders(resp, self.new_transient_sysmeta_headers_1) + # sanity check that meta from first PUT did not re-appear... + self._assertNotInHeaders(resp, self.original_meta_headers_1) + self._assertNotInHeaders(resp, + self.original_transient_sysmeta_headers_1) + self._assertNotInHeaders(resp, + self.original_transient_sysmeta_headers_2) + + info = get_object_info(req.environ, app) + self.assertEqual(1, len(info.get('transient_sysmeta', ()))) + self.assertEqual({'testd': 'D'}, info['transient_sysmeta']) + + def test_transient_sysmeta_replaced_by_PUT_or_POST(self): + self._test_transient_sysmeta_replaced_by_PUT_or_POST(self.app) + + def test_transient_sysmeta_replaced_by_PUT_or_POST_as_copy(self): + # test post-as-copy by issuing requests to the copy middleware app + self.copy_app.object_post_as_copy = True + self._test_transient_sysmeta_replaced_by_PUT_or_POST(self.copy_app) From 96a0e077532c3227b9290af7d74a0b42ee08e8de Mon Sep 17 00:00:00 2001 From: Janie Richling Date: Tue, 7 Jun 2016 15:01:32 +0100 Subject: [PATCH 6/7] Enable object body and metadata encryption Adds encryption middlewares. All object servers and proxy servers should be upgraded before introducing encryption middleware. Encryption middleware should be first introduced with the encryption middleware disable_encryption option set to True. Once all proxies have encryption middleware installed this option may be set to False (the default). Increases constraints.py:MAX_HEADER_COUNT by 4 to allow for headers generated by encryption-related middleware. 
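As an illustration of the staged rollout, the new filter factory can be
exercised directly; this is only a sketch using the factory and option added
by this patch:

    from swift.common.middleware import crypto

    # Stage 1: middleware deployed on every proxy, but encryption of newly
    # written data and metadata switched off.
    stage1_filter = crypto.filter_factory({}, disable_encryption='true')

    # Stage 2: once all proxies run the middleware, drop the option so the
    # default (encrypt new data) applies.
    stage2_filter = crypto.filter_factory({})

    # In both stages the filter wraps the pipeline as
    # Decrypter(Encrypter(app, conf), conf), so existing encrypted data
    # remains readable.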
Co-Authored-By: Tim Burke Co-Authored-By: Christian Cachin Co-Authored-By: Mahati Chamarthy Co-Authored-By: Peter Chng Co-Authored-By: Alistair Coles Co-Authored-By: Jonathan Hinson Co-Authored-By: Hamdi Roumani UpgradeImpact Change-Id: Ie6db22697ceb1021baaa6bddcf8e41ae3acb5376 --- doc/source/middleware.rst | 18 +- etc/proxy-server.conf-sample | 32 +- etc/swift.conf-sample | 7 +- other-requirements.txt | 2 + requirements.txt | 1 + setup.cfg | 2 + swift/common/constraints.py | 9 +- swift/common/exceptions.py | 4 + swift/common/middleware/crypto/__init__.py | 34 + .../common/middleware/crypto/crypto_utils.py | 283 +++++ swift/common/middleware/crypto/decrypter.py | 449 +++++++ swift/common/middleware/crypto/encrypter.py | 369 ++++++ swift/common/middleware/crypto/keymaster.py | 153 +++ swift/common/swob.py | 1 + test/functional/__init__.py | 6 + test/probe/test_empty_device_handoff.py | 15 +- test/probe/test_object_failures.py | 19 +- test/probe/test_object_handoff.py | 34 +- .../unit/common/middleware/crypto/__init__.py | 0 .../middleware/crypto/crypto_helpers.py | 54 + .../common/middleware/crypto/test_crypto.py | 39 + .../middleware/crypto/test_crypto_utils.py | 495 ++++++++ .../middleware/crypto/test_decrypter.py | 1119 +++++++++++++++++ .../middleware/crypto/test_encrypter.py | 820 ++++++++++++ .../middleware/crypto/test_encryption.py | 631 ++++++++++ .../middleware/crypto/test_keymaster.py | 163 +++ 26 files changed, 4731 insertions(+), 28 deletions(-) create mode 100644 swift/common/middleware/crypto/__init__.py create mode 100644 swift/common/middleware/crypto/crypto_utils.py create mode 100644 swift/common/middleware/crypto/decrypter.py create mode 100644 swift/common/middleware/crypto/encrypter.py create mode 100644 swift/common/middleware/crypto/keymaster.py create mode 100644 test/unit/common/middleware/crypto/__init__.py create mode 100644 test/unit/common/middleware/crypto/crypto_helpers.py create mode 100644 test/unit/common/middleware/crypto/test_crypto.py create mode 100644 test/unit/common/middleware/crypto/test_crypto_utils.py create mode 100644 test/unit/common/middleware/crypto/test_decrypter.py create mode 100644 test/unit/common/middleware/crypto/test_encrypter.py create mode 100644 test/unit/common/middleware/crypto/test_encryption.py create mode 100644 test/unit/common/middleware/crypto/test_keymaster.py diff --git a/doc/source/middleware.rst b/doc/source/middleware.rst index a078747204..f636c11f91 100644 --- a/doc/source/middleware.rst +++ b/doc/source/middleware.rst @@ -96,6 +96,15 @@ DLO support centers around a user specified filter that matches segments and concatenates them together in object listing order. Please see the DLO docs for :ref:`dlo-doc` further details. +.. _encryption: + +Encryption +========== + +.. automodule:: swift.common.middleware.crypto + :members: + :show-inheritance: + .. _formpost: FormPost @@ -108,7 +117,7 @@ FormPost .. _gatekeeper: GateKeeper -============= +========== .. automodule:: swift.common.middleware.gatekeeper :members: @@ -123,6 +132,13 @@ Healthcheck :members: :show-inheritance: +Keymaster +========= + +.. automodule:: swift.common.middleware.crypto.keymaster + :members: + :show-inheritance: + .. 
_keystoneauth: KeystoneAuth diff --git a/etc/proxy-server.conf-sample b/etc/proxy-server.conf-sample index 6a4962ff9c..aebb872787 100644 --- a/etc/proxy-server.conf-sample +++ b/etc/proxy-server.conf-sample @@ -79,7 +79,7 @@ bind_port = 8080 [pipeline:main] # This sample pipeline uses tempauth and is used for SAIO dev work and # testing. See below for a pipeline using keystone. -pipeline = catch_errors gatekeeper healthcheck proxy-logging cache container_sync bulk tempurl ratelimit tempauth copy container-quotas account-quotas slo dlo versioned_writes proxy-logging proxy-server +pipeline = catch_errors gatekeeper healthcheck proxy-logging cache container_sync bulk tempurl ratelimit tempauth copy container-quotas account-quotas slo dlo versioned_writes keymaster encryption proxy-logging proxy-server # The following pipeline shows keystone integration. Comment out the one # above and uncomment this one. Additional steps for integrating keystone are @@ -765,3 +765,33 @@ use = egg:swift#copy # When object_post_as_copy is set to True, a POST request will be transformed # into a COPY request where source and destination objects are the same. # object_post_as_copy = true + +# Note: To enable encryption, add the following 2 dependent pieces of crypto +# middleware to the proxy-server pipeline. They should be to the right of all +# other middleware apart from the final proxy-logging middleware, and in the +# order shown in this example: +# keymaster encryption proxy-logging proxy-server +[filter:keymaster] +use = egg:swift#keymaster + +# Sets the root secret from which encryption keys are derived. This must be set +# before first use to a value that is a base64 encoding of at least 32 bytes. +# The security of all encrypted data critically depends on this key, therefore +# it should be set to a high-entropy value. For example, a suitable value may +# be obtained by base-64 encoding a 32 byte (or longer) value generated by a +# cryptographically secure random number generator. Changing the root secret is +# likely to result in data loss. +# TODO - STOP SETTING THIS DEFAULT! This is only here while work +# continues on the feature/crypto branch. Later, this will be added +# to the devstack proxy-config so that gate tests can pass. +# base64 encoding of "dontEverUseThisIn_PRODUCTION_xxxxxxxxxxxxxxx" +encryption_root_secret = ZG9udEV2ZXJVc2VUaGlzSW5fUFJPRFVDVElPTl94eHh4eHh4eHh4eHh4eHg= + +[filter:encryption] +use = egg:swift#encryption + +# By default all PUT or POST'ed object data and/or metadata will be encrypted. +# Encryption of new data and/or metadata may be disabled by setting +# disable_encryption to True. However, all encryption middleware should remain +# in the pipeline in order for existing encrypted data to be read. +# disable_encryption = False diff --git a/etc/swift.conf-sample b/etc/swift.conf-sample index 78684730e2..1d21ba20a8 100644 --- a/etc/swift.conf-sample +++ b/etc/swift.conf-sample @@ -136,9 +136,10 @@ aliases = yellow, orange # By default the maximum number of allowed headers depends on the number of max -# allowed metadata settings plus a default value of 32 for regular http -# headers. If for some reason this is not enough (custom middleware for -# example) it can be increased with the extra_header_count constraint. +# allowed metadata settings plus a default value of 36 for swift internally +# generated headers and regular http headers. If for some reason this is not +# enough (custom middleware for example) it can be increased with the +# extra_header_count constraint. 
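Returning to the keymaster section above, a suitable encryption_root_secret
can be produced by base64-encoding at least 32 bytes from a cryptographically
secure random source, for example with this small Python sketch:

    import base64
    import os

    # 32 random bytes, base64 encoded, suitable for pasting into the
    # [filter:keymaster] section as the encryption_root_secret value
    print(base64.b64encode(os.urandom(32)).decode())

A command such as openssl rand -base64 32 yields the same kind of value.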
#extra_header_count = 0 diff --git a/other-requirements.txt b/other-requirements.txt index 394f2b0f7a..2fef68fdd8 100644 --- a/other-requirements.txt +++ b/other-requirements.txt @@ -13,3 +13,5 @@ python-dev [platform:dpkg] python-devel [platform:rpm] rsync xfsprogs +libssl-dev [platform:dpkg] +openssl-devel [platform:rpm] diff --git a/requirements.txt b/requirements.txt index 3480d4f3b2..3c17288b9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ pastedeploy>=1.3.3 six>=1.9.0 xattr>=0.4 PyECLib>=1.2.0 # BSD +cryptography>=1.0,!=1.3.0 # BSD/Apache-2.0 diff --git a/setup.cfg b/setup.cfg index 098b6c64f7..cb4cda4419 100644 --- a/setup.cfg +++ b/setup.cfg @@ -97,6 +97,8 @@ paste.filter_factory = xprofile = swift.common.middleware.xprofile:filter_factory versioned_writes = swift.common.middleware.versioned_writes:filter_factory copy = swift.common.middleware.copy:filter_factory + keymaster = swift.common.middleware.crypto.keymaster:filter_factory + encryption = swift.common.middleware.crypto:filter_factory [build_sphinx] all_files = 1 diff --git a/swift/common/constraints.py b/swift/common/constraints.py index 787d2d91da..efb7089871 100644 --- a/swift/common/constraints.py +++ b/swift/common/constraints.py @@ -110,10 +110,11 @@ FORMAT2CONTENT_TYPE = {'plain': 'text/plain', 'json': 'application/json', # By default the maximum number of allowed headers depends on the number of max -# allowed metadata settings plus a default value of 32 for regular http -# headers. If for some reason this is not enough (custom middleware for -# example) it can be increased with the extra_header_count constraint. -MAX_HEADER_COUNT = MAX_META_COUNT + 32 + max(EXTRA_HEADER_COUNT, 0) +# allowed metadata settings plus a default value of 36 for swift internally +# generated headers and regular http headers. If for some reason this is not +# enough (custom middleware for example) it can be increased with the +# extra_header_count constraint. +MAX_HEADER_COUNT = MAX_META_COUNT + 36 + max(EXTRA_HEADER_COUNT, 0) def check_metadata(req, target_type): diff --git a/swift/common/exceptions.py b/swift/common/exceptions.py index 721ac3421a..05f972f972 100644 --- a/swift/common/exceptions.py +++ b/swift/common/exceptions.py @@ -207,6 +207,10 @@ class APIVersionError(SwiftException): pass +class EncryptionException(SwiftException): + pass + + class ClientException(Exception): def __init__(self, msg, http_scheme='', http_host='', http_port='', diff --git a/swift/common/middleware/crypto/__init__.py b/swift/common/middleware/crypto/__init__.py new file mode 100644 index 0000000000..55fd93a046 --- /dev/null +++ b/swift/common/middleware/crypto/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) 2016 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Implements middleware for object encryption which comprises an instance of a +Decrypter combined with an instance of an Encrypter. 
+""" +from swift.common.middleware.crypto.decrypter import Decrypter +from swift.common.middleware.crypto.encrypter import Encrypter + +from swift.common.utils import config_true_value, register_swift_info + + +def filter_factory(global_conf, **local_conf): + """Provides a factory function for loading encryption middleware.""" + conf = global_conf.copy() + conf.update(local_conf) + enabled = not config_true_value(conf.get('disable_encryption', 'false')) + register_swift_info('encryption', admin=True, enabled=enabled) + + def encryption_filter(app): + return Decrypter(Encrypter(app, conf), conf) + return encryption_filter diff --git a/swift/common/middleware/crypto/crypto_utils.py b/swift/common/middleware/crypto/crypto_utils.py new file mode 100644 index 0000000000..4efa152259 --- /dev/null +++ b/swift/common/middleware/crypto/crypto_utils.py @@ -0,0 +1,283 @@ +# Copyright (c) 2015-2016 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import base64 +import binascii +import collections +import json +import os + +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes +import six +from six.moves.urllib import parse as urlparse + +from swift import gettext_ as _ +from swift.common.exceptions import EncryptionException +from swift.common.swob import HTTPInternalServerError +from swift.common.utils import get_logger +from swift.common.wsgi import WSGIContext + +CRYPTO_KEY_CALLBACK = 'swift.callback.fetch_crypto_keys' + + +class Crypto(object): + """ + Used by middleware: Calls cryptography library + """ + cipher = 'AES_CTR_256' + # AES will accept several key sizes - we are using 256 bits i.e. 32 bytes + key_length = 32 + iv_length = algorithms.AES.block_size / 8 + + def __init__(self, conf=None): + self.logger = get_logger(conf, log_route="crypto") + # memoize backend to avoid repeated iteration over entry points + self.backend = default_backend() + + def create_encryption_ctxt(self, key, iv): + """ + Creates a crypto context for encrypting + + :param key: 256-bit key + :param iv: 128-bit iv or nonce used for encryption + :raises: ValueError on invalid key or iv + :returns: an instance of an encryptor + """ + self.check_key(key) + engine = Cipher(algorithms.AES(key), modes.CTR(iv), + backend=self.backend) + return engine.encryptor() + + def create_decryption_ctxt(self, key, iv, offset): + """ + Creates a crypto context for decrypting + + :param key: 256-bit key + :param iv: 128-bit iv or nonce used for decryption + :param offset: offset into the message; used for range reads + :returns: an instance of a decryptor + """ + self.check_key(key) + if offset < 0: + raise ValueError('Offset must not be negative') + if offset: + # Adjust IV so that it is correct for decryption at offset. + # The CTR mode offset is incremented for every AES block and taken + # modulo 2^128. 
+ offset_blocks, offset_in_block = divmod(offset, self.iv_length) + ivl = long(binascii.hexlify(iv), 16) + offset_blocks + ivl %= 1 << algorithms.AES.block_size + iv = str(bytearray.fromhex(format( + ivl, '0%dx' % (2 * self.iv_length)))) + else: + offset_in_block = 0 + + engine = Cipher(algorithms.AES(key), modes.CTR(iv), + backend=self.backend) + dec = engine.decryptor() + # Adjust decryption boundary within current AES block + dec.update('*' * offset_in_block) + return dec + + def create_iv(self): + return os.urandom(self.iv_length) + + def create_crypto_meta(self): + # create a set of parameters + return {'iv': self.create_iv(), 'cipher': self.cipher} + + def check_crypto_meta(self, meta): + """ + Check that crypto meta dict has valid items. + + :param meta: a dict + :raises EncryptionException: if an error is found in the crypto meta + """ + try: + if meta['cipher'] != self.cipher: + raise EncryptionException('Bad crypto meta: Cipher must be %s' + % self.cipher) + if len(meta['iv']) != self.iv_length: + raise EncryptionException( + 'Bad crypto meta: IV must be length %s bytes' + % self.iv_length) + except KeyError as err: + raise EncryptionException( + 'Bad crypto meta: Missing %s' % err) + + def create_random_key(self): + # helper method to create random key of correct length + return os.urandom(self.key_length) + + def wrap_key(self, wrapping_key, key_to_wrap): + # we don't use an RFC 3394 key wrap algorithm such as cryptography's + # aes_wrap_key because it's slower and we have iv material readily + # available so don't need a deterministic algorithm + iv = self.create_iv() + encryptor = Cipher(algorithms.AES(wrapping_key), modes.CTR(iv), + backend=self.backend).encryptor() + return {'key': encryptor.update(key_to_wrap), 'iv': iv} + + def unwrap_key(self, wrapping_key, context): + # unwrap a key from dict of form returned by wrap_key + # check the key length early - unwrapping won't change the length + self.check_key(context['key']) + decryptor = Cipher(algorithms.AES(wrapping_key), + modes.CTR(context['iv']), + backend=self.backend).decryptor() + return decryptor.update(context['key']) + + def check_key(self, key): + if len(key) != self.key_length: + raise ValueError("Key must be length %s bytes" % self.key_length) + + +class CryptoWSGIContext(WSGIContext): + """ + Base class for contexts used by crypto middlewares. + """ + def __init__(self, crypto_app, server_type, logger): + super(CryptoWSGIContext, self).__init__(crypto_app.app) + self.crypto = crypto_app.crypto + self.logger = logger + self.server_type = server_type + + def get_keys(self, env, required=None): + # Get the key(s) from the keymaster + required = required if required is not None else [self.server_type] + try: + fetch_crypto_keys = env[CRYPTO_KEY_CALLBACK] + except KeyError: + self.logger.exception(_('ERROR get_keys() missing callback')) + raise HTTPInternalServerError( + "Unable to retrieve encryption keys.") + + try: + keys = fetch_crypto_keys() + except Exception as err: # noqa + self.logger.exception(_( + 'ERROR get_keys(): from callback: %s') % err) + raise HTTPInternalServerError( + "Unable to retrieve encryption keys.") + + for name in required: + try: + key = keys[name] + self.crypto.check_key(key) + continue + except KeyError: + self.logger.exception(_("Missing key for %r") % name) + except TypeError: + self.logger.exception(_("Did not get a keys dict")) + except ValueError as e: + # don't include the key in any messages! 
+ self.logger.exception(_("Bad key for %(name)r: %(err)s") % + {'name': name, 'err': e}) + raise HTTPInternalServerError( + "Unable to retrieve encryption keys.") + + return keys + + +def dump_crypto_meta(crypto_meta): + """ + Serialize crypto meta to a form suitable for including in a header value. + + The crypto-meta is serialized as a json object. The iv and key values are + random bytes and as a result need to be base64 encoded before sending over + the wire. Base64 encoding returns a bytes object in py3, to future proof + the code, decode this data to produce a string, which is what the + json.dumps function expects. + + :param crypto_meta: a dict containing crypto meta items + :returns: a string serialization of a crypto meta dict + """ + def b64_encode_meta(crypto_meta): + return { + name: (base64.b64encode(value).decode() if name in ('iv', 'key') + else b64_encode_meta(value) if isinstance(value, dict) + else value) + for name, value in crypto_meta.items()} + + # use sort_keys=True to make serialized form predictable for testing + return urlparse.quote_plus( + json.dumps(b64_encode_meta(crypto_meta), sort_keys=True)) + + +def load_crypto_meta(value): + """ + Build the crypto_meta from the json object. + + Note that json.loads always produces unicode strings, to ensure the + resultant crypto_meta matches the original object cast all key and value + data to a str except the key and iv which are base64 decoded. This will + work in py3 as well where all strings are unicode implying the cast is + effectively a no-op. + + :param value: a string serialization of a crypto meta dict + :returns: a dict containing crypto meta items + :raises EncryptionException: if an error occurs while parsing the + crypto meta + """ + def b64_decode_meta(crypto_meta): + return { + str(name): (base64.b64decode(val) if name in ('iv', 'key') + else b64_decode_meta(val) if isinstance(val, dict) + else val.encode('utf8')) + for name, val in crypto_meta.items()} + + try: + if not isinstance(value, six.string_types): + raise ValueError('crypto meta not a string') + val = json.loads(urlparse.unquote_plus(value)) + if not isinstance(val, collections.Mapping): + raise ValueError('crypto meta not a Mapping') + return b64_decode_meta(val) + except (KeyError, ValueError, TypeError) as err: + msg = 'Bad crypto meta %r: %s' % (value, err) + raise EncryptionException(msg) + + +def append_crypto_meta(value, crypto_meta): + """ + Serialize and append crypto metadata to an encrypted value. + + :param value: value to which serialized crypto meta will be appended. + :param crypto_meta: a dict of crypto meta + :return: a string of the form ; swift_meta= + """ + return '%s; swift_meta=%s' % (value, dump_crypto_meta(crypto_meta)) + + +def extract_crypto_meta(value): + """ + Extract and deserialize any crypto meta from the end of a value. 
+ + :param value: string that may have crypto meta at end + :return: a tuple of the form: + (, or None) + """ + crypto_meta = None + # we only attempt to extract crypto meta from values that we know were + # encrypted and base64-encoded, or from etag values, so it's safe to split + # on ';' even if it turns out that the value was an unencrypted etag + parts = value.split(';') + if len(parts) == 2: + value, param = parts + crypto_meta_tag = 'swift_meta=' + if param.strip().startswith(crypto_meta_tag): + param = param.strip()[len(crypto_meta_tag):] + crypto_meta = load_crypto_meta(param) + return value, crypto_meta diff --git a/swift/common/middleware/crypto/decrypter.py b/swift/common/middleware/crypto/decrypter.py new file mode 100644 index 0000000000..46e2dbc484 --- /dev/null +++ b/swift/common/middleware/crypto/decrypter.py @@ -0,0 +1,449 @@ +# Copyright (c) 2015-2016 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import json +import xml.etree.cElementTree as ElementTree + +from swift import gettext_ as _ +from swift.common.http import is_success +from swift.common.middleware.crypto.crypto_utils import CryptoWSGIContext, \ + load_crypto_meta, extract_crypto_meta, Crypto +from swift.common.exceptions import EncryptionException +from swift.common.request_helpers import get_object_transient_sysmeta, \ + get_listing_content_type, get_sys_meta_prefix, get_user_meta_prefix +from swift.common.swob import Request, HTTPException, HTTPInternalServerError +from swift.common.utils import get_logger, config_true_value, \ + parse_content_range, closing_if_possible, parse_content_type, \ + FileLikeIter, multipart_byteranges_to_document_iters + +DECRYPT_CHUNK_SIZE = 65536 + + +def purge_crypto_sysmeta_headers(headers): + return [h for h in headers if not + h[0].lower().startswith( + (get_object_transient_sysmeta('crypto-'), + get_sys_meta_prefix('object') + 'crypto-'))] + + +class BaseDecrypterContext(CryptoWSGIContext): + def get_crypto_meta(self, header_name): + """ + Extract a crypto_meta dict from a header. + + :param header_name: name of header that may have crypto_meta + :return: A dict containing crypto_meta items + :raises EncryptionException: if an error occurs while parsing the + crypto meta + """ + crypto_meta_json = self._response_header_value(header_name) + + if crypto_meta_json is None: + return None + crypto_meta = load_crypto_meta(crypto_meta_json) + self.crypto.check_crypto_meta(crypto_meta) + return crypto_meta + + def get_unwrapped_key(self, crypto_meta, wrapping_key): + """ + Get a wrapped key from crypto-meta and unwrap it using the provided + wrapping key. 
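        For example, the encrypter wraps a randomly chosen body key with the
        object key and stores the result under 'body_key' in the body
        crypto-meta; this method reverses that (a sketch using the Crypto
        helper from crypto_utils):

            crypto = Crypto()
            object_key = crypto.create_random_key()
            body_key = crypto.create_random_key()
            crypto_meta = {'body_key': crypto.wrap_key(object_key, body_key)}
            # get_unwrapped_key(crypto_meta, object_key) returns body_key via
            # crypto.unwrap_key(object_key, crypto_meta['body_key'])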
+ + :param crypto_meta: a dict of crypto-meta + :param wrapping_key: key to be used to decrypt the wrapped key + :return: an unwrapped key + :raises EncryptionException: if the crypto-meta has no wrapped key or + the unwrapped key is invalid + """ + try: + return self.crypto.unwrap_key(wrapping_key, + crypto_meta['body_key']) + except KeyError as err: + err = 'Missing %s' % err + except ValueError as err: + pass + msg = 'Error decrypting %s' % self.server_type + self.logger.error(_('%(msg)s: %(err)s') % {'msg': msg, 'err': err}) + raise HTTPInternalServerError(body=msg, content_type='text/plain') + + def decrypt_value_with_meta(self, value, key, required=False): + """ + Base64-decode and decrypt a value if crypto meta can be extracted from + the value itself, otherwise return the value unmodified. + + A value should either be a string that does not contain the ';' + character or should be of the form: + + ;swift_meta= + + :param value: value to decrypt + :param key: crypto key to use + :param required: if True then the value is required to be decrypted + and an EncryptionException will be raised if the + header cannot be decrypted due to missing crypto meta. + :returns: decrypted value if crypto meta is found, otherwise the + unmodified value + :raises EncryptionException: if an error occurs while parsing crypto + meta or if the header value was required + to be decrypted but crypto meta was not + found. + """ + value, crypto_meta = extract_crypto_meta(value) + if crypto_meta: + self.crypto.check_crypto_meta(crypto_meta) + value = self.decrypt_value(value, key, crypto_meta) + elif required: + raise EncryptionException( + "Missing crypto meta in value %s" % value) + return value + + def decrypt_value(self, value, key, crypto_meta): + """ + Base64-decode and decrypt a value using the crypto_meta provided. + + :param value: a base64-encoded value to decrypt + :param key: crypto key to use + :param crypto_meta: a crypto-meta dict of form returned by + :py:func:`~swift.common.middleware.crypto.Crypto.get_crypto_meta` + :returns: decrypted value + """ + if not value: + return '' + crypto_ctxt = self.crypto.create_decryption_ctxt( + key, crypto_meta['iv'], 0) + return crypto_ctxt.update(base64.b64decode(value)) + + def get_decryption_keys(self, req): + """ + Determine if a response should be decrypted, and if so then fetch keys. + + :param req: a Request object + :returns: a dict of decryption keys + """ + if config_true_value(req.environ.get('swift.crypto.override')): + self.logger.debug('No decryption is necessary because of override') + return None + + return self.get_keys(req.environ) + + +class DecrypterObjContext(BaseDecrypterContext): + def __init__(self, decrypter, logger): + super(DecrypterObjContext, self).__init__(decrypter, 'object', logger) + + def _decrypt_header(self, header, value, key, required=False): + """ + Attempt to decrypt a header value that may be encrypted. + + :param header: the header name + :param value: the header value + :param key: decryption key + :param required: if True then the header is required to be decrypted + and an HTTPInternalServerError will be raised if the + header cannot be decrypted due to missing crypto meta. + :return: decrypted value or the original value if it was not encrypted. + :raises HTTPInternalServerError: if an error occurred during decryption + or if the header value was required to + be decrypted but crypto meta was not + found. 
+ """ + try: + return self.decrypt_value_with_meta(value, key, required) + except EncryptionException as e: + msg = "Error decrypting header" + self.logger.error(_("%(msg)s %(hdr)s: %(e)s") % + {'msg': msg, 'hdr': header, 'e': e}) + raise HTTPInternalServerError(body=msg, content_type='text/plain') + + def decrypt_user_metadata(self, keys): + prefix = get_object_transient_sysmeta('crypto-meta-') + prefix_len = len(prefix) + new_prefix = get_user_meta_prefix(self.server_type).title() + result = [] + for name, val in self._response_headers: + if name.lower().startswith(prefix) and val: + short_name = name[prefix_len:] + decrypted_value = self._decrypt_header( + name, val, keys[self.server_type], required=True) + result.append((new_prefix + short_name, decrypted_value)) + return result + + def decrypt_resp_headers(self, keys): + """ + Find encrypted headers and replace with the decrypted versions. + + :param keys: a dict of decryption keys. + :return: A list of headers with any encrypted headers replaced by their + decrypted values. + :raises HTTPInternalServerError: if any error occurs while decrypting + headers + """ + mod_hdr_pairs = [] + + # Decrypt plaintext etag and place in Etag header for client response + etag_header = 'X-Object-Sysmeta-Crypto-Etag' + encrypted_etag = self._response_header_value(etag_header) + if encrypted_etag: + decrypted_etag = self._decrypt_header( + etag_header, encrypted_etag, keys['object'], required=True) + mod_hdr_pairs.append(('Etag', decrypted_etag)) + + etag_header = 'X-Object-Sysmeta-Container-Update-Override-Etag' + encrypted_etag = self._response_header_value(etag_header) + if encrypted_etag: + decrypted_etag = self._decrypt_header( + etag_header, encrypted_etag, keys['container']) + mod_hdr_pairs.append((etag_header, decrypted_etag)) + + # Decrypt all user metadata. Encrypted user metadata values are stored + # in the x-object-transient-sysmeta-crypto-meta- namespace. Those are + # decrypted and moved back to the x-object-meta- namespace. Prior to + # decryption, the response should have no x-object-meta- headers, but + # if it does then they will be overwritten by any decrypted headers + # that map to the same x-object-meta- header names i.e. decrypted + # headers win over unexpected, unencrypted headers. + mod_hdr_pairs.extend(self.decrypt_user_metadata(keys)) + + mod_hdr_names = {h.lower() for h, v in mod_hdr_pairs} + mod_hdr_pairs.extend([(h, v) for h, v in self._response_headers + if h.lower() not in mod_hdr_names]) + return mod_hdr_pairs + + def multipart_response_iter(self, resp, boundary, body_key, crypto_meta): + """ + Decrypts a multipart mime doc response body. + + :param resp: application response + :param boundary: multipart boundary string + :param keys: a dict of decryption keys. + :param crypto_meta: crypto_meta for the response body + :return: generator for decrypted response body + """ + with closing_if_possible(resp): + parts_iter = multipart_byteranges_to_document_iters( + FileLikeIter(resp), boundary) + for first_byte, last_byte, length, headers, body in parts_iter: + yield "--" + boundary + "\r\n" + + for header_pair in headers: + yield "%s: %s\r\n" % header_pair + + yield "\r\n" + + decrypt_ctxt = self.crypto.create_decryption_ctxt( + body_key, crypto_meta['iv'], first_byte) + for chunk in iter(lambda: body.read(DECRYPT_CHUNK_SIZE), ''): + yield decrypt_ctxt.update(chunk) + + yield "\r\n" + + yield "--" + boundary + "--" + + def response_iter(self, resp, body_key, crypto_meta, offset): + """ + Decrypts a response body. 
+ + :param resp: application response + :param keys: a dict of decryption keys. + :param crypto_meta: crypto_meta for the response body + :param offset: offset into object content at which response body starts + :return: generator for decrypted response body + """ + decrypt_ctxt = self.crypto.create_decryption_ctxt( + body_key, crypto_meta['iv'], offset) + with closing_if_possible(resp): + for chunk in resp: + yield decrypt_ctxt.update(chunk) + + def handle_get(self, req, start_response): + app_resp = self._app_call(req.environ) + + keys = self.get_decryption_keys(req) + if keys is None: + # skip decryption + start_response(self._response_status, self._response_headers, + self._response_exc_info) + return app_resp + + mod_resp_headers = self.decrypt_resp_headers(keys) + + crypto_meta = None + if is_success(self._get_status_int()): + try: + crypto_meta = self.get_crypto_meta( + 'X-Object-Sysmeta-Crypto-Body-Meta') + except EncryptionException as err: + msg = 'Error decrypting object' + self.logger.error(_('%(msg)s: %(err)s') % + {'msg': msg, 'err': err}) + raise HTTPInternalServerError( + body=msg, content_type='text/plain') + + if crypto_meta: + # 2xx response and encrypted body + body_key = self.get_unwrapped_key(crypto_meta, keys['object']) + content_type, content_type_attrs = parse_content_type( + self._response_header_value('Content-Type')) + + if (self._get_status_int() == 206 and + content_type == 'multipart/byteranges'): + boundary = dict(content_type_attrs)["boundary"] + resp_iter = self.multipart_response_iter( + app_resp, boundary, body_key, crypto_meta) + else: + offset = 0 + content_range = self._response_header_value('Content-Range') + if content_range: + # Determine offset within the whole object if ranged GET + offset, end, total = parse_content_range(content_range) + resp_iter = self.response_iter( + app_resp, body_key, crypto_meta, offset) + else: + # don't decrypt body of unencrypted or non-2xx responses + resp_iter = app_resp + + mod_resp_headers = purge_crypto_sysmeta_headers(mod_resp_headers) + start_response(self._response_status, mod_resp_headers, + self._response_exc_info) + + return resp_iter + + def handle_head(self, req, start_response): + app_resp = self._app_call(req.environ) + + keys = self.get_decryption_keys(req) + + if keys is None: + # skip decryption + start_response(self._response_status, self._response_headers, + self._response_exc_info) + else: + mod_resp_headers = self.decrypt_resp_headers(keys) + mod_resp_headers = purge_crypto_sysmeta_headers(mod_resp_headers) + start_response(self._response_status, mod_resp_headers, + self._response_exc_info) + + return app_resp + + +class DecrypterContContext(BaseDecrypterContext): + def __init__(self, decrypter, logger): + super(DecrypterContContext, self).__init__( + decrypter, 'container', logger) + + def handle_get(self, req, start_response): + app_resp = self._app_call(req.environ) + + if is_success(self._get_status_int()): + # only decrypt body of 2xx responses + out_content_type = get_listing_content_type(req) + if out_content_type == 'application/json': + handler = self.process_json_resp + keys = self.get_decryption_keys(req) + elif out_content_type.endswith('/xml'): + handler = self.process_xml_resp + keys = self.get_decryption_keys(req) + else: + handler = keys = None + + if handler and keys: + try: + app_resp = handler(keys['container'], app_resp) + except EncryptionException as err: + msg = "Error decrypting container listing" + self.logger.error(_('%(msg)s: %(err)s') % + {'msg': msg, 'err': err}) + 
raise HTTPInternalServerError(
+ body=msg, content_type='text/plain')
+
+ start_response(self._response_status,
+ self._response_headers,
+ self._response_exc_info)
+
+ return app_resp
+
+ def update_content_length(self, new_total_len):
+ self._response_headers = [
+ (h, v) for h, v in self._response_headers
+ if h.lower() != 'content-length']
+ self._response_headers.append(('Content-Length', str(new_total_len)))
+
+ def process_json_resp(self, key, resp_iter):
+ """
+ Parses JSON body listing and decrypts encrypted entries. Updates
+ Content-Length header with new body length and returns a body iter.
+ """
+ with closing_if_possible(resp_iter):
+ resp_body = ''.join(resp_iter)
+ body_json = json.loads(resp_body)
+ new_body = json.dumps([self.decrypt_obj_dict(obj_dict, key)
+ for obj_dict in body_json])
+ self.update_content_length(len(new_body))
+ return [new_body]
+
+ def decrypt_obj_dict(self, obj_dict, key):
+ ciphertext = obj_dict['hash']
+ obj_dict['hash'] = self.decrypt_value_with_meta(ciphertext, key)
+ return obj_dict
+
+ def process_xml_resp(self, key, resp_iter):
+ """
+ Parses XML body listing and decrypts encrypted entries. Updates
+ Content-Length header with new body length and returns a body iter.
+ """
+ with closing_if_possible(resp_iter):
+ resp_body = ''.join(resp_iter)
+ tree = ElementTree.fromstring(resp_body)
+ for elem in tree.iter('hash'):
+ ciphertext = elem.text.encode('utf8')
+ plain = self.decrypt_value_with_meta(ciphertext, key)
+ elem.text = plain.decode('utf8')
+ new_body = ElementTree.tostring(tree, encoding='UTF-8').replace(
+ "<?xml version='1.0' encoding='UTF-8'?>",
+ '<?xml version="1.0" encoding="UTF-8"?>', 1)
+ self.update_content_length(len(new_body))
+ return [new_body]
+
+
+class Decrypter(object):
+ """Middleware for decrypting data and user metadata."""
+
+ def __init__(self, app, conf):
+ self.app = app
+ self.logger = get_logger(conf, log_route="decrypter")
+ self.crypto = Crypto(conf)
+
+ def __call__(self, env, start_response):
+ req = Request(env)
+ try:
+ parts = req.split_path(3, 4, True)
+ except ValueError:
+ return self.app(env, start_response)
+
+ if parts[3] and req.method == 'GET':
+ handler = DecrypterObjContext(self, self.logger).handle_get
+ elif parts[3] and req.method == 'HEAD':
+ handler = DecrypterObjContext(self, self.logger).handle_head
+ elif parts[2] and req.method == 'GET':
+ handler = DecrypterContContext(self, self.logger).handle_get
+ else:
+ # url and/or request verb is not handled by decrypter
+ return self.app(env, start_response)
+
+ try:
+ return handler(req, start_response)
+ except HTTPException as err_resp:
+ return err_resp(env, start_response)
diff --git a/swift/common/middleware/crypto/encrypter.py b/swift/common/middleware/crypto/encrypter.py
new file mode 100644
index 0000000000..2719d47700
--- /dev/null
+++ b/swift/common/middleware/crypto/encrypter.py
@@ -0,0 +1,369 @@
+# Copyright (c) 2015-2016 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
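For reference, the container-listing 'hash' values that decrypt_obj_dict() above consumes have the form '<base64 of ciphertext>; swift_meta=<serialized crypto parameters>'. The following self-contained sketch is illustrative only: it inlines an approximation of the crypto_utils serialization helpers (dump_crypto_meta/extract_crypto_meta) and uses throwaway names, but it shows one such value being produced and then decrypted with AES-CTR.

    import base64
    import json
    import os
    import urllib

    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

    container_key = os.urandom(32)
    iv = os.urandom(16)
    plaintext_etag = 'd41d8cd98f00b204e9800998ecf8427e'

    # encrypt side: AES-CTR encrypt, base64 encode, then append the
    # serialized crypto parameters needed for later decryption
    encryptor = Cipher(algorithms.AES(container_key), modes.CTR(iv),
                       backend=default_backend()).encryptor()
    crypto_meta = {'iv': base64.b64encode(iv), 'cipher': 'AES_CTR_256'}
    listing_hash = '%s; swift_meta=%s' % (
        base64.b64encode(encryptor.update(plaintext_etag)),
        urllib.quote_plus(json.dumps(crypto_meta, sort_keys=True)))

    # decrypt side (what decrypt_obj_dict() relies on): split off the
    # crypto parameters, then reverse each step
    enc_val, params = listing_hash.split('; swift_meta=')
    meta = json.loads(urllib.unquote_plus(params))
    decryptor = Cipher(algorithms.AES(container_key),
                       modes.CTR(base64.b64decode(meta['iv'])),
                       backend=default_backend()).decryptor()
    assert decryptor.update(base64.b64decode(enc_val)) == plaintext_etag

Because AES-CTR is a stream cipher the ciphertext is the same length as the plaintext etag; only the base64 encoding and the appended parameters grow the stored value.
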
+import base64 +import hashlib +import hmac +from contextlib import contextmanager + +from swift.common.constraints import check_metadata +from swift.common.http import is_success +from swift.common.middleware.crypto.crypto_utils import CryptoWSGIContext, \ + dump_crypto_meta, append_crypto_meta, Crypto +from swift.common.request_helpers import get_object_transient_sysmeta, \ + strip_user_meta_prefix, is_user_meta, update_etag_is_at_header +from swift.common.swob import Request, Match, HTTPException, \ + HTTPUnprocessableEntity +from swift.common.utils import get_logger, config_true_value + + +def encrypt_header_val(crypto, value, key): + """ + Encrypt a header value using the supplied key. + + :param crypto: a Crypto instance + :param value: value to encrypt + :param key: crypto key to use + :returns: a tuple of (encrypted value, crypto_meta) where crypto_meta is a + dict of form returned by + :py:func:`~swift.common.middleware.crypto.Crypto.get_crypto_meta` + """ + if not value: + return '', None + + crypto_meta = crypto.create_crypto_meta() + crypto_ctxt = crypto.create_encryption_ctxt(key, crypto_meta['iv']) + enc_val = base64.b64encode(crypto_ctxt.update(value)) + return enc_val, crypto_meta + + +def _hmac_etag(key, etag): + """ + Compute an HMAC-SHA256 using given key and etag. + + :param key: The starting key for the hash. + :param etag: The etag to hash. + :returns: a Base64-encoded representation of the HMAC + """ + result = hmac.new(key, etag, digestmod=hashlib.sha256).digest() + return base64.b64encode(result).decode() + + +class EncInputWrapper(object): + """File-like object to be swapped in for wsgi.input.""" + def __init__(self, crypto, keys, req, logger): + self.env = req.environ + self.wsgi_input = req.environ['wsgi.input'] + self.path = req.path + self.crypto = crypto + self.body_crypto_ctxt = None + self.keys = keys + self.plaintext_md5 = None + self.ciphertext_md5 = None + self.logger = logger + self.install_footers_callback(req) + + def _init_encryption_context(self): + # do this once when body is first read + if self.body_crypto_ctxt is None: + self.body_crypto_meta = self.crypto.create_crypto_meta() + body_key = self.crypto.create_random_key() + # wrap the body key with object key + self.body_crypto_meta['body_key'] = self.crypto.wrap_key( + self.keys['object'], body_key) + self.body_crypto_meta['key_id'] = self.keys['id'] + self.body_crypto_ctxt = self.crypto.create_encryption_ctxt( + body_key, self.body_crypto_meta.get('iv')) + self.plaintext_md5 = hashlib.md5() + self.ciphertext_md5 = hashlib.md5() + + def install_footers_callback(self, req): + # the proxy controller will call back for footer metadata after + # body has been sent + inner_callback = req.environ.get('swift.callback.update_footers') + # remove any Etag from headers, it won't be valid for ciphertext and + # we'll send the ciphertext Etag later in footer metadata + client_etag = req.headers.pop('etag', None) + container_listing_etag_header = req.headers.get( + 'X-Object-Sysmeta-Container-Update-Override-Etag') + + def footers_callback(footers): + if inner_callback: + # pass on footers dict to any other callback that was + # registered before this one. It may override any footers that + # were set. 
+ inner_callback(footers) + + plaintext_etag = None + if self.body_crypto_ctxt: + plaintext_etag = self.plaintext_md5.hexdigest() + # If client (or other middleware) supplied etag, then validate + # against plaintext etag + etag_to_check = footers.get('Etag') or client_etag + if (etag_to_check is not None and + plaintext_etag != etag_to_check): + raise HTTPUnprocessableEntity(request=Request(self.env)) + + # override any previous notion of etag with the ciphertext etag + footers['Etag'] = self.ciphertext_md5.hexdigest() + + # Encrypt the plaintext etag using the object key and persist + # as sysmeta along with the crypto parameters that were used. + encrypted_etag, etag_crypto_meta = encrypt_header_val( + self.crypto, plaintext_etag, self.keys['object']) + footers['X-Object-Sysmeta-Crypto-Etag'] = \ + append_crypto_meta(encrypted_etag, etag_crypto_meta) + footers['X-Object-Sysmeta-Crypto-Body-Meta'] = \ + dump_crypto_meta(self.body_crypto_meta) + + # Also add an HMAC of the etag for use when evaluating + # conditional requests + footers['X-Object-Sysmeta-Crypto-Etag-Mac'] = _hmac_etag( + self.keys['object'], plaintext_etag) + else: + # No data was read from body, nothing was encrypted, so don't + # set any crypto sysmeta for the body, but do re-instate any + # etag provided in inbound request if other middleware has not + # already set a value. + if client_etag is not None: + footers.setdefault('Etag', client_etag) + + # When deciding on the etag that should appear in container + # listings, look for: + # * override in the footer, otherwise + # * override in the header, and finally + # * MD5 of the plaintext received + # This may be None if no override was set and no data was read + container_listing_etag = footers.get( + 'X-Object-Sysmeta-Container-Update-Override-Etag', + container_listing_etag_header) or plaintext_etag + + if container_listing_etag is not None: + # Encrypt the container-listing etag using the container key + # and a random IV, and use it to override the container update + # value, with the crypto parameters appended. We use the + # container key here so that only that key is required to + # decrypt all etag values in a container listing when handling + # a container GET request. 
+ val, crypto_meta = encrypt_header_val( + self.crypto, container_listing_etag, + self.keys['container']) + crypto_meta['key_id'] = self.keys['id'] + footers['X-Object-Sysmeta-Container-Update-Override-Etag'] = \ + append_crypto_meta(val, crypto_meta) + # else: no override was set and no data was read + + req.environ['swift.callback.update_footers'] = footers_callback + + def read(self, *args, **kwargs): + return self.readChunk(self.wsgi_input.read, *args, **kwargs) + + def readline(self, *args, **kwargs): + return self.readChunk(self.wsgi_input.readline, *args, **kwargs) + + def readChunk(self, read_method, *args, **kwargs): + chunk = read_method(*args, **kwargs) + + if chunk: + self._init_encryption_context() + self.plaintext_md5.update(chunk) + # Encrypt one chunk at a time + ciphertext = self.body_crypto_ctxt.update(chunk) + self.ciphertext_md5.update(ciphertext) + return ciphertext + + return chunk + + +class EncrypterObjContext(CryptoWSGIContext): + def __init__(self, encrypter, logger): + super(EncrypterObjContext, self).__init__( + encrypter, 'object', logger) + + def _check_headers(self, req): + # Check the user-metadata length before encrypting and encoding + error_response = check_metadata(req, self.server_type) + if error_response: + raise error_response + + def encrypt_user_metadata(self, req, keys): + """ + Encrypt user-metadata header values. Replace each x-object-meta- + user metadata header with a corresponding + x-object-transient-sysmeta-crypto-meta- header which has the + crypto metadata required to decrypt appended to the encrypted value. + + :param req: a swob Request + :param keys: a dict of encryption keys + """ + prefix = get_object_transient_sysmeta('crypto-meta-') + user_meta_headers = [h for h in req.headers.items() if + is_user_meta(self.server_type, h[0]) and h[1]] + crypto_meta = None + for name, val in user_meta_headers: + short_name = strip_user_meta_prefix(self.server_type, name) + new_name = prefix + short_name + enc_val, crypto_meta = encrypt_header_val( + self.crypto, val, keys[self.server_type]) + req.headers[new_name] = append_crypto_meta(enc_val, crypto_meta) + req.headers.pop(name) + # store a single copy of the crypto meta items that are common to all + # encrypted user metadata independently of any such meta that is stored + # with the object body because it might change on a POST. This is done + # for future-proofing - the meta stored here is not currently used + # during decryption. + if crypto_meta: + meta = dump_crypto_meta({'cipher': crypto_meta['cipher'], + 'key_id': keys['id']}) + req.headers[get_object_transient_sysmeta('crypto-meta')] = meta + + def handle_put(self, req, start_response): + self._check_headers(req) + keys = self.get_keys(req.environ, required=['object', 'container']) + self.encrypt_user_metadata(req, keys) + + enc_input_proxy = EncInputWrapper(self.crypto, keys, req, self.logger) + req.environ['wsgi.input'] = enc_input_proxy + + resp = self._app_call(req.environ) + + # If an etag is in the response headers and a plaintext etag was + # calculated, then overwrite the response value with the plaintext etag + # provided it matches the ciphertext etag. If it does not match then do + # not overwrite and allow the response value to return to client. 
+ mod_resp_headers = self._response_headers + if (is_success(self._get_status_int()) and + enc_input_proxy.plaintext_md5): + plaintext_etag = enc_input_proxy.plaintext_md5.hexdigest() + ciphertext_etag = enc_input_proxy.ciphertext_md5.hexdigest() + mod_resp_headers = [ + (h, v if (h.lower() != 'etag' or + v.strip('"') != ciphertext_etag) + else plaintext_etag) + for h, v in mod_resp_headers] + + start_response(self._response_status, mod_resp_headers, + self._response_exc_info) + return resp + + def handle_post(self, req, start_response): + """ + Encrypt the new object headers with a new iv and the current crypto. + Note that an object may have encrypted headers while the body may + remain unencrypted. + """ + self._check_headers(req) + keys = self.get_keys(req.environ) + self.encrypt_user_metadata(req, keys) + + resp = self._app_call(req.environ) + start_response(self._response_status, self._response_headers, + self._response_exc_info) + return resp + + @contextmanager + def _mask_conditional_etags(self, req, header_name): + """ + Calculate HMACs of etags in header value and append to existing list. + The HMACs are calculated in the same way as was done for the object + plaintext etag to generate the value of + X-Object-Sysmeta-Crypto-Etag-Mac when the object was PUT. The object + server can therefore use these HMACs to evaluate conditional requests. + + The existing etag values are left in the list of values to match in + case the object was not encrypted when it was PUT. It is unlikely that + a masked etag value would collide with an unmasked value. + + :param req: an instance of swob.Request + :param header_name: name of header that has etags to mask + :return: True if any etags were masked, False otherwise + """ + masked = False + old_etags = req.headers.get(header_name) + if old_etags: + keys = self.get_keys(req.environ) + new_etags = [] + for etag in Match(old_etags).tags: + if etag == '*': + new_etags.append(etag) + continue + masked_etag = _hmac_etag(keys['object'], etag) + new_etags.extend(('"%s"' % etag, '"%s"' % masked_etag)) + masked = True + + req.headers[header_name] = ', '.join(new_etags) + + try: + yield masked + finally: + if old_etags: + req.headers[header_name] = old_etags + + def handle_get_or_head(self, req, start_response): + with self._mask_conditional_etags(req, 'If-Match') as masked1: + with self._mask_conditional_etags(req, 'If-None-Match') as masked2: + if masked1 or masked2: + update_etag_is_at_header( + req, 'X-Object-Sysmeta-Crypto-Etag-Mac') + resp = self._app_call(req.environ) + start_response(self._response_status, self._response_headers, + self._response_exc_info) + return resp + + +class Encrypter(object): + """Middleware for encrypting data and user metadata. + + By default all PUT or POST'ed object data and/or metadata will be + encrypted. Encryption of new data and/or metadata may be disabled by + setting the ``disable_encryption`` option to True. However, this middleware + should remain in the pipeline in order for existing encrypted data to be + read. 
+ """
+
+ def __init__(self, app, conf):
+ self.app = app
+ self.logger = get_logger(conf, log_route="encrypter")
+ self.crypto = Crypto(conf)
+ self.disable_encryption = config_true_value(
+ conf.get('disable_encryption', 'false'))
+
+ def __call__(self, env, start_response):
+ # If override is set in env, then just pass along
+ if config_true_value(env.get('swift.crypto.override')):
+ return self.app(env, start_response)
+
+ req = Request(env)
+
+ if self.disable_encryption and req.method in ('PUT', 'POST'):
+ return self.app(env, start_response)
+ try:
+ req.split_path(4, 4, True)
+ except ValueError:
+ return self.app(env, start_response)
+
+ if req.method in ('GET', 'HEAD'):
+ handler = EncrypterObjContext(self, self.logger).handle_get_or_head
+ elif req.method == 'PUT':
+ handler = EncrypterObjContext(self, self.logger).handle_put
+ elif req.method == 'POST':
+ handler = EncrypterObjContext(self, self.logger).handle_post
+ else:
+ # anything else
+ return self.app(env, start_response)
+
+ try:
+ return handler(req, start_response)
+ except HTTPException as err_resp:
+ return err_resp(env, start_response)
diff --git a/swift/common/middleware/crypto/keymaster.py b/swift/common/middleware/crypto/keymaster.py
new file mode 100644
index 0000000000..4b6ac71f2c
--- /dev/null
+++ b/swift/common/middleware/crypto/keymaster.py
@@ -0,0 +1,153 @@
+# Copyright (c) 2015 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import base64
+import hashlib
+import hmac
+import os
+
+from swift.common.middleware.crypto.crypto_utils import CRYPTO_KEY_CALLBACK
+from swift.common.swob import Request, HTTPException
+from swift.common.wsgi import WSGIContext
+
+
+class KeyMasterContext(WSGIContext):
+ """
+ The simple scheme for key derivation is as follows: every path is
+ associated with a key, where the key is derived from the path itself in a
+ deterministic fashion such that the key does not need to be stored.
+ Specifically, the key for any path is an HMAC of a root key and the path
+ itself, calculated using an SHA256 hash function::
+
+ <path_key> = HMAC_SHA256(<root_secret>, <path>)
+ """
+ def __init__(self, keymaster, account, container, obj):
+ """
+ :param keymaster: a Keymaster instance
+ :param account: account name
+ :param container: container name
+ :param obj: object name
+ """
+ super(KeyMasterContext, self).__init__(keymaster.app)
+ self.keymaster = keymaster
+ self.account = account
+ self.container = container
+ self.obj = obj
+ self._keys = None
+
+ def fetch_crypto_keys(self, *args, **kwargs):
+ """
+ Set up container and object keys based on the request path.
+
+ Keys are derived from request path. The 'id' entry in the results dict
+ includes the part of the path used to derive keys. Other keymaster
+ implementations may use a different strategy to generate keys and may
+ include a different type of 'id', so callers should treat the 'id' as
+ opaque keymaster-specific data.
+
+ :returns: A dict containing encryption keys for 'object' and
+ 'container' and a key 'id'.
+ """ + if self._keys: + return self._keys + + self._keys = {} + account_path = os.path.join(os.sep, self.account) + + if self.container: + path = os.path.join(account_path, self.container) + self._keys['container'] = self.keymaster.create_key(path) + + if self.obj: + path = os.path.join(path, self.obj) + self._keys['object'] = self.keymaster.create_key(path) + + # For future-proofing include a keymaster version number and the + # path used to derive keys in the 'id' entry of the results. The + # encrypter will persist this as part of the crypto-meta for + # encrypted data and metadata. If we ever change the way keys are + # generated then the decrypter could pass the persisted 'id' value + # when it calls fetch_crypto_keys to inform the keymaster as to how + # that particular data or metadata had its keys generated. + # Currently we have no need to do that, so we are simply persisting + # this information for future use. + self._keys['id'] = {'v': '1', 'path': path} + + return self._keys + + def handle_request(self, req, start_response): + req.environ[CRYPTO_KEY_CALLBACK] = self.fetch_crypto_keys + resp = self._app_call(req.environ) + start_response(self._response_status, self._response_headers, + self._response_exc_info) + return resp + + +class KeyMaster(object): + """Middleware for providing encryption keys. + + The middleware requires its ``encryption_root_secret`` option to be set. + This is the root secret from which encryption keys are derived. This must + be set before first use to a value that is a base64 encoding of at least 32 + bytes. The security of all encrypted data critically depends on this key, + therefore it should be set to a high-entropy value. For example, a suitable + value may be obtained by base-64 encoding a 32 byte (or longer) value + generated by a cryptographically secure random number generator. Changing + the root secret is likely to result in data loss. 
+ """ + + def __init__(self, app, conf): + self.app = app + self.root_secret = conf.get('encryption_root_secret') + try: + self.root_secret = base64.b64decode(self.root_secret) + if len(self.root_secret) < 32: + raise ValueError + except (TypeError, ValueError): + raise ValueError( + 'encryption_root_secret option in proxy-server.conf must be ' + 'a base64 encoding of at least 32 raw bytes') + + def __call__(self, env, start_response): + req = Request(env) + + try: + parts = req.split_path(2, 4, True) + except ValueError: + return self.app(env, start_response) + + if req.method in ('PUT', 'POST', 'GET', 'HEAD'): + # handle only those request methods that may require keys + km_context = KeyMasterContext(self, *parts[1:]) + try: + return km_context.handle_request(req, start_response) + except HTTPException as err_resp: + return err_resp(env, start_response) + + # anything else + return self.app(env, start_response) + + def create_key(self, key_id): + return hmac.new(self.root_secret, key_id, + digestmod=hashlib.sha256).digest() + + +def filter_factory(global_conf, **local_conf): + conf = global_conf.copy() + conf.update(local_conf) + + def keymaster_filter(app): + return KeyMaster(app, conf) + + return keymaster_filter diff --git a/swift/common/swob.py b/swift/common/swob.py index aa11ec01f2..f80c13846d 100644 --- a/swift/common/swob.py +++ b/swift/common/swob.py @@ -1419,6 +1419,7 @@ HTTPOk = status_map[200] HTTPCreated = status_map[201] HTTPAccepted = status_map[202] HTTPNoContent = status_map[204] +HTTPPartialContent = status_map[206] HTTPMovedPermanently = status_map[301] HTTPFound = status_map[302] HTTPSeeOther = status_map[303] diff --git a/test/functional/__init__.py b/test/functional/__init__.py index 52be849bfa..0bf324f85d 100644 --- a/test/functional/__init__.py +++ b/test/functional/__init__.py @@ -361,6 +361,12 @@ def in_process_setup(the_object_server=object_server): 'allow_account_management': 'true', 'account_autocreate': 'true', 'allow_versions': 'True', + # TODO - Remove encryption_root_secret - this is only necessary while + # encryption middleware is in the default proxy pipeline in + # proxy-server.conf-sample + # base64 encoding of "dontEverUseThisIn_PRODUCTION_xxxxxxxxxxxxxxx" + 'encryption_root_secret': + 'ZG9udEV2ZXJVc2VUaGlzSW5fUFJPRFVDVElPTl94eHh4eHh4eHh4eHh4eHg=', # Below are values used by the functional test framework, as well as # by the various in-process swift servers 'auth_host': '127.0.0.1', diff --git a/test/probe/test_empty_device_handoff.py b/test/probe/test_empty_device_handoff.py index 65338ed84b..e1f8ade50c 100755 --- a/test/probe/test_empty_device_handoff.py +++ b/test/probe/test_empty_device_handoff.py @@ -73,6 +73,13 @@ class TestEmptyDevice(ReplProbeTest): raise Exception('Object GET did not return VERIFY, instead it ' 'returned: %s' % repr(odata)) + # Stash the on disk data from a primary for future comparison with the + # handoff - this may not equal 'VERIFY' if for example the proxy has + # crypto enabled + direct_get_data = direct_client.direct_get_object( + onodes[1], opart, self.account, container, obj, headers={ + 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] + # Kill other two container/obj primary servers # to ensure GET handoff works for node in onodes[1:]: @@ -95,9 +102,7 @@ class TestEmptyDevice(ReplProbeTest): odata = direct_client.direct_get_object( another_onode, opart, self.account, container, obj, headers={'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] - if odata != 'VERIFY': - raise Exception('Direct object GET 
did not return VERIFY, instead ' - 'it returned: %s' % repr(odata)) + self.assertEqual(direct_get_data, odata) # Assert container listing (via proxy and directly) has container/obj objs = [o['name'] for o in @@ -155,9 +160,7 @@ class TestEmptyDevice(ReplProbeTest): odata = direct_client.direct_get_object( onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] - if odata != 'VERIFY': - raise Exception('Direct object GET did not return VERIFY, instead ' - 'it returned: %s' % repr(odata)) + self.assertEqual(direct_get_data, odata) # Assert the handoff server no longer has container/obj try: diff --git a/test/probe/test_object_failures.py b/test/probe/test_object_failures.py index ba53177743..1850b2750d 100755 --- a/test/probe/test_object_failures.py +++ b/test/probe/test_object_failures.py @@ -77,6 +77,12 @@ class TestObjectFailures(ReplProbeTest): obj = 'object-%s' % uuid4() onode, opart, data_file = self._setup_data_file(container, obj, 'VERIFY') + # Stash the on disk data for future comparison - this may not equal + # 'VERIFY' if for example the proxy has crypto enabled + backend_data = direct_client.direct_get_object( + onode, opart, self.account, container, obj, headers={ + 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] + metadata = read_metadata(data_file) metadata['ETag'] = 'badetag' write_metadata(data_file, metadata) @@ -84,7 +90,7 @@ class TestObjectFailures(ReplProbeTest): odata = direct_client.direct_get_object( onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] - self.assertEqual(odata, 'VERIFY') + self.assertEqual(odata, backend_data) try: direct_client.direct_get_object( onode, opart, self.account, container, obj, headers={ @@ -98,14 +104,19 @@ class TestObjectFailures(ReplProbeTest): obj = 'object-range-%s' % uuid4() onode, opart, data_file = self._setup_data_file(container, obj, 'RANGE') + # Stash the on disk data for future comparison - this may not equal + # 'VERIFY' if for example the proxy has crypto enabled + backend_data = direct_client.direct_get_object( + onode, opart, self.account, container, obj, headers={ + 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] metadata = read_metadata(data_file) metadata['ETag'] = 'badetag' write_metadata(data_file, metadata) base_headers = {'X-Backend-Storage-Policy-Index': self.policy.idx} - for header, result in [({'Range': 'bytes=0-2'}, 'RAN'), - ({'Range': 'bytes=1-11'}, 'ANGE'), - ({'Range': 'bytes=0-11'}, 'RANGE')]: + for header, result in [({'Range': 'bytes=0-2'}, backend_data[0:3]), + ({'Range': 'bytes=1-11'}, backend_data[1:]), + ({'Range': 'bytes=0-11'}, backend_data)]: req_headers = base_headers.copy() req_headers.update(header) odata = direct_client.direct_get_object( diff --git a/test/probe/test_object_handoff.py b/test/probe/test_object_handoff.py index 3808df0616..ca0b3d0e02 100755 --- a/test/probe/test_object_handoff.py +++ b/test/probe/test_object_handoff.py @@ -55,6 +55,13 @@ class TestObjectHandoff(ReplProbeTest): raise Exception('Object GET did not return VERIFY, instead it ' 'returned: %s' % repr(odata)) + # Stash the on disk data from a primary for future comparison with the + # handoff - this may not equal 'VERIFY' if for example the proxy has + # crypto enabled + direct_get_data = direct_client.direct_get_object( + onodes[1], opart, self.account, container, obj, headers={ + 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] + # Kill other two container/obj primary servers # to 
ensure GET handoff works for node in onodes[1:]: @@ -76,9 +83,7 @@ class TestObjectHandoff(ReplProbeTest): odata = direct_client.direct_get_object( another_onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] - if odata != 'VERIFY': - raise Exception('Direct object GET did not return VERIFY, instead ' - 'it returned: %s' % repr(odata)) + self.assertEqual(direct_get_data, odata) # drop a tempfile in the handoff's datadir, like it might have # had if there was an rsync failure while it was previously a @@ -143,9 +148,7 @@ class TestObjectHandoff(ReplProbeTest): odata = direct_client.direct_get_object( onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] - if odata != 'VERIFY': - raise Exception('Direct object GET did not return VERIFY, instead ' - 'it returned: %s' % repr(odata)) + self.assertEqual(direct_get_data, odata) # and that it does *not* have a temporary rsync dropping! found_data_filename = False @@ -273,6 +276,14 @@ class TestECObjectHandoffOverwrite(ECProbeTest): # shutdown one of the primary data nodes failed_primary = random.choice(onodes) failed_primary_device_path = self.device_dir('object', failed_primary) + # first read its ec etag value for future reference - this may not + # equal old_contents.etag if for example the proxy has crypto enabled + req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)} + headers = direct_client.direct_head_object( + failed_primary, opart, self.account, container_name, + object_name, headers=req_headers) + old_backend_etag = headers['X-Object-Sysmeta-EC-Etag'] + self.kill_drive(failed_primary_device_path) # overwrite our object with some new data @@ -290,13 +301,18 @@ class TestECObjectHandoffOverwrite(ECProbeTest): failed_primary, opart, self.account, container_name, object_name, headers=req_headers) self.assertEqual(headers['X-Object-Sysmeta-EC-Etag'], - old_contents.etag) + old_backend_etag) # we have 1 primary with wrong old etag, and we should have 5 with # new etag plus a handoff with the new etag, so killing 2 other # primaries forces proxy to try to GET from all primaries plus handoff. other_nodes = [n for n in onodes if n != failed_primary] random.shuffle(other_nodes) + # grab the value of the new content's ec etag for future reference + headers = direct_client.direct_head_object( + other_nodes[0], opart, self.account, container_name, + object_name, headers=req_headers) + new_backend_etag = headers['X-Object-Sysmeta-EC-Etag'] for node in other_nodes[:2]: self.kill_drive(self.device_dir('object', node)) @@ -314,8 +330,8 @@ class TestECObjectHandoffOverwrite(ECProbeTest): continue found_frags[headers['X-Object-Sysmeta-EC-Etag']] += 1 self.assertEqual(found_frags, { - new_contents.etag: 4, # this should be enough to rebuild! - old_contents.etag: 1, + new_backend_etag: 4, # this should be enough to rebuild! 
+ old_backend_etag: 1, }) # clear node error limiting diff --git a/test/unit/common/middleware/crypto/__init__.py b/test/unit/common/middleware/crypto/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/unit/common/middleware/crypto/crypto_helpers.py b/test/unit/common/middleware/crypto/crypto_helpers.py new file mode 100644 index 0000000000..0af7d3e83c --- /dev/null +++ b/test/unit/common/middleware/crypto/crypto_helpers.py @@ -0,0 +1,54 @@ +# Copyright (c) 2015-2016 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import base64 +import hashlib + +from swift.common.middleware.crypto.crypto_utils import Crypto + + +def fetch_crypto_keys(): + return {'account': 'This is an account key 012345678', + 'container': 'This is a container key 01234567', + 'object': 'This is an object key 0123456789', + 'id': {'v': 'fake', 'path': '/a/c/fake'}} + + +def md5hex(s): + return hashlib.md5(s).hexdigest() + + +def encrypt(val, key=None, iv=None, ctxt=None): + if ctxt is None: + ctxt = Crypto({}).create_encryption_ctxt(key, iv) + enc_val = ctxt.update(val) + return enc_val + + +def decrypt(key, iv, enc_val): + dec_ctxt = Crypto({}).create_decryption_ctxt(key, iv, 0) + dec_val = dec_ctxt.update(enc_val) + return dec_val + + +FAKE_IV = "This is an IV123" +# do not use this example encryption_root_secret in production, use a randomly +# generated value with high entropy +TEST_KEYMASTER_CONF = {'encryption_root_secret': base64.b64encode(b'x' * 32)} + + +def fake_get_crypto_meta(**kwargs): + meta = {'iv': FAKE_IV, 'cipher': Crypto.cipher} + meta.update(kwargs) + return meta diff --git a/test/unit/common/middleware/crypto/test_crypto.py b/test/unit/common/middleware/crypto/test_crypto.py new file mode 100644 index 0000000000..c5f6cd0cd7 --- /dev/null +++ b/test/unit/common/middleware/crypto/test_crypto.py @@ -0,0 +1,39 @@ +# Copyright (c) 2016 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
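The TEST_KEYMASTER_CONF defined above is a base64 encoding of exactly 32 bytes, which satisfies the check made by KeyMaster.__init__ earlier in this patch. A rough sketch of the per-path key derivation that such a root secret feeds (mirroring KeyMaster.create_key; the account, container and object names here are hypothetical):

    import base64
    import hashlib
    import hmac
    import os

    from test.unit.common.middleware.crypto.crypto_helpers import \
        TEST_KEYMASTER_CONF

    # the root secret must decode to at least 32 bytes (see KeyMaster.__init__)
    root_secret = base64.b64decode(
        TEST_KEYMASTER_CONF['encryption_root_secret'])
    assert len(root_secret) >= 32

    def create_key(path):
        # same scheme as KeyMaster.create_key()
        return hmac.new(root_secret, path, digestmod=hashlib.sha256).digest()

    container_key = create_key(os.path.join(os.sep, 'AUTH_test', 'c'))
    object_key = create_key(os.path.join(os.sep, 'AUTH_test', 'c', 'o'))
    assert len(object_key) == 32 and container_key != object_key

Deriving keys as an HMAC-SHA256 of the path means no per-object key ever needs to be stored, which is the property the keymaster docstring describes.
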
+import unittest + +from swift.common import utils +from swift.common.middleware import crypto + + +class TestCrypto(unittest.TestCase): + def test_filter_factory(self): + factory = crypto.filter_factory({}) + self.assertTrue(callable(factory)) + self.assertIsInstance(factory({}), crypto.decrypter.Decrypter) + self.assertIsInstance(factory({}).app, crypto.encrypter.Encrypter) + self.assertIn('encryption', utils._swift_admin_info) + self.assertDictEqual( + {'enabled': True}, utils._swift_admin_info['encryption']) + self.assertNotIn('encryption', utils._swift_info) + + factory = crypto.filter_factory({'disable_encryption': True}) + self.assertTrue(callable(factory)) + self.assertIsInstance(factory({}), crypto.decrypter.Decrypter) + self.assertIsInstance(factory({}).app, crypto.encrypter.Encrypter) + self.assertIn('encryption', utils._swift_admin_info) + self.assertDictEqual( + {'enabled': False}, utils._swift_admin_info['encryption']) + self.assertNotIn('encryption', utils._swift_info) diff --git a/test/unit/common/middleware/crypto/test_crypto_utils.py b/test/unit/common/middleware/crypto/test_crypto_utils.py new file mode 100644 index 0000000000..56aca2ea0b --- /dev/null +++ b/test/unit/common/middleware/crypto/test_crypto_utils.py @@ -0,0 +1,495 @@ +# Copyright (c) 2015-2016 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
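Before the assertions below, it may help to note what the serialized_meta fixtures in TestModuleMethods actually encode: URL-quoted JSON in which binary values such as the IV are base64 encoded. A minimal sketch that reproduces the expected string follows; the real serialization lives in crypto_utils.dump_crypto_meta, and the use of quote_plus and sort_keys here is inferred from the expected test strings rather than taken from that helper.

    import base64
    import json
    import urllib

    meta = {'iv': '0123456789abcdef', 'cipher': 'AES_CTR_256'}
    serialized = urllib.quote_plus(
        json.dumps({'cipher': meta['cipher'],
                    'iv': base64.b64encode(meta['iv'])}, sort_keys=True))
    # matches TestModuleMethods.serialized_meta below
    assert serialized == ('%7B%22cipher%22%3A+%22AES_CTR_256%22%2C+%22'
                          'iv%22%3A+%22MDEyMzQ1Njc4OWFiY2RlZg%3D%3D%22%7D')
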
+import os +import unittest + +import mock +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + +from swift.common.exceptions import EncryptionException +from swift.common.middleware.crypto import crypto_utils +from swift.common.middleware.crypto.crypto_utils import ( + CRYPTO_KEY_CALLBACK, Crypto, CryptoWSGIContext) +from swift.common.swob import HTTPException +from test.unit import FakeLogger +from test.unit.common.middleware.crypto.crypto_helpers import fetch_crypto_keys + + +class TestCryptoWsgiContext(unittest.TestCase): + def setUp(self): + class FakeFilter(object): + app = None + crypto = Crypto({}) + + self.fake_logger = FakeLogger() + self.crypto_context = CryptoWSGIContext( + FakeFilter(), 'object', self.fake_logger) + + def test_get_keys(self): + # ok + env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + keys = self.crypto_context.get_keys(env) + self.assertDictEqual(fetch_crypto_keys(), keys) + + # only default required keys are checked + subset_keys = {'object': fetch_crypto_keys()['object']} + env = {CRYPTO_KEY_CALLBACK: lambda: subset_keys} + keys = self.crypto_context.get_keys(env) + self.assertDictEqual(subset_keys, keys) + + # only specified required keys are checked + subset_keys = {'container': fetch_crypto_keys()['container']} + env = {CRYPTO_KEY_CALLBACK: lambda: subset_keys} + keys = self.crypto_context.get_keys(env, required=['container']) + self.assertDictEqual(subset_keys, keys) + + subset_keys = {'object': fetch_crypto_keys()['object'], + 'container': fetch_crypto_keys()['container']} + env = {CRYPTO_KEY_CALLBACK: lambda: subset_keys} + keys = self.crypto_context.get_keys( + env, required=['object', 'container']) + self.assertDictEqual(subset_keys, keys) + + def test_get_keys_missing_callback(self): + with self.assertRaises(HTTPException) as cm: + self.crypto_context.get_keys({}) + self.assertIn('500 Internal Error', cm.exception.message) + self.assertIn('missing callback', + self.fake_logger.get_lines_for_level('error')[0]) + self.assertIn('Unable to retrieve encryption keys.', cm.exception.body) + + def test_get_keys_callback_exception(self): + def callback(): + raise Exception('boom') + with self.assertRaises(HTTPException) as cm: + self.crypto_context.get_keys({CRYPTO_KEY_CALLBACK: callback}) + self.assertIn('500 Internal Error', cm.exception.message) + self.assertIn('from callback: boom', + self.fake_logger.get_lines_for_level('error')[0]) + self.assertIn('Unable to retrieve encryption keys.', cm.exception.body) + + def test_get_keys_missing_key_for_default_required_list(self): + bad_keys = dict(fetch_crypto_keys()) + bad_keys.pop('object') + with self.assertRaises(HTTPException) as cm: + self.crypto_context.get_keys( + {CRYPTO_KEY_CALLBACK: lambda: bad_keys}) + self.assertIn('500 Internal Error', cm.exception.message) + self.assertIn("Missing key for 'object'", + self.fake_logger.get_lines_for_level('error')[0]) + self.assertIn('Unable to retrieve encryption keys.', cm.exception.body) + + def test_get_keys_missing_object_key_for_specified_required_list(self): + bad_keys = dict(fetch_crypto_keys()) + bad_keys.pop('object') + with self.assertRaises(HTTPException) as cm: + self.crypto_context.get_keys( + {CRYPTO_KEY_CALLBACK: lambda: bad_keys}, + required=['object', 'container']) + self.assertIn('500 Internal Error', cm.exception.message) + self.assertIn("Missing key for 'object'", + self.fake_logger.get_lines_for_level('error')[0]) + self.assertIn('Unable to retrieve encryption 
keys.', cm.exception.body) + + def test_get_keys_missing_container_key_for_specified_required_list(self): + bad_keys = dict(fetch_crypto_keys()) + bad_keys.pop('container') + with self.assertRaises(HTTPException) as cm: + self.crypto_context.get_keys( + {CRYPTO_KEY_CALLBACK: lambda: bad_keys}, + required=['object', 'container']) + self.assertIn('500 Internal Error', cm.exception.message) + self.assertIn("Missing key for 'container'", + self.fake_logger.get_lines_for_level('error')[0]) + self.assertIn('Unable to retrieve encryption keys.', cm.exception.body) + + def test_bad_object_key_for_default_required_list(self): + bad_keys = dict(fetch_crypto_keys()) + bad_keys['object'] = 'the minor key' + with self.assertRaises(HTTPException) as cm: + self.crypto_context.get_keys( + {CRYPTO_KEY_CALLBACK: lambda: bad_keys}) + self.assertIn('500 Internal Error', cm.exception.message) + self.assertIn("Bad key for 'object'", + self.fake_logger.get_lines_for_level('error')[0]) + self.assertIn('Unable to retrieve encryption keys.', cm.exception.body) + + def test_bad_container_key_for_default_required_list(self): + bad_keys = dict(fetch_crypto_keys()) + bad_keys['container'] = 'the major key' + with self.assertRaises(HTTPException) as cm: + self.crypto_context.get_keys( + {CRYPTO_KEY_CALLBACK: lambda: bad_keys}, + required=['object', 'container']) + self.assertIn('500 Internal Error', cm.exception.message) + self.assertIn("Bad key for 'container'", + self.fake_logger.get_lines_for_level('error')[0]) + self.assertIn('Unable to retrieve encryption keys.', cm.exception.body) + + def test_get_keys_not_a_dict(self): + with self.assertRaises(HTTPException) as cm: + self.crypto_context.get_keys( + {CRYPTO_KEY_CALLBACK: lambda: ['key', 'quay', 'qui']}) + self.assertIn('500 Internal Error', cm.exception.message) + self.assertIn("Did not get a keys dict", + self.fake_logger.get_lines_for_level('error')[0]) + self.assertIn('Unable to retrieve encryption keys.', cm.exception.body) + + +class TestModuleMethods(unittest.TestCase): + meta = {'iv': '0123456789abcdef', 'cipher': 'AES_CTR_256'} + serialized_meta = '%7B%22cipher%22%3A+%22AES_CTR_256%22%2C+%22' \ + 'iv%22%3A+%22MDEyMzQ1Njc4OWFiY2RlZg%3D%3D%22%7D' + + meta_with_key = {'iv': '0123456789abcdef', 'cipher': 'AES_CTR_256', + 'body_key': {'key': 'fedcba9876543210fedcba9876543210', + 'iv': 'fedcba9876543210'}} + serialized_meta_with_key = '%7B%22body_key%22%3A+%7B%22iv%22%3A+%22ZmVkY' \ + '2JhOTg3NjU0MzIxMA%3D%3D%22%2C+%22key%22%3A+%' \ + '22ZmVkY2JhOTg3NjU0MzIxMGZlZGNiYTk4NzY1NDMyMT' \ + 'A%3D%22%7D%2C+%22cipher%22%3A+%22AES_CTR_256' \ + '%22%2C+%22iv%22%3A+%22MDEyMzQ1Njc4OWFiY2RlZg' \ + '%3D%3D%22%7D' + + def test_dump_crypto_meta(self): + actual = crypto_utils.dump_crypto_meta(self.meta) + self.assertEqual(self.serialized_meta, actual) + + actual = crypto_utils.dump_crypto_meta(self.meta_with_key) + self.assertEqual(self.serialized_meta_with_key, actual) + + def test_load_crypto_meta(self): + actual = crypto_utils.load_crypto_meta(self.serialized_meta) + self.assertEqual(self.meta, actual) + + actual = crypto_utils.load_crypto_meta(self.serialized_meta_with_key) + self.assertEqual(self.meta_with_key, actual) + + def assert_raises(value, message): + with self.assertRaises(EncryptionException) as cm: + crypto_utils.load_crypto_meta(value) + self.assertIn('Bad crypto meta %r' % value, cm.exception.message) + self.assertIn(message, cm.exception.message) + + assert_raises(None, 'crypto meta not a string') + assert_raises(99, 'crypto meta not a string') + 
assert_raises('', 'No JSON object could be decoded') + assert_raises('abc', 'No JSON object could be decoded') + assert_raises('[]', 'crypto meta not a Mapping') + assert_raises('{"iv": "abcdef"}', 'Incorrect padding') + assert_raises('{"iv": []}', 'must be string or buffer') + assert_raises('{"iv": {}}', 'must be string or buffer') + assert_raises('{"iv": 99}', 'must be string or buffer') + assert_raises('{"key": "abcdef"}', 'Incorrect padding') + assert_raises('{"key": []}', 'must be string or buffer') + assert_raises('{"key": {}}', 'must be string or buffer') + assert_raises('{"key": 99}', 'must be string or buffer') + assert_raises('{"body_key": {"iv": "abcdef"}}', 'Incorrect padding') + assert_raises('{"body_key": {"iv": []}}', 'must be string or buffer') + assert_raises('{"body_key": {"iv": {}}}', 'must be string or buffer') + assert_raises('{"body_key": {"iv": 99}}', 'must be string or buffer') + assert_raises('{"body_key": {"key": "abcdef"}}', 'Incorrect padding') + assert_raises('{"body_key": {"key": []}}', 'must be string or buffer') + assert_raises('{"body_key": {"key": {}}}', 'must be string or buffer') + assert_raises('{"body_key": {"key": 99}}', 'must be string or buffer') + + def test_dump_then_load_crypto_meta(self): + actual = crypto_utils.load_crypto_meta( + crypto_utils.dump_crypto_meta(self.meta)) + self.assertEqual(self.meta, actual) + + actual = crypto_utils.load_crypto_meta( + crypto_utils.dump_crypto_meta(self.meta_with_key)) + self.assertEqual(self.meta_with_key, actual) + + def test_append_crypto_meta(self): + actual = crypto_utils.append_crypto_meta('abc', self.meta) + expected = 'abc; swift_meta=%s' % self.serialized_meta + self.assertEqual(actual, expected) + + actual = crypto_utils.append_crypto_meta('abc', self.meta_with_key) + expected = 'abc; swift_meta=%s' % self.serialized_meta_with_key + self.assertEqual(actual, expected) + + def test_extract_crypto_meta(self): + val, meta = crypto_utils.extract_crypto_meta( + 'abc; swift_meta=%s' % self.serialized_meta) + self.assertEqual('abc', val) + self.assertDictEqual(self.meta, meta) + + val, meta = crypto_utils.extract_crypto_meta( + 'abc; swift_meta=%s' % self.serialized_meta_with_key) + self.assertEqual('abc', val) + self.assertDictEqual(self.meta_with_key, meta) + + val, meta = crypto_utils.extract_crypto_meta('abc') + self.assertEqual('abc', val) + self.assertIsNone(meta) + + # other param names will be ignored + val, meta = crypto_utils.extract_crypto_meta('abc; foo=bar') + self.assertEqual('abc', val) + self.assertIsNone(meta) + + def test_append_then_extract_crypto_meta(self): + val = 'abc' + actual = crypto_utils.extract_crypto_meta( + crypto_utils.append_crypto_meta(val, self.meta)) + self.assertEqual((val, self.meta), actual) + + +class TestCrypto(unittest.TestCase): + + def setUp(self): + self.crypto = Crypto({}) + + def test_create_encryption_context(self): + value = 'encrypt me' * 100 # more than one cipher block + key = os.urandom(32) + iv = os.urandom(16) + ctxt = self.crypto.create_encryption_ctxt(key, iv) + expected = Cipher( + algorithms.AES(key), modes.CTR(iv), + backend=default_backend()).encryptor().update(value) + self.assertEqual(expected, ctxt.update(value)) + + for bad_iv in ('a little too long', 'too short'): + self.assertRaises( + ValueError, self.crypto.create_encryption_ctxt, key, bad_iv) + + for bad_key in ('objKey', 'a' * 31, 'a' * 33, 'a' * 16, 'a' * 24): + self.assertRaises( + ValueError, self.crypto.create_encryption_ctxt, bad_key, iv) + + def 
test_create_decryption_context(self): + value = 'decrypt me' * 100 # more than one cipher block + key = os.urandom(32) + iv = os.urandom(16) + ctxt = self.crypto.create_decryption_ctxt(key, iv, 0) + expected = Cipher( + algorithms.AES(key), modes.CTR(iv), + backend=default_backend()).decryptor().update(value) + self.assertEqual(expected, ctxt.update(value)) + + for bad_iv in ('a little too long', 'too short'): + self.assertRaises( + ValueError, self.crypto.create_decryption_ctxt, key, bad_iv, 0) + + for bad_key in ('objKey', 'a' * 31, 'a' * 33, 'a' * 16, 'a' * 24): + self.assertRaises( + ValueError, self.crypto.create_decryption_ctxt, bad_key, iv, 0) + + with self.assertRaises(ValueError) as cm: + self.crypto.create_decryption_ctxt(key, iv, -1) + self.assertEqual("Offset must not be negative", cm.exception.message) + + def test_enc_dec_small_chunks(self): + self.enc_dec_chunks(['encrypt me', 'because I', 'am sensitive']) + + def test_enc_dec_large_chunks(self): + self.enc_dec_chunks([os.urandom(65536), os.urandom(65536)]) + + def enc_dec_chunks(self, chunks): + key = 'objL7wjV6L79Sfs4y7dy41273l0k6Wki' + iv = self.crypto.create_iv() + enc_ctxt = self.crypto.create_encryption_ctxt(key, iv) + enc_val = [enc_ctxt.update(chunk) for chunk in chunks] + self.assertTrue(''.join(enc_val) != chunks) + dec_ctxt = self.crypto.create_decryption_ctxt(key, iv, 0) + dec_val = [dec_ctxt.update(chunk) for chunk in enc_val] + self.assertEqual(''.join(chunks), ''.join(dec_val), + 'Expected value {%s} but got {%s}' % + (''.join(chunks), ''.join(dec_val))) + + def test_decrypt_range(self): + chunks = ['0123456789abcdef', 'ghijklmnopqrstuv'] + key = 'objL7wjV6L79Sfs4y7dy41273l0k6Wki' + iv = self.crypto.create_iv() + enc_ctxt = self.crypto.create_encryption_ctxt(key, iv) + enc_val = [enc_ctxt.update(chunk) for chunk in chunks] + self.assertTrue(''.join(enc_val) != chunks) + + # Simulate a ranged GET from byte 19 to 32 : 'jklmnopqrstuv' + dec_ctxt = self.crypto.create_decryption_ctxt(key, iv, 19) + ranged_chunks = [enc_val[1][3:]] + dec_val = [dec_ctxt.update(chunk) for chunk in ranged_chunks] + self.assertEqual('jklmnopqrstuv', ''.join(dec_val), + 'Expected value {%s} but got {%s}' % + ('jklmnopqrstuv', ''.join(dec_val))) + + def test_create_decryption_context_non_zero_offset(self): + # Verify that iv increments for each 16 bytes of offset. + # For a ranged GET we pass a non-zero offset so that the decrypter + # counter is incremented to the correct value to start decrypting at + # that offset into the object body. The counter should increment by one + # from the starting IV value for every 16 bytes offset into the object + # body, until it reaches 2^128 -1 when it should wrap to zero. We check + # that is happening by verifying a decrypted value using various + # offsets. 
+ key = 'objL7wjV6L79Sfs4y7dy41273l0k6Wki' + + def do_test(): + for offset, exp_iv in mappings.items(): + dec_ctxt = self.crypto.create_decryption_ctxt(key, iv, offset) + offset_in_block = offset % 16 + cipher = Cipher(algorithms.AES(key), + modes.CTR(exp_iv), + backend=default_backend()) + expected = cipher.decryptor().update( + 'p' * offset_in_block + 'ciphertext') + actual = dec_ctxt.update('ciphertext') + expected = expected[offset % 16:] + self.assertEqual(expected, actual, + 'Expected %r but got %r, iv=%s and offset=%s' + % (expected, actual, iv, offset)) + + iv = '0000000010000000' + mappings = { + 2: '0000000010000000', + 16: '0000000010000001', + 19: '0000000010000001', + 48: '0000000010000003', + 1024: '000000001000000p', + 5119: '000000001000001o' + } + do_test() + + # choose max iv value and test that it wraps to zero + iv = chr(0xff) * 16 + mappings = { + 2: iv, + 16: str(bytearray.fromhex('00' * 16)), # iv wraps to 0 + 19: str(bytearray.fromhex('00' * 16)), + 48: str(bytearray.fromhex('00' * 15 + '02')), + 1024: str(bytearray.fromhex('00' * 15 + '3f')), + 5119: str(bytearray.fromhex('00' * 14 + '013E')) + } + do_test() + + iv = chr(0x0) * 16 + mappings = { + 2: iv, + 16: str(bytearray.fromhex('00' * 15 + '01')), + 19: str(bytearray.fromhex('00' * 15 + '01')), + 48: str(bytearray.fromhex('00' * 15 + '03')), + 1024: str(bytearray.fromhex('00' * 15 + '40')), + 5119: str(bytearray.fromhex('00' * 14 + '013F')) + } + do_test() + + iv = chr(0x0) * 8 + chr(0xff) * 8 + mappings = { + 2: iv, + 16: str(bytearray.fromhex('00' * 7 + '01' + '00' * 8)), + 19: str(bytearray.fromhex('00' * 7 + '01' + '00' * 8)), + 48: str(bytearray.fromhex('00' * 7 + '01' + '00' * 7 + '02')), + 1024: str(bytearray.fromhex('00' * 7 + '01' + '00' * 7 + '3F')), + 5119: str(bytearray.fromhex('00' * 7 + '01' + '00' * 6 + '013E')) + } + do_test() + + def test_check_key(self): + for key in ('objKey', 'a' * 31, 'a' * 33, 'a' * 16, 'a' * 24): + with self.assertRaises(ValueError) as cm: + self.crypto.check_key(key) + self.assertEqual("Key must be length 32 bytes", + cm.exception.message) + + def test_check_crypto_meta(self): + meta = {'cipher': 'AES_CTR_256'} + with self.assertRaises(EncryptionException) as cm: + self.crypto.check_crypto_meta(meta) + self.assertEqual("Bad crypto meta: Missing 'iv'", + cm.exception.message) + + for bad_iv in ('a little too long', 'too short'): + meta['iv'] = bad_iv + with self.assertRaises(EncryptionException) as cm: + self.crypto.check_crypto_meta(meta) + self.assertEqual("Bad crypto meta: IV must be length 16 bytes", + cm.exception.message) + + meta = {'iv': os.urandom(16)} + with self.assertRaises(EncryptionException) as cm: + self.crypto.check_crypto_meta(meta) + self.assertEqual("Bad crypto meta: Missing 'cipher'", + cm.exception.message) + + meta['cipher'] = 'Mystery cipher' + with self.assertRaises(EncryptionException) as cm: + self.crypto.check_crypto_meta(meta) + self.assertEqual("Bad crypto meta: Cipher must be AES_CTR_256", + cm.exception.message) + + def test_create_iv(self): + self.assertEqual(16, len(self.crypto.create_iv())) + # crude check that we get back different values on each call + self.assertNotEqual(self.crypto.create_iv(), self.crypto.create_iv()) + + def test_get_crypto_meta(self): + meta = self.crypto.create_crypto_meta() + self.assertIsInstance(meta, dict) + # this is deliberately brittle so that if new items are added then the + # test will need to be updated + self.assertEqual(2, len(meta)) + self.assertIn('iv', meta) + self.assertEqual(16, len(meta['iv'])) 
+ self.assertIn('cipher', meta) + self.assertEqual('AES_CTR_256', meta['cipher']) + self.crypto.check_crypto_meta(meta) # sanity check + meta2 = self.crypto.create_crypto_meta() + self.assertNotEqual(meta['iv'], meta2['iv']) # crude sanity check + + def test_create_random_key(self): + # crude check that we get unique keys on each call + keys = set() + for i in range(10): + key = self.crypto.create_random_key() + self.assertEqual(32, len(key)) + keys.add(key) + self.assertEqual(10, len(keys)) + + def test_wrap_unwrap_key(self): + wrapping_key = os.urandom(32) + key_to_wrap = os.urandom(32) + iv = os.urandom(16) + with mock.patch( + 'swift.common.middleware.crypto.crypto_utils.Crypto.create_iv', + return_value=iv): + wrapped = self.crypto.wrap_key(wrapping_key, key_to_wrap) + cipher = Cipher(algorithms.AES(wrapping_key), modes.CTR(iv), + backend=default_backend()) + expected = {'key': cipher.encryptor().update(key_to_wrap), + 'iv': iv} + self.assertEqual(expected, wrapped) + + unwrapped = self.crypto.unwrap_key(wrapping_key, wrapped) + self.assertEqual(key_to_wrap, unwrapped) + + def test_unwrap_bad_key(self): + # verify that ValueError is raised if unwrapped key is invalid + wrapping_key = os.urandom(32) + for length in (0, 16, 24, 31, 33): + key_to_wrap = os.urandom(length) + wrapped = self.crypto.wrap_key(wrapping_key, key_to_wrap) + with self.assertRaises(ValueError) as cm: + self.crypto.unwrap_key(wrapping_key, wrapped) + self.assertEqual( + cm.exception.message, 'Key must be length 32 bytes') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/unit/common/middleware/crypto/test_decrypter.py b/test/unit/common/middleware/crypto/test_decrypter.py new file mode 100644 index 0000000000..b70d65029b --- /dev/null +++ b/test/unit/common/middleware/crypto/test_decrypter.py @@ -0,0 +1,1119 @@ +# Copyright (c) 2015-2016 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
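The decrypter tests that follow build responses in which the object body is encrypted with a random body key, and that body key is itself wrapped (AES-CTR encrypted) with the object key and carried in X-Object-Sysmeta-Crypto-Body-Meta (see _make_response_headers below and the wrap/unwrap tests above). A self-contained sketch of that two-level scheme, using the same cryptography primitives as the patch but purely local, illustrative names:

    import os

    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

    def aes_ctr(key, iv, data):
        # CTR encryption and decryption are the same keystream XOR, so one
        # helper serves both directions in this sketch
        return Cipher(algorithms.AES(key), modes.CTR(iv),
                      backend=default_backend()).decryptor().update(data)

    object_key = os.urandom(32)
    body_key = os.urandom(32)
    body_iv, wrap_iv = os.urandom(16), os.urandom(16)

    ciphertext = aes_ctr(body_key, body_iv, 'FAKE APP')  # encrypted body
    wrapped = {'key': aes_ctr(object_key, wrap_iv, body_key), 'iv': wrap_iv}

    # what the decrypter does on GET: unwrap the body key with the object
    # key, then decrypt the body with the unwrapped key
    unwrapped = aes_ctr(object_key, wrapped['iv'], wrapped['key'])
    assert unwrapped == body_key
    assert aes_ctr(unwrapped, body_iv, ciphertext) == 'FAKE APP'
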
+import base64 +import json +import os +import unittest +from xml.dom import minidom + +import mock + +from swift.common.header_key_dict import HeaderKeyDict +from swift.common.middleware.crypto import decrypter +from swift.common.middleware.crypto.crypto_utils import CRYPTO_KEY_CALLBACK, \ + dump_crypto_meta, Crypto +from swift.common.swob import Request, HTTPException, HTTPOk, \ + HTTPPreconditionFailed, HTTPNotFound, HTTPPartialContent + +from test.unit import FakeLogger +from test.unit.common.middleware.crypto.crypto_helpers import md5hex, \ + fetch_crypto_keys, FAKE_IV, encrypt, fake_get_crypto_meta +from test.unit.common.middleware.helpers import FakeSwift, FakeAppThatExcepts + + +def get_crypto_meta_header(crypto_meta=None): + if crypto_meta is None: + crypto_meta = fake_get_crypto_meta() + return dump_crypto_meta(crypto_meta) + + +def encrypt_and_append_meta(value, key, crypto_meta=None): + return '%s; swift_meta=%s' % ( + base64.b64encode(encrypt(value, key, FAKE_IV)), + get_crypto_meta_header(crypto_meta)) + + +class TestDecrypterObjectRequests(unittest.TestCase): + def setUp(self): + self.app = FakeSwift() + self.decrypter = decrypter.Decrypter(self.app, {}) + self.decrypter.logger = FakeLogger() + + def _make_response_headers(self, content_length, plaintext_etag, keys, + body_key): + # helper method to make a typical set of response headers for a GET or + # HEAD request + cont_key = keys['container'] + object_key = keys['object'] + body_key_meta = {'key': encrypt(body_key, object_key, FAKE_IV), + 'iv': FAKE_IV} + body_crypto_meta = fake_get_crypto_meta(body_key=body_key_meta) + return HeaderKeyDict({ + 'Etag': 'hashOfCiphertext', + 'content-type': 'text/plain', + 'content-length': content_length, + 'X-Object-Sysmeta-Crypto-Etag': '%s; swift_meta=%s' % ( + base64.b64encode(encrypt(plaintext_etag, object_key, FAKE_IV)), + get_crypto_meta_header()), + 'X-Object-Sysmeta-Crypto-Body-Meta': + get_crypto_meta_header(body_crypto_meta), + 'x-object-transient-sysmeta-crypto-meta-test': + base64.b64encode(encrypt('encrypt me', object_key, FAKE_IV)) + + ';swift_meta=' + get_crypto_meta_header(), + 'x-object-sysmeta-container-update-override-etag': + encrypt_and_append_meta('encrypt me, too', cont_key), + 'x-object-sysmeta-test': 'do not encrypt me', + }) + + def _test_request_success(self, method, body): + env = {'REQUEST_METHOD': method, + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + plaintext_etag = md5hex(body) + body_key = os.urandom(32) + enc_body = encrypt(body, body_key, FAKE_IV) + hdrs = self._make_response_headers( + len(enc_body), plaintext_etag, fetch_crypto_keys(), body_key) + + # there shouldn't be any x-object-meta- headers, but if there are + # then the decrypted header will win where there is a name clash... 
+ hdrs.update({ + 'x-object-meta-test': 'unexpected, overwritten by decrypted value', + 'x-object-meta-distinct': 'unexpected but distinct from encrypted' + }) + self.app.register( + method, '/v1/a/c/o', HTTPOk, body=enc_body, headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual('200 OK', resp.status) + self.assertEqual(plaintext_etag, resp.headers['Etag']) + self.assertEqual('text/plain', resp.headers['Content-Type']) + self.assertEqual('encrypt me', resp.headers['x-object-meta-test']) + self.assertEqual('unexpected but distinct from encrypted', + resp.headers['x-object-meta-distinct']) + self.assertEqual('do not encrypt me', + resp.headers['x-object-sysmeta-test']) + self.assertEqual( + 'encrypt me, too', + resp.headers['X-Object-Sysmeta-Container-Update-Override-Etag']) + self.assertNotIn('X-Object-Sysmeta-Crypto-Body-Meta', resp.headers) + self.assertNotIn('X-Object-Sysmeta-Crypto-Etag', resp.headers) + return resp + + def test_GET_success(self): + body = 'FAKE APP' + resp = self._test_request_success('GET', body) + self.assertEqual(body, resp.body) + + def test_HEAD_success(self): + body = 'FAKE APP' + resp = self._test_request_success('HEAD', body) + self.assertEqual('', resp.body) + + def test_headers_case(self): + body = 'fAkE ApP' + req = Request.blank('/v1/a/c/o', body='FaKe') + req.environ[CRYPTO_KEY_CALLBACK] = fetch_crypto_keys + plaintext_etag = md5hex(body) + body_key = os.urandom(32) + enc_body = encrypt(body, body_key, FAKE_IV) + hdrs = self._make_response_headers( + len(enc_body), plaintext_etag, fetch_crypto_keys(), body_key) + + hdrs.update({ + 'x-Object-mEta-ignoRes-caSe': 'thIs pArt WilL bE cOol', + }) + self.app.register( + 'GET', '/v1/a/c/o', HTTPOk, body=enc_body, headers=hdrs) + + status, headers, app_iter = req.call_application(self.decrypter) + self.assertEqual(status, '200 OK') + expected = { + 'Etag': '7f7837924188f7b511a9e3881a9f77a8', + 'X-Object-Sysmeta-Container-Update-Override-Etag': + 'encrypt me, too', + 'X-Object-Meta-Test': 'encrypt me', + 'Content-Length': '8', + 'X-Object-Meta-Ignores-Case': 'thIs pArt WilL bE cOol', + 'X-Object-Sysmeta-Test': 'do not encrypt me', + 'Content-Type': 'text/plain', + } + self.assertEqual(dict(headers), expected) + self.assertEqual('fAkE ApP', ''.join(app_iter)) + + def _test_412_response(self, method): + # simulate a 412 response to a conditional GET which has an Etag header + data = 'the object content' + env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env, method=method) + resp_body = 'I am sorry, you have failed to meet a precondition' + hdrs = self._make_response_headers( + len(resp_body), md5hex(data), fetch_crypto_keys(), 'not used') + self.app.register(method, '/v1/a/c/o', HTTPPreconditionFailed, + body=resp_body, headers=hdrs) + resp = req.get_response(self.decrypter) + + self.assertEqual('412 Precondition Failed', resp.status) + # the response body should not be decrypted, it is already plaintext + self.assertEqual(resp_body if method == 'GET' else '', resp.body) + # whereas the Etag and other headers should be decrypted + self.assertEqual(md5hex(data), resp.headers['Etag']) + self.assertEqual('text/plain', resp.headers['Content-Type']) + self.assertEqual('encrypt me', resp.headers['x-object-meta-test']) + self.assertEqual('do not encrypt me', + resp.headers['x-object-sysmeta-test']) + + def test_GET_412_response(self): + self._test_412_response('GET') + + def test_HEAD_412_response(self): + self._test_412_response('HEAD') + + def 
_test_404_response(self, method): + # simulate a 404 response, sanity check response headers + env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env, method=method) + resp_body = 'You still have not found what you are looking for' + hdrs = {'content-type': 'text/plain', + 'content-length': len(resp_body)} + self.app.register(method, '/v1/a/c/o', HTTPNotFound, + body=resp_body, headers=hdrs) + resp = req.get_response(self.decrypter) + + self.assertEqual('404 Not Found', resp.status) + # the response body should not be decrypted, it is already plaintext + self.assertEqual(resp_body if method == 'GET' else '', resp.body) + # there should be no etag header inserted by decrypter + self.assertNotIn('Etag', resp.headers) + self.assertEqual('text/plain', resp.headers['Content-Type']) + + def test_GET_404_response(self): + self._test_404_response('GET') + + def test_HEAD_404_response(self): + self._test_404_response('HEAD') + + def test_GET_missing_etag_crypto_meta(self): + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + key = fetch_crypto_keys()['object'] + enc_body = encrypt(body, key, FAKE_IV) + hdrs = self._make_response_headers( + len(body), md5hex(body), fetch_crypto_keys(), 'not used') + # simulate missing crypto meta from encrypted etag + hdrs['X-Object-Sysmeta-Crypto-Etag'] = \ + base64.b64encode(encrypt(md5hex(body), key, FAKE_IV)) + self.app.register('GET', '/v1/a/c/o', HTTPOk, body=enc_body, + headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual('500 Internal Error', resp.status) + self.assertIn('Error decrypting header', resp.body) + self.assertIn('Error decrypting header X-Object-Sysmeta-Crypto-Etag', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def _test_override_etag_bad_meta(self, method, bad_crypto_meta): + env = {'REQUEST_METHOD': method, + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + key = fetch_crypto_keys()['object'] + enc_body = encrypt(body, key, FAKE_IV) + hdrs = self._make_response_headers( + len(body), md5hex(body), fetch_crypto_keys(), 'not used') + # simulate missing crypto meta from encrypted override etag + hdrs['X-Object-Sysmeta-Container-Update-Override-Etag'] = \ + encrypt_and_append_meta( + md5hex(body), key, crypto_meta=bad_crypto_meta) + self.app.register(method, '/v1/a/c/o', HTTPOk, body=enc_body, + headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual('500 Internal Error', resp.status) + self.assertIn('Error decrypting header ' + 'X-Object-Sysmeta-Container-Update-Override-Etag', + self.decrypter.logger.get_lines_for_level('error')[0]) + return resp + + def test_GET_override_etag_bad_iv(self): + bad_crypto_meta = fake_get_crypto_meta() + bad_crypto_meta['iv'] = 'bad_iv' + resp = self._test_override_etag_bad_meta('GET', bad_crypto_meta) + self.assertIn('Error decrypting header', resp.body) + + def test_HEAD_override_etag_bad_iv(self): + bad_crypto_meta = fake_get_crypto_meta() + bad_crypto_meta['iv'] = 'bad_iv' + resp = self._test_override_etag_bad_meta('HEAD', bad_crypto_meta) + self.assertEqual('', resp.body) + + def test_GET_override_etag_bad_cipher(self): + bad_crypto_meta = fake_get_crypto_meta() + bad_crypto_meta['cipher'] = 'unknown cipher' + resp = self._test_override_etag_bad_meta('GET', bad_crypto_meta) + self.assertIn('Error decrypting header', resp.body) + + def 
test_HEAD_override_etag_bad_cipher(self): + bad_crypto_meta = fake_get_crypto_meta() + bad_crypto_meta['cipher'] = 'unknown cipher' + resp = self._test_override_etag_bad_meta('HEAD', bad_crypto_meta) + self.assertEqual('', resp.body) + + def _test_bad_key(self, method): + # use bad key + def bad_fetch_crypto_keys(): + keys = fetch_crypto_keys() + keys['object'] = 'bad key' + return keys + + env = {'REQUEST_METHOD': method, + CRYPTO_KEY_CALLBACK: bad_fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + key = fetch_crypto_keys()['object'] + enc_body = encrypt(body, key, FAKE_IV) + hdrs = self._make_response_headers( + len(body), md5hex(body), fetch_crypto_keys(), 'not used') + self.app.register(method, '/v1/a/c/o', HTTPOk, body=enc_body, + headers=hdrs) + return req.get_response(self.decrypter) + + def test_HEAD_with_bad_key(self): + resp = self._test_bad_key('HEAD') + self.assertEqual('500 Internal Error', resp.status) + self.assertIn("Bad key for 'object'", + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_with_bad_key(self): + resp = self._test_bad_key('GET') + self.assertEqual('500 Internal Error', resp.status) + self.assertEqual('Unable to retrieve encryption keys.', + resp.body) + self.assertIn("Bad key for 'object'", + self.decrypter.logger.get_lines_for_level('error')[0]) + + def _test_bad_crypto_meta_for_user_metadata(self, method, bad_crypto_meta): + # use bad iv for metadata headers + env = {'REQUEST_METHOD': method, + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + key = fetch_crypto_keys()['object'] + enc_body = encrypt(body, key, FAKE_IV) + hdrs = self._make_response_headers( + len(body), md5hex(body), fetch_crypto_keys(), 'not used') + enc_val = base64.b64encode(encrypt('encrypt me', key, FAKE_IV)) + if bad_crypto_meta: + enc_val += ';swift_meta=' + get_crypto_meta_header( + crypto_meta=bad_crypto_meta) + hdrs['x-object-transient-sysmeta-crypto-meta-test'] = enc_val + self.app.register(method, '/v1/a/c/o', HTTPOk, body=enc_body, + headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual('500 Internal Error', resp.status) + self.assertIn( + 'Error decrypting header X-Object-Transient-Sysmeta-Crypto-Meta-' + 'Test', self.decrypter.logger.get_lines_for_level('error')[0]) + return resp + + def test_HEAD_with_missing_crypto_meta_for_user_metadata(self): + self._test_bad_crypto_meta_for_user_metadata('HEAD', None) + self.assertIn('Missing crypto meta in value', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_with_missing_crypto_meta_for_user_metadata(self): + self._test_bad_crypto_meta_for_user_metadata('GET', None) + self.assertIn('Missing crypto meta in value', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_HEAD_with_bad_iv_for_user_metadata(self): + bad_crypto_meta = fake_get_crypto_meta() + bad_crypto_meta['iv'] = 'bad_iv' + self._test_bad_crypto_meta_for_user_metadata('HEAD', bad_crypto_meta) + self.assertIn('IV must be length 16', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_HEAD_with_missing_iv_for_user_metadata(self): + bad_crypto_meta = fake_get_crypto_meta() + bad_crypto_meta.pop('iv') + self._test_bad_crypto_meta_for_user_metadata('HEAD', bad_crypto_meta) + self.assertIn( + 'iv', self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_with_bad_iv_for_user_metadata(self): + bad_crypto_meta = fake_get_crypto_meta() + bad_crypto_meta['iv'] = 
'bad_iv' + resp = self._test_bad_crypto_meta_for_user_metadata( + 'GET', bad_crypto_meta) + self.assertEqual('Error decrypting header', resp.body) + self.assertIn('IV must be length 16', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_with_missing_iv_for_user_metadata(self): + bad_crypto_meta = fake_get_crypto_meta() + bad_crypto_meta.pop('iv') + resp = self._test_bad_crypto_meta_for_user_metadata( + 'GET', bad_crypto_meta) + self.assertEqual('Error decrypting header', resp.body) + self.assertIn( + 'iv', self.decrypter.logger.get_lines_for_level('error')[0]) + + def _test_GET_with_bad_crypto_meta_for_object_body(self, bad_crypto_meta): + # use bad iv for object body + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + key = fetch_crypto_keys()['object'] + enc_body = encrypt(body, key, FAKE_IV) + hdrs = self._make_response_headers( + len(body), md5hex(body), fetch_crypto_keys(), 'not used') + hdrs['X-Object-Sysmeta-Crypto-Body-Meta'] = \ + get_crypto_meta_header(crypto_meta=bad_crypto_meta) + self.app.register('GET', '/v1/a/c/o', HTTPOk, body=enc_body, + headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual('500 Internal Error', resp.status) + self.assertEqual('Error decrypting object', resp.body) + self.assertIn('Error decrypting object', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_with_bad_iv_for_object_body(self): + bad_crypto_meta = fake_get_crypto_meta(key=os.urandom(32)) + bad_crypto_meta['iv'] = 'bad_iv' + self._test_GET_with_bad_crypto_meta_for_object_body(bad_crypto_meta) + self.assertIn('IV must be length 16', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_with_missing_iv_for_object_body(self): + bad_crypto_meta = fake_get_crypto_meta(key=os.urandom(32)) + bad_crypto_meta.pop('iv') + self._test_GET_with_bad_crypto_meta_for_object_body(bad_crypto_meta) + self.assertIn("Missing 'iv'", + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_with_bad_body_key_for_object_body(self): + body_key_meta = {'key': 'wrapped too short key', 'iv': FAKE_IV} + bad_crypto_meta = fake_get_crypto_meta(body_key=body_key_meta) + self._test_GET_with_bad_crypto_meta_for_object_body(bad_crypto_meta) + self.assertIn('Key must be length 32', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_with_missing_body_key_for_object_body(self): + bad_crypto_meta = fake_get_crypto_meta() # no key by default + self._test_GET_with_bad_crypto_meta_for_object_body(bad_crypto_meta) + self.assertIn("Missing 'body_key'", + self.decrypter.logger.get_lines_for_level('error')[0]) + + def _test_req_metadata_not_encrypted(self, method): + # check that metadata is not decrypted if it does not have crypto meta; + # testing for case of an unencrypted POST to an object. 
+ env = {'REQUEST_METHOD': method, + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + plaintext_etag = md5hex(body) + body_key = os.urandom(32) + enc_body = encrypt(body, body_key, FAKE_IV) + hdrs = self._make_response_headers( + len(body), plaintext_etag, fetch_crypto_keys(), body_key) + hdrs.pop('x-object-transient-sysmeta-crypto-meta-test') + hdrs['x-object-meta-test'] = 'plaintext not encrypted' + self.app.register( + method, '/v1/a/c/o', HTTPOk, body=enc_body, headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual('200 OK', resp.status) + self.assertEqual(plaintext_etag, resp.headers['Etag']) + self.assertEqual('text/plain', resp.headers['Content-Type']) + self.assertEqual('plaintext not encrypted', + resp.headers['x-object-meta-test']) + + def test_HEAD_metadata_not_encrypted(self): + self._test_req_metadata_not_encrypted('HEAD') + + def test_GET_metadata_not_encrypted(self): + self._test_req_metadata_not_encrypted('GET') + + def test_GET_unencrypted_data(self): + # testing case of an unencrypted object with encrypted metadata from + # a later POST + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + obj_key = fetch_crypto_keys()['object'] + hdrs = {'Etag': md5hex(body), + 'content-type': 'text/plain', + 'content-length': len(body), + 'x-object-transient-sysmeta-crypto-meta-test': + base64.b64encode(encrypt('encrypt me', obj_key, FAKE_IV)) + + ';swift_meta=' + get_crypto_meta_header(), + 'x-object-sysmeta-test': 'do not encrypt me'} + self.app.register('GET', '/v1/a/c/o', HTTPOk, body=body, headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual(body, resp.body) + self.assertEqual('200 OK', resp.status) + self.assertEqual(md5hex(body), resp.headers['Etag']) + self.assertEqual('text/plain', resp.headers['Content-Type']) + # POSTed user meta was encrypted + self.assertEqual('encrypt me', resp.headers['x-object-meta-test']) + # PUT sysmeta was not encrypted + self.assertEqual('do not encrypt me', + resp.headers['x-object-sysmeta-test']) + + def test_GET_multiseg(self): + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + chunks = ['some', 'chunks', 'of data'] + body = ''.join(chunks) + plaintext_etag = md5hex(body) + body_key = os.urandom(32) + ctxt = Crypto().create_encryption_ctxt(body_key, FAKE_IV) + enc_body = [encrypt(chunk, ctxt=ctxt) for chunk in chunks] + hdrs = self._make_response_headers( + sum(map(len, enc_body)), plaintext_etag, fetch_crypto_keys(), + body_key) + self.app.register( + 'GET', '/v1/a/c/o', HTTPOk, body=enc_body, headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual(body, resp.body) + self.assertEqual('200 OK', resp.status) + self.assertEqual(plaintext_etag, resp.headers['Etag']) + self.assertEqual('text/plain', resp.headers['Content-Type']) + + def test_GET_multiseg_with_range(self): + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + req.headers['Content-Range'] = 'bytes 3-10/17' + chunks = ['0123', '45678', '9abcdef'] + body = ''.join(chunks) + plaintext_etag = md5hex(body) + body_key = os.urandom(32) + ctxt = Crypto().create_encryption_ctxt(body_key, FAKE_IV) + enc_body = [encrypt(chunk, ctxt=ctxt) for chunk in chunks] + enc_body = [enc_body[0][3:], enc_body[1], enc_body[2][:2]] + hdrs = 
self._make_response_headers( + sum(map(len, enc_body)), plaintext_etag, fetch_crypto_keys(), + body_key) + hdrs['content-range'] = req.headers['Content-Range'] + self.app.register( + 'GET', '/v1/a/c/o', HTTPOk, body=enc_body, headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual('3456789a', resp.body) + self.assertEqual('200 OK', resp.status) + self.assertEqual(plaintext_etag, resp.headers['Etag']) + self.assertEqual('text/plain', resp.headers['Content-Type']) + + # Force the decrypter context updates to be less than one of our range + # sizes to check that the decrypt context offset is setup correctly with + # offset to first byte of range for first update and then re-used. + # Do mocking here to have the mocked value have effect in the generator + # function. + @mock.patch.object(decrypter, 'DECRYPT_CHUNK_SIZE', 4) + def test_GET_multipart_ciphertext(self): + # build fake multipart response body + body_key = os.urandom(32) + plaintext = 'Cwm fjord veg balks nth pyx quiz' + plaintext_etag = md5hex(plaintext) + ciphertext = encrypt(plaintext, body_key, FAKE_IV) + parts = ((0, 3, 'text/plain'), + (4, 9, 'text/plain; charset=us-ascii'), + (24, 32, 'text/plain')) + length = len(ciphertext) + body = '' + for start, end, ctype in parts: + body += '--multipartboundary\r\n' + body += 'Content-Type: %s\r\n' % ctype + body += 'Content-Range: bytes %s-%s/%s' % (start, end - 1, length) + body += '\r\n\r\n' + ciphertext[start:end] + '\r\n' + body += '--multipartboundary--' + + # register request with fake swift + hdrs = self._make_response_headers( + len(body), plaintext_etag, fetch_crypto_keys(), body_key) + hdrs['content-type'] = \ + 'multipart/byteranges;boundary=multipartboundary' + self.app.register('GET', '/v1/a/c/o', HTTPPartialContent, body=body, + headers=hdrs) + + # issue request + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + resp = req.get_response(self.decrypter) + + self.assertEqual('206 Partial Content', resp.status) + self.assertEqual(plaintext_etag, resp.headers['Etag']) + self.assertEqual(len(body), int(resp.headers['Content-Length'])) + self.assertEqual('multipart/byteranges;boundary=multipartboundary', + resp.headers['Content-Type']) + + # the multipart headers could be re-ordered, so parse response body to + # verify expected content + resp_lines = resp.body.split('\r\n') + resp_lines.reverse() + for start, end, ctype in parts: + self.assertEqual('--multipartboundary', resp_lines.pop()) + expected_header_lines = { + 'Content-Type: %s' % ctype, + 'Content-Range: bytes %s-%s/%s' % (start, end - 1, length)} + resp_header_lines = {resp_lines.pop(), resp_lines.pop()} + self.assertEqual(expected_header_lines, resp_header_lines) + self.assertEqual('', resp_lines.pop()) + self.assertEqual(plaintext[start:end], resp_lines.pop()) + self.assertEqual('--multipartboundary--', resp_lines.pop()) + + # we should have consumed the whole response body + self.assertFalse(resp_lines) + + def test_GET_multipart_content_type(self): + # *just* having multipart content type shouldn't trigger the mime doc + # code path + body_key = os.urandom(32) + plaintext = 'Cwm fjord veg balks nth pyx quiz' + plaintext_etag = md5hex(plaintext) + ciphertext = encrypt(plaintext, body_key, FAKE_IV) + + # register request with fake swift + hdrs = self._make_response_headers( + len(ciphertext), plaintext_etag, fetch_crypto_keys(), body_key) + hdrs['content-type'] = \ + 'multipart/byteranges;boundary=multipartboundary' + 
self.app.register('GET', '/v1/a/c/o', HTTPOk, body=ciphertext, + headers=hdrs) + + # issue request + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + resp = req.get_response(self.decrypter) + + self.assertEqual('200 OK', resp.status) + self.assertEqual(plaintext_etag, resp.headers['Etag']) + self.assertEqual(len(plaintext), int(resp.headers['Content-Length'])) + self.assertEqual('multipart/byteranges;boundary=multipartboundary', + resp.headers['Content-Type']) + self.assertEqual(plaintext, resp.body) + + def test_GET_multipart_no_body_crypto_meta(self): + # build fake multipart response body + plaintext = 'Cwm fjord veg balks nth pyx quiz' + plaintext_etag = md5hex(plaintext) + parts = ((0, 3, 'text/plain'), + (4, 9, 'text/plain; charset=us-ascii'), + (24, 32, 'text/plain')) + length = len(plaintext) + body = '' + for start, end, ctype in parts: + body += '--multipartboundary\r\n' + body += 'Content-Type: %s\r\n' % ctype + body += 'Content-Range: bytes %s-%s/%s' % (start, end - 1, length) + body += '\r\n\r\n' + plaintext[start:end] + '\r\n' + body += '--multipartboundary--' + + # register request with fake swift + hdrs = { + 'Etag': plaintext_etag, + 'content-type': 'multipart/byteranges;boundary=multipartboundary', + 'content-length': len(body)} + self.app.register('GET', '/v1/a/c/o', HTTPPartialContent, body=body, + headers=hdrs) + + # issue request + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + resp = req.get_response(self.decrypter) + + self.assertEqual('206 Partial Content', resp.status) + self.assertEqual(plaintext_etag, resp.headers['Etag']) + self.assertEqual(len(body), int(resp.headers['Content-Length'])) + self.assertEqual('multipart/byteranges;boundary=multipartboundary', + resp.headers['Content-Type']) + + # the multipart response body should be unchanged + self.assertEqual(body, resp.body) + + def _test_GET_multipart_bad_body_crypto_meta(self, bad_crypto_meta): + # build fake multipart response body + key = fetch_crypto_keys()['object'] + ctxt = Crypto().create_encryption_ctxt(key, FAKE_IV) + plaintext = 'Cwm fjord veg balks nth pyx quiz' + plaintext_etag = md5hex(plaintext) + ciphertext = encrypt(plaintext, ctxt=ctxt) + parts = ((0, 3, 'text/plain'), + (4, 9, 'text/plain; charset=us-ascii'), + (24, 32, 'text/plain')) + length = len(ciphertext) + body = '' + for start, end, ctype in parts: + body += '--multipartboundary\r\n' + body += 'Content-Type: %s\r\n' % ctype + body += 'Content-Range: bytes %s-%s/%s' % (start, end - 1, length) + body += '\r\n\r\n' + ciphertext[start:end] + '\r\n' + body += '--multipartboundary--' + + # register request with fake swift + hdrs = self._make_response_headers( + len(body), plaintext_etag, fetch_crypto_keys(), 'not used') + hdrs['content-type'] = \ + 'multipart/byteranges;boundary=multipartboundary' + hdrs['X-Object-Sysmeta-Crypto-Body-Meta'] = \ + get_crypto_meta_header(bad_crypto_meta) + self.app.register('GET', '/v1/a/c/o', HTTPOk, body=body, headers=hdrs) + + # issue request + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + resp = req.get_response(self.decrypter) + + self.assertEqual('500 Internal Error', resp.status) + self.assertEqual('Error decrypting object', resp.body) + self.assertIn('Error decrypting object', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def 
test_GET_multipart_bad_body_cipher(self): + self._test_GET_multipart_bad_body_crypto_meta( + {'cipher': 'Mystery cipher', 'iv': '1234567887654321'}) + self.assertIn('Cipher must be AES_CTR_256', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_multipart_missing_body_cipher(self): + self._test_GET_multipart_bad_body_crypto_meta( + {'iv': '1234567887654321'}) + self.assertIn('cipher', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_multipart_too_short_body_iv(self): + self._test_GET_multipart_bad_body_crypto_meta( + {'cipher': 'AES_CTR_256', 'iv': 'too short'}) + self.assertIn('IV must be length 16', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_multipart_too_long_body_iv(self): + self._test_GET_multipart_bad_body_crypto_meta( + {'cipher': 'AES_CTR_256', 'iv': 'a little too long'}) + self.assertIn('IV must be length 16', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_multipart_missing_body_iv(self): + self._test_GET_multipart_bad_body_crypto_meta( + {'cipher': 'AES_CTR_256'}) + self.assertIn('iv', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_missing_key_callback(self): + # Do not provide keys, and do not set override flag + env = {'REQUEST_METHOD': 'GET'} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + enc_body = encrypt(body, fetch_crypto_keys()['object'], FAKE_IV) + hdrs = self._make_response_headers( + len(body), md5hex('not the body'), fetch_crypto_keys(), 'not used') + self.app.register( + 'GET', '/v1/a/c/o', HTTPOk, body=enc_body, headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual('500 Internal Error', resp.status) + self.assertEqual('Unable to retrieve encryption keys.', + resp.body) + self.assertIn('missing callback', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_error_in_key_callback(self): + def raise_exc(): + raise Exception('Testing') + + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: raise_exc} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + enc_body = encrypt(body, fetch_crypto_keys()['object'], FAKE_IV) + hdrs = self._make_response_headers( + len(body), md5hex(body), fetch_crypto_keys(), 'not used') + self.app.register( + 'GET', '/v1/a/c/o', HTTPOk, body=enc_body, headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual('500 Internal Error', resp.status) + self.assertEqual('Unable to retrieve encryption keys.', + resp.body) + self.assertIn('from callback: Testing', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_cipher_mismatch_for_body(self): + # Cipher does not match + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + enc_body = encrypt(body, fetch_crypto_keys()['object'], FAKE_IV) + bad_crypto_meta = fake_get_crypto_meta() + bad_crypto_meta['cipher'] = 'unknown_cipher' + hdrs = self._make_response_headers( + len(enc_body), md5hex(body), fetch_crypto_keys(), 'not used') + hdrs['X-Object-Sysmeta-Crypto-Body-Meta'] = \ + get_crypto_meta_header(crypto_meta=bad_crypto_meta) + self.app.register( + 'GET', '/v1/a/c/o', HTTPOk, body=enc_body, headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual('500 Internal Error', resp.status) + self.assertEqual('Error decrypting object', resp.body) + self.assertIn('Error decrypting object', + self.decrypter.logger.get_lines_for_level('error')[0]) + 
self.assertIn('Bad crypto meta: Cipher', + self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_cipher_mismatch_for_metadata(self): + # Cipher does not match + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + key = fetch_crypto_keys()['object'] + enc_body = encrypt(body, key, FAKE_IV) + bad_crypto_meta = fake_get_crypto_meta() + bad_crypto_meta['cipher'] = 'unknown_cipher' + hdrs = self._make_response_headers( + len(enc_body), md5hex(body), fetch_crypto_keys(), 'not used') + hdrs.update({'x-object-transient-sysmeta-crypto-meta-test': + base64.b64encode(encrypt('encrypt me', key, FAKE_IV)) + + ';swift_meta=' + + get_crypto_meta_header(crypto_meta=bad_crypto_meta)}) + self.app.register( + 'GET', '/v1/a/c/o', HTTPOk, body=enc_body, headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual('500 Internal Error', resp.status) + self.assertEqual('Error decrypting header', resp.body) + self.assertIn( + 'Error decrypting header X-Object-Transient-Sysmeta-Crypto-Meta-' + 'Test', self.decrypter.logger.get_lines_for_level('error')[0]) + + def test_GET_decryption_override(self): + # This covers the case of an old un-encrypted object + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys, + 'swift.crypto.override': True} + req = Request.blank('/v1/a/c/o', environ=env) + body = 'FAKE APP' + hdrs = {'Etag': md5hex(body), + 'content-type': 'text/plain', + 'content-length': len(body), + 'x-object-meta-test': 'do not encrypt me', + 'x-object-sysmeta-test': 'do not encrypt me'} + self.app.register('GET', '/v1/a/c/o', HTTPOk, body=body, headers=hdrs) + resp = req.get_response(self.decrypter) + self.assertEqual(body, resp.body) + self.assertEqual('200 OK', resp.status) + self.assertEqual(md5hex(body), resp.headers['Etag']) + self.assertEqual('text/plain', resp.headers['Content-Type']) + self.assertEqual('do not encrypt me', + resp.headers['x-object-meta-test']) + self.assertEqual('do not encrypt me', + resp.headers['x-object-sysmeta-test']) + + +class TestDecrypterContainerRequests(unittest.TestCase): + def setUp(self): + self.app = FakeSwift() + self.decrypter = decrypter.Decrypter(self.app, {}) + self.decrypter.logger = FakeLogger() + + def _make_cont_get_req(self, resp_body, format, override=False, + callback=fetch_crypto_keys): + path = '/v1/a/c' + content_type = 'text/plain' + if format: + path = '%s/?format=%s' % (path, format) + content_type = 'application/' + format + env = {'REQUEST_METHOD': 'GET', + CRYPTO_KEY_CALLBACK: callback} + if override: + env['swift.crypto.override'] = True + req = Request.blank(path, environ=env) + hdrs = {'content-type': content_type} + self.app.register('GET', path, HTTPOk, body=resp_body, headers=hdrs) + return req.get_response(self.decrypter) + + def test_GET_container_success(self): + # no format requested, listing has names only + fake_body = 'testfile1\ntestfile2\n' + calls = [0] + + def wrapped_fetch_crypto_keys(): + calls[0] += 1 + return fetch_crypto_keys() + + resp = self._make_cont_get_req(fake_body, None, + callback=wrapped_fetch_crypto_keys) + + self.assertEqual('200 OK', resp.status) + names = resp.body.split('\n') + self.assertEqual(3, len(names)) + self.assertIn('testfile1', names) + self.assertIn('testfile2', names) + self.assertIn('', names) + self.assertEqual(0, calls[0]) + + def test_GET_container_json(self): + content_type_1 = u'\uF10F\uD20D\uB30B\u9409' + content_type_2 = 'text/plain; param=foo' + 
       pt_etag1 = 'c6e8196d7f0fff6444b90861fe8d609d'
+        pt_etag2 = 'ac0374ed4d43635f803c82469d0b5a10'
+        key = fetch_crypto_keys()['container']
+
+        obj_dict_1 = {"bytes": 16,
+                      "last_modified": "2015-04-14T23:33:06.439040",
+                      "hash": encrypt_and_append_meta(
+                          pt_etag1.encode('utf-8'), key),
+                      "name": "testfile",
+                      "content_type": content_type_1}
+
+        obj_dict_2 = {"bytes": 24,
+                      "last_modified": "2015-04-14T23:33:06.519020",
+                      "hash": encrypt_and_append_meta(
+                          pt_etag2.encode('utf-8'), key),
+                      "name": "testfile2",
+                      "content_type": content_type_2}
+
+        listing = [obj_dict_1, obj_dict_2]
+        fake_body = json.dumps(listing)
+
+        resp = self._make_cont_get_req(fake_body, 'json')
+
+        self.assertEqual('200 OK', resp.status)
+        body = resp.body
+        self.assertEqual(len(body), int(resp.headers['Content-Length']))
+        body_json = json.loads(body)
+        self.assertEqual(2, len(body_json))
+        obj_dict_1['hash'] = pt_etag1
+        self.assertDictEqual(obj_dict_1, body_json[0])
+        obj_dict_2['hash'] = pt_etag2
+        self.assertDictEqual(obj_dict_2, body_json[1])
+
+    def test_GET_container_json_with_crypto_override(self):
+        content_type_1 = 'image/jpeg'
+        content_type_2 = 'text/plain; param=foo'
+        pt_etag1 = 'c6e8196d7f0fff6444b90861fe8d609d'
+        pt_etag2 = 'ac0374ed4d43635f803c82469d0b5a10'
+
+        obj_dict_1 = {"bytes": 16,
+                      "last_modified": "2015-04-14T23:33:06.439040",
+                      "hash": pt_etag1,
+                      "name": "testfile",
+                      "content_type": content_type_1}
+
+        obj_dict_2 = {"bytes": 24,
+                      "last_modified": "2015-04-14T23:33:06.519020",
+                      "hash": pt_etag2,
+                      "name": "testfile2",
+                      "content_type": content_type_2}
+
+        listing = [obj_dict_1, obj_dict_2]
+        fake_body = json.dumps(listing)
+
+        resp = self._make_cont_get_req(fake_body, 'json', override=True)
+
+        self.assertEqual('200 OK', resp.status)
+        body = resp.body
+        self.assertEqual(len(body), int(resp.headers['Content-Length']))
+        body_json = json.loads(body)
+        self.assertEqual(2, len(body_json))
+        self.assertDictEqual(obj_dict_1, body_json[0])
+        self.assertDictEqual(obj_dict_2, body_json[1])
+
+    def test_cont_get_json_req_with_cipher_mismatch(self):
+        bad_crypto_meta = fake_get_crypto_meta()
+        bad_crypto_meta['cipher'] = 'unknown_cipher'
+        key = fetch_crypto_keys()['container']
+        pt_etag = 'c6e8196d7f0fff6444b90861fe8d609d'
+        ct_etag = encrypt_and_append_meta(pt_etag, key,
+                                          crypto_meta=bad_crypto_meta)
+
+        obj_dict_1 = {"bytes": 16,
+                      "last_modified": "2015-04-14T23:33:06.439040",
+                      "hash": ct_etag,
+                      "name": "testfile",
+                      "content_type": "image/jpeg"}
+
+        listing = [obj_dict_1]
+        fake_body = json.dumps(listing)
+
+        resp = self._make_cont_get_req(fake_body, 'json')
+
+        self.assertEqual('500 Internal Error', resp.status)
+        self.assertEqual('Error decrypting container listing', resp.body)
+        self.assertIn("Cipher must be AES_CTR_256",
+                      self.decrypter.logger.get_lines_for_level('error')[0])
+
+    def _assert_element_contains_dict(self, expected, element):
+        for k, v in expected.items():
+            entry = element.getElementsByTagName(k)
+            self.assertIsNotNone(entry, 'Key %s not found' % k)
+            actual = entry[0].childNodes[0].nodeValue
+            self.assertEqual(v, actual,
+                             "Expected %s but got %s for key %s"
+                             % (v, actual, k))
+
+    def test_GET_container_xml(self):
+        content_type_1 = u'\uF10F\uD20D\uB30B\u9409'
+        content_type_2 = 'text/plain; param=foo'
+        pt_etag1 = 'c6e8196d7f0fff6444b90861fe8d609d'
+        pt_etag2 = 'ac0374ed4d43635f803c82469d0b5a10'
+        key = fetch_crypto_keys()['container']
+
+        fake_body = '''<?xml version="1.0" encoding="UTF-8"?>
+<container name="testc">\
+<object><hash>\
+''' + encrypt_and_append_meta(pt_etag1.encode('utf8'), key) + '''\
+</hash>\
+<content_type>''' + content_type_1 + '''\
+</content_type><name>testfile</name><bytes>16</bytes>\
+<last_modified>2015-04-19T02:37:39.601660</last_modified></object>\
+<object><hash>\
+''' + encrypt_and_append_meta(pt_etag2.encode('utf8'), key) + '''\
+</hash>\
+<content_type>''' + content_type_2 + '''\
+</content_type><name>testfile2</name><bytes>24</bytes>\
+<last_modified>2015-04-19T02:37:39.684740</last_modified></object>\
+</container>'''
+
+        resp = self._make_cont_get_req(fake_body, 'xml')
+        self.assertEqual('200 OK', resp.status)
+        body = resp.body
+        self.assertEqual(len(body), int(resp.headers['Content-Length']))
+
+        tree = minidom.parseString(body)
+        containers = tree.getElementsByTagName('container')
+        self.assertEqual(1, len(containers))
+        self.assertEqual('testc',
+                         containers[0].attributes.getNamedItem("name").value)
+
+        objs = tree.getElementsByTagName('object')
+        self.assertEqual(2, len(objs))
+
+        obj_dict_1 = {"bytes": "16",
+                      "last_modified": "2015-04-19T02:37:39.601660",
+                      "hash": pt_etag1,
+                      "name": "testfile",
+                      "content_type": content_type_1}
+        self._assert_element_contains_dict(obj_dict_1, objs[0])
+        obj_dict_2 = {"bytes": "24",
+                      "last_modified": "2015-04-19T02:37:39.684740",
+                      "hash": pt_etag2,
+                      "name": "testfile2",
+                      "content_type": content_type_2}
+        self._assert_element_contains_dict(obj_dict_2, objs[1])
+
+    def test_GET_container_xml_with_crypto_override(self):
+        content_type_1 = 'image/jpeg'
+        content_type_2 = 'text/plain; param=foo'
+
+        fake_body = '''<?xml version="1.0" encoding="UTF-8"?>
+<container name="testc">\
+<object><hash>c6e8196d7f0fff6444b90861fe8d609d</hash>\
+<content_type>''' + content_type_1 + '''\
+</content_type><name>testfile</name><bytes>16</bytes>\
+<last_modified>2015-04-19T02:37:39.601660</last_modified></object>\
+<object><hash>ac0374ed4d43635f803c82469d0b5a10</hash>\
+<content_type>''' + content_type_2 + '''\
+</content_type><name>testfile2</name><bytes>24</bytes>\
+<last_modified>2015-04-19T02:37:39.684740</last_modified></object>\
+</container>'''
+
+        resp = self._make_cont_get_req(fake_body, 'xml', override=True)
+
+        self.assertEqual('200 OK', resp.status)
+        body = resp.body
+        self.assertEqual(len(body), int(resp.headers['Content-Length']))
+
+        tree = minidom.parseString(body)
+        containers = tree.getElementsByTagName('container')
+        self.assertEqual(1, len(containers))
+        self.assertEqual('testc',
+                         containers[0].attributes.getNamedItem("name").value)
+
+        objs = tree.getElementsByTagName('object')
+        self.assertEqual(2, len(objs))
+
+        obj_dict_1 = {"bytes": "16",
+                      "last_modified": "2015-04-19T02:37:39.601660",
+                      "hash": "c6e8196d7f0fff6444b90861fe8d609d",
+                      "name": "testfile",
+                      "content_type": content_type_1}
+        self._assert_element_contains_dict(obj_dict_1, objs[0])
+        obj_dict_2 = {"bytes": "24",
+                      "last_modified": "2015-04-19T02:37:39.684740",
+                      "hash": "ac0374ed4d43635f803c82469d0b5a10",
+                      "name": "testfile2",
+                      "content_type": content_type_2}
+        self._assert_element_contains_dict(obj_dict_2, objs[1])
+
+    def test_cont_get_xml_req_with_cipher_mismatch(self):
+        bad_crypto_meta = fake_get_crypto_meta()
+        bad_crypto_meta['cipher'] = 'unknown_cipher'
+
+        fake_body = '''<?xml version="1.0" encoding="UTF-8"?>
+<container name="testc"><object><hash>\
+''' + encrypt_and_append_meta('c6e8196d7f0fff6444b90861fe8d609d',
+                              fetch_crypto_keys()['container'],
+                              crypto_meta=bad_crypto_meta) + '''\
+</hash>\
+<content_type>image/jpeg</content_type>\
+<name>testfile</name><bytes>16</bytes>\
+<last_modified>2015-04-19T02:37:39.601660</last_modified>\
+</object></container>'''
+
+        resp = self._make_cont_get_req(fake_body, 'xml')
+
+        self.assertEqual('500 Internal Error', resp.status)
+        self.assertEqual('Error decrypting container listing', resp.body)
+        self.assertIn("Cipher must be AES_CTR_256",
+                      self.decrypter.logger.get_lines_for_level('error')[0])
+
+
+class TestModuleMethods(unittest.TestCase):
+    def test_purge_crypto_sysmeta_headers(self):
+        retained_headers = {'x-object-sysmeta-test1': 'keep',
+                            'x-object-meta-test2': 'retain',
+                            'x-object-transient-sysmeta-test3': 'leave intact',
+                            'etag': 'hold onto',
+                            'x-other': 'cherish',
+                            'x-object-not-meta': 'do not remove'}
+        purged_headers = {'x-object-sysmeta-crypto-test1': 'remove',
+                          'x-object-transient-sysmeta-crypto-test2': 'purge'}
+
test_headers = retained_headers.copy() + test_headers.update(purged_headers) + actual = decrypter.purge_crypto_sysmeta_headers(test_headers.items()) + + for k, v in actual: + k = k.lower() + self.assertNotIn(k, purged_headers) + self.assertEqual(retained_headers[k], v) + retained_headers.pop(k) + self.assertFalse(retained_headers) + + +class TestDecrypter(unittest.TestCase): + def test_app_exception(self): + app = decrypter.Decrypter(FakeAppThatExcepts(HTTPException), {}) + req = Request.blank('/', environ={'REQUEST_METHOD': 'GET'}) + with self.assertRaises(HTTPException) as catcher: + req.get_response(app) + self.assertEqual(FakeAppThatExcepts.MESSAGE, catcher.exception.body) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/unit/common/middleware/crypto/test_encrypter.py b/test/unit/common/middleware/crypto/test_encrypter.py new file mode 100644 index 0000000000..0f9553cad7 --- /dev/null +++ b/test/unit/common/middleware/crypto/test_encrypter.py @@ -0,0 +1,820 @@ +# Copyright (c) 2015-2016 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import base64 +import hashlib +import hmac +import json +import os +import unittest +import urllib + +import mock + +from swift.common.middleware.crypto import encrypter +from swift.common.middleware.crypto.crypto_utils import ( + CRYPTO_KEY_CALLBACK, Crypto) +from swift.common.swob import ( + Request, HTTPException, HTTPCreated, HTTPAccepted, HTTPOk, HTTPBadRequest) +from swift.common.utils import FileLikeIter + +from test.unit import FakeLogger, EMPTY_ETAG +from test.unit.common.middleware.crypto.crypto_helpers import ( + fetch_crypto_keys, md5hex, FAKE_IV, encrypt) +from test.unit.common.middleware.helpers import FakeSwift, FakeAppThatExcepts + + +@mock.patch('swift.common.middleware.crypto.crypto_utils.Crypto.create_iv', + lambda *args: FAKE_IV) +class TestEncrypter(unittest.TestCase): + def setUp(self): + self.app = FakeSwift() + self.encrypter = encrypter.Encrypter(self.app, {}) + self.encrypter.logger = FakeLogger() + + def _verify_user_metadata(self, req_hdrs, name, value, key): + # verify encrypted version of user metadata + self.assertNotIn('X-Object-Meta-' + name, req_hdrs) + expected_hdr = 'X-Object-Transient-Sysmeta-Crypto-Meta-' + name + self.assertIn(expected_hdr, req_hdrs) + enc_val, param = req_hdrs[expected_hdr].split(';') + param = param.strip() + self.assertTrue(param.startswith('swift_meta=')) + actual_meta = json.loads( + urllib.unquote_plus(param[len('swift_meta='):])) + self.assertEqual(Crypto.cipher, actual_meta['cipher']) + meta_iv = base64.b64decode(actual_meta['iv']) + self.assertEqual(FAKE_IV, meta_iv) + self.assertEqual( + base64.b64encode(encrypt(value, key, meta_iv)), + enc_val) + # if there is any encrypted user metadata then this header should exist + self.assertIn('X-Object-Transient-Sysmeta-Crypto-Meta', req_hdrs) + common_meta = json.loads(urllib.unquote_plus( + req_hdrs['X-Object-Transient-Sysmeta-Crypto-Meta'])) + self.assertDictEqual({'cipher': Crypto.cipher, + 'key_id': {'v': 
'fake', 'path': '/a/c/fake'}}, + common_meta) + + def test_PUT_req(self): + body_key = os.urandom(32) + object_key = fetch_crypto_keys()['object'] + plaintext = 'FAKE APP' + plaintext_etag = md5hex(plaintext) + ciphertext = encrypt(plaintext, body_key, FAKE_IV) + ciphertext_etag = md5hex(ciphertext) + + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + hdrs = {'etag': plaintext_etag, + 'content-type': 'text/plain', + 'content-length': str(len(plaintext)), + 'x-object-meta-etag': 'not to be confused with the Etag!', + 'x-object-meta-test': 'encrypt me', + 'x-object-sysmeta-test': 'do not encrypt me'} + req = Request.blank( + '/v1/a/c/o', environ=env, body=plaintext, headers=hdrs) + self.app.register('PUT', '/v1/a/c/o', HTTPCreated, {}) + with mock.patch( + 'swift.common.middleware.crypto.crypto_utils.' + 'Crypto.create_random_key', + return_value=body_key): + resp = req.get_response(self.encrypter) + self.assertEqual('201 Created', resp.status) + self.assertEqual(plaintext_etag, resp.headers['Etag']) + + # verify metadata items + self.assertEqual(1, len(self.app.calls), self.app.calls) + self.assertEqual('PUT', self.app.calls[0][0]) + req_hdrs = self.app.headers[0] + + # verify body crypto meta + actual = req_hdrs['X-Object-Sysmeta-Crypto-Body-Meta'] + actual = json.loads(urllib.unquote_plus(actual)) + self.assertEqual(Crypto().cipher, actual['cipher']) + self.assertEqual(FAKE_IV, base64.b64decode(actual['iv'])) + + # verify wrapped body key + expected_wrapped_key = encrypt(body_key, object_key, FAKE_IV) + self.assertEqual(expected_wrapped_key, + base64.b64decode(actual['body_key']['key'])) + self.assertEqual(FAKE_IV, + base64.b64decode(actual['body_key']['iv'])) + self.assertEqual(fetch_crypto_keys()['id'], actual['key_id']) + + # verify etag + self.assertEqual(ciphertext_etag, req_hdrs['Etag']) + + encrypted_etag, _junk, etag_meta = \ + req_hdrs['X-Object-Sysmeta-Crypto-Etag'].partition('; swift_meta=') + # verify crypto_meta was appended to this etag + self.assertTrue(etag_meta) + actual_meta = json.loads(urllib.unquote_plus(etag_meta)) + self.assertEqual(Crypto().cipher, actual_meta['cipher']) + + # verify encrypted version of plaintext etag + actual = base64.b64decode(encrypted_etag) + etag_iv = base64.b64decode(actual_meta['iv']) + enc_etag = encrypt(plaintext_etag, object_key, etag_iv) + self.assertEqual(enc_etag, actual) + + # verify etag MAC for conditional requests + actual_hmac = base64.b64decode( + req_hdrs['X-Object-Sysmeta-Crypto-Etag-Mac']) + self.assertEqual(actual_hmac, hmac.new( + object_key, plaintext_etag, hashlib.sha256).digest()) + + # verify encrypted etag for container update + self.assertIn( + 'X-Object-Sysmeta-Container-Update-Override-Etag', req_hdrs) + parts = req_hdrs[ + 'X-Object-Sysmeta-Container-Update-Override-Etag'].rsplit(';', 1) + self.assertEqual(2, len(parts)) + + # extract crypto_meta from end of etag for container update + param = parts[1].strip() + crypto_meta_tag = 'swift_meta=' + self.assertTrue(param.startswith(crypto_meta_tag), param) + actual_meta = json.loads( + urllib.unquote_plus(param[len(crypto_meta_tag):])) + self.assertEqual(Crypto().cipher, actual_meta['cipher']) + self.assertEqual(fetch_crypto_keys()['id'], actual_meta['key_id']) + + cont_key = fetch_crypto_keys()['container'] + cont_etag_iv = base64.b64decode(actual_meta['iv']) + self.assertEqual(FAKE_IV, cont_etag_iv) + self.assertEqual(encrypt(plaintext_etag, cont_key, cont_etag_iv), + base64.b64decode(parts[0])) + + # content-type is not encrypted + 
self.assertEqual('text/plain', req_hdrs['Content-Type']) + + # user meta is encrypted + self._verify_user_metadata(req_hdrs, 'Test', 'encrypt me', object_key) + self._verify_user_metadata( + req_hdrs, 'Etag', 'not to be confused with the Etag!', object_key) + + # sysmeta is not encrypted + self.assertEqual('do not encrypt me', + req_hdrs['X-Object-Sysmeta-Test']) + + # verify object is encrypted by getting direct from the app + get_req = Request.blank('/v1/a/c/o', environ={'REQUEST_METHOD': 'GET'}) + resp = get_req.get_response(self.app) + self.assertEqual(ciphertext, resp.body) + self.assertEqual(ciphertext_etag, resp.headers['Etag']) + + def test_PUT_zero_size_object(self): + # object body encryption should be skipped for zero sized object body + object_key = fetch_crypto_keys()['object'] + plaintext_etag = EMPTY_ETAG + + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + hdrs = {'etag': EMPTY_ETAG, + 'content-type': 'text/plain', + 'content-length': '0', + 'x-object-meta-etag': 'not to be confused with the Etag!', + 'x-object-meta-test': 'encrypt me', + 'x-object-sysmeta-test': 'do not encrypt me'} + req = Request.blank( + '/v1/a/c/o', environ=env, body='', headers=hdrs) + self.app.register('PUT', '/v1/a/c/o', HTTPCreated, {}) + + resp = req.get_response(self.encrypter) + + self.assertEqual('201 Created', resp.status) + self.assertEqual(plaintext_etag, resp.headers['Etag']) + self.assertEqual(1, len(self.app.calls), self.app.calls) + self.assertEqual('PUT', self.app.calls[0][0]) + req_hdrs = self.app.headers[0] + + # verify that there is no body crypto meta + self.assertNotIn('X-Object-Sysmeta-Crypto-Meta', req_hdrs) + # verify etag is md5 of plaintext + self.assertEqual(EMPTY_ETAG, req_hdrs['Etag']) + # verify there is no etag crypto meta + self.assertNotIn('X-Object-Sysmeta-Crypto-Etag', req_hdrs) + # verify there is no container update override for etag + self.assertNotIn( + 'X-Object-Sysmeta-Container-Update-Override-Etag', req_hdrs) + + # user meta is still encrypted + self._verify_user_metadata(req_hdrs, 'Test', 'encrypt me', object_key) + self._verify_user_metadata( + req_hdrs, 'Etag', 'not to be confused with the Etag!', object_key) + + # sysmeta is not encrypted + self.assertEqual('do not encrypt me', + req_hdrs['X-Object-Sysmeta-Test']) + + # verify object is empty by getting direct from the app + get_req = Request.blank('/v1/a/c/o', environ={'REQUEST_METHOD': 'GET'}) + resp = get_req.get_response(self.app) + self.assertEqual('', resp.body) + self.assertEqual(EMPTY_ETAG, resp.headers['Etag']) + + def test_PUT_with_other_footers(self): + # verify handling of another middleware's footer callback + cont_key = fetch_crypto_keys()['container'] + body_key = os.urandom(32) + object_key = fetch_crypto_keys()['object'] + plaintext = 'FAKE APP' + plaintext_etag = md5hex(plaintext) + ciphertext = encrypt(plaintext, body_key, FAKE_IV) + ciphertext_etag = md5hex(ciphertext) + other_footers = { + 'Etag': plaintext_etag, + 'X-Object-Sysmeta-Other': 'other sysmeta', + 'X-Object-Sysmeta-Container-Update-Override-Size': + 'other override', + 'X-Object-Sysmeta-Container-Update-Override-Etag': + 'final etag'} + + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys, + 'swift.callback.update_footers': + lambda footers: footers.update(other_footers)} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(plaintext)), + 'Etag': 'correct etag is in footers'} + req = Request.blank( + '/v1/a/c/o', environ=env, body=plaintext, headers=hdrs) + 
self.app.register('PUT', '/v1/a/c/o', HTTPCreated, {}) + + with mock.patch( + 'swift.common.middleware.crypto.crypto_utils.' + 'Crypto.create_random_key', + lambda *args: body_key): + resp = req.get_response(self.encrypter) + + self.assertEqual('201 Created', resp.status) + self.assertEqual(plaintext_etag, resp.headers['Etag']) + + # verify metadata items + self.assertEqual(1, len(self.app.calls), self.app.calls) + self.assertEqual('PUT', self.app.calls[0][0]) + req_hdrs = self.app.headers[0] + + # verify that other middleware's footers made it to app, including any + # container update overrides but nothing Etag-related + other_footers.pop('Etag') + other_footers.pop('X-Object-Sysmeta-Container-Update-Override-Etag') + for k, v in other_footers.items(): + self.assertEqual(v, req_hdrs[k]) + + # verify encryption footers are ok + encrypted_etag, _junk, etag_meta = \ + req_hdrs['X-Object-Sysmeta-Crypto-Etag'].partition('; swift_meta=') + self.assertTrue(etag_meta) + actual_meta = json.loads(urllib.unquote_plus(etag_meta)) + self.assertEqual(Crypto().cipher, actual_meta['cipher']) + + self.assertEqual(ciphertext_etag, req_hdrs['Etag']) + actual = base64.b64decode(encrypted_etag) + etag_iv = base64.b64decode(actual_meta['iv']) + self.assertEqual(encrypt(plaintext_etag, object_key, etag_iv), actual) + + # verify encrypted etag for container update + self.assertIn( + 'X-Object-Sysmeta-Container-Update-Override-Etag', req_hdrs) + parts = req_hdrs[ + 'X-Object-Sysmeta-Container-Update-Override-Etag'].rsplit(';', 1) + self.assertEqual(2, len(parts)) + + # extract crypto_meta from end of etag for container update + param = parts[1].strip() + crypto_meta_tag = 'swift_meta=' + self.assertTrue(param.startswith(crypto_meta_tag), param) + actual_meta = json.loads( + urllib.unquote_plus(param[len(crypto_meta_tag):])) + self.assertEqual(Crypto().cipher, actual_meta['cipher']) + + cont_key = fetch_crypto_keys()['container'] + cont_etag_iv = base64.b64decode(actual_meta['iv']) + self.assertEqual(FAKE_IV, cont_etag_iv) + self.assertEqual(encrypt('final etag', cont_key, cont_etag_iv), + base64.b64decode(parts[0])) + + # verify body crypto meta + actual = req_hdrs['X-Object-Sysmeta-Crypto-Body-Meta'] + actual = json.loads(urllib.unquote_plus(actual)) + self.assertEqual(Crypto().cipher, actual['cipher']) + self.assertEqual(FAKE_IV, base64.b64decode(actual['iv'])) + + # verify wrapped body key + expected_wrapped_key = encrypt(body_key, object_key, FAKE_IV) + self.assertEqual(expected_wrapped_key, + base64.b64decode(actual['body_key']['key'])) + self.assertEqual(FAKE_IV, + base64.b64decode(actual['body_key']['iv'])) + self.assertEqual(fetch_crypto_keys()['id'], actual['key_id']) + + def test_PUT_with_etag_override_in_headers(self): + # verify handling of another middleware's + # container-update-override-etag in headers + plaintext = 'FAKE APP' + plaintext_etag = md5hex(plaintext) + + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(plaintext)), + 'Etag': plaintext_etag, + 'X-Object-Sysmeta-Container-Update-Override-Etag': + 'final etag'} + req = Request.blank( + '/v1/a/c/o', environ=env, body=plaintext, headers=hdrs) + self.app.register('PUT', '/v1/a/c/o', HTTPCreated, {}) + resp = req.get_response(self.encrypter) + + self.assertEqual('201 Created', resp.status) + self.assertEqual(plaintext_etag, resp.headers['Etag']) + + # verify metadata items + self.assertEqual(1, len(self.app.calls), self.app.calls) + 
self.assertEqual(('PUT', '/v1/a/c/o'), self.app.calls[0]) + req_hdrs = self.app.headers[0] + + # verify encrypted etag for container update + self.assertIn( + 'X-Object-Sysmeta-Container-Update-Override-Etag', req_hdrs) + parts = req_hdrs[ + 'X-Object-Sysmeta-Container-Update-Override-Etag'].rsplit(';', 1) + self.assertEqual(2, len(parts)) + cont_key = fetch_crypto_keys()['container'] + + # extract crypto_meta from end of etag for container update + param = parts[1].strip() + crypto_meta_tag = 'swift_meta=' + self.assertTrue(param.startswith(crypto_meta_tag), param) + actual_meta = json.loads( + urllib.unquote_plus(param[len(crypto_meta_tag):])) + self.assertEqual(Crypto().cipher, actual_meta['cipher']) + self.assertEqual(fetch_crypto_keys()['id'], actual_meta['key_id']) + + cont_etag_iv = base64.b64decode(actual_meta['iv']) + self.assertEqual(FAKE_IV, cont_etag_iv) + self.assertEqual(encrypt('final etag', cont_key, cont_etag_iv), + base64.b64decode(parts[0])) + + def test_PUT_with_bad_etag_in_other_footers(self): + # verify that etag supplied in footers from other middleware overrides + # header etag when validating inbound plaintext etags + plaintext = 'FAKE APP' + plaintext_etag = md5hex(plaintext) + other_footers = { + 'Etag': 'bad etag', + 'X-Object-Sysmeta-Other': 'other sysmeta', + 'X-Object-Sysmeta-Container-Update-Override-Etag': + 'other override'} + + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys, + 'swift.callback.update_footers': + lambda footers: footers.update(other_footers)} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(plaintext)), + 'Etag': plaintext_etag} + req = Request.blank( + '/v1/a/c/o', environ=env, body=plaintext, headers=hdrs) + self.app.register('PUT', '/v1/a/c/o', HTTPCreated, {}) + resp = req.get_response(self.encrypter) + self.assertEqual('422 Unprocessable Entity', resp.status) + self.assertNotIn('Etag', resp.headers) + + def test_PUT_with_bad_etag_in_headers_and_other_footers(self): + # verify that etag supplied in headers from other middleware is used if + # none is supplied in footers when validating inbound plaintext etags + plaintext = 'FAKE APP' + other_footers = { + 'X-Object-Sysmeta-Other': 'other sysmeta', + 'X-Object-Sysmeta-Container-Update-Override-Etag': + 'other override'} + + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys, + 'swift.callback.update_footers': + lambda footers: footers.update(other_footers)} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(plaintext)), + 'Etag': 'bad etag'} + req = Request.blank( + '/v1/a/c/o', environ=env, body=plaintext, headers=hdrs) + self.app.register('PUT', '/v1/a/c/o', HTTPCreated, {}) + resp = req.get_response(self.encrypter) + self.assertEqual('422 Unprocessable Entity', resp.status) + self.assertNotIn('Etag', resp.headers) + + def test_PUT_nothing_read(self): + # simulate an artificial scenario of a downstream filter/app not + # actually reading the input stream from encrypter. 
+ class NonReadingApp(object): + def __call__(self, env, start_response): + # note: no read from wsgi.input + req = Request(env) + env['swift.callback.update_footers'](req.headers) + call_headers.append(req.headers) + resp = HTTPCreated(req=req, headers={'Etag': 'response etag'}) + return resp(env, start_response) + + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + hdrs = {'content-type': 'text/plain', + 'content-length': 0, + 'etag': 'etag from client'} + req = Request.blank('/v1/a/c/o', environ=env, body='', headers=hdrs) + + call_headers = [] + resp = req.get_response(encrypter.Encrypter(NonReadingApp(), {})) + self.assertEqual('201 Created', resp.status) + self.assertEqual('response etag', resp.headers['Etag']) + self.assertEqual(1, len(call_headers)) + self.assertEqual('etag from client', call_headers[0]['etag']) + # verify no encryption footers + for k in call_headers[0]: + self.assertFalse(k.lower().startswith('x-object-sysmeta-crypto-')) + + # check that an upstream footer callback gets called + other_footers = { + 'Etag': 'other etag', + 'X-Object-Sysmeta-Other': 'other sysmeta', + 'X-Backend-Container-Update-Override-Etag': 'other override'} + env.update({'swift.callback.update_footers': + lambda footers: footers.update(other_footers)}) + req = Request.blank('/v1/a/c/o', environ=env, body='', headers=hdrs) + + call_headers = [] + resp = req.get_response(encrypter.Encrypter(NonReadingApp(), {})) + + self.assertEqual('201 Created', resp.status) + self.assertEqual('response etag', resp.headers['Etag']) + self.assertEqual(1, len(call_headers)) + # verify that other middleware's footers made it to app + for k, v in other_footers.items(): + self.assertEqual(v, call_headers[0][k]) + # verify no encryption footers + for k in call_headers[0]: + self.assertFalse(k.lower().startswith('x-object-sysmeta-crypto-')) + + def test_POST_req(self): + body = 'FAKE APP' + env = {'REQUEST_METHOD': 'POST', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + hdrs = {'x-object-meta-test': 'encrypt me', + 'x-object-sysmeta-test': 'do not encrypt me'} + req = Request.blank('/v1/a/c/o', environ=env, body=body, headers=hdrs) + key = fetch_crypto_keys()['object'] + self.app.register('POST', '/v1/a/c/o', HTTPAccepted, {}) + resp = req.get_response(self.encrypter) + self.assertEqual('202 Accepted', resp.status) + self.assertNotIn('Etag', resp.headers) + + # verify metadata items + self.assertEqual(1, len(self.app.calls), self.app.calls) + self.assertEqual('POST', self.app.calls[0][0]) + req_hdrs = self.app.headers[0] + + # user meta is encrypted + self._verify_user_metadata(req_hdrs, 'Test', 'encrypt me', key) + + # sysmeta is not encrypted + self.assertEqual('do not encrypt me', + req_hdrs['X-Object-Sysmeta-Test']) + + def _test_no_user_metadata(self, method): + # verify that x-object-transient-sysmeta-crypto-meta is not set when + # there is no user metadata + env = {'REQUEST_METHOD': method, + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank('/v1/a/c/o', environ=env, body='body') + self.app.register(method, '/v1/a/c/o', HTTPAccepted, {}) + resp = req.get_response(self.encrypter) + self.assertEqual('202 Accepted', resp.status) + self.assertEqual(1, len(self.app.calls), self.app.calls) + self.assertEqual(method, self.app.calls[0][0]) + self.assertNotIn('x-object-transient-sysmeta-crypto-meta', + self.app.headers[0]) + + def test_PUT_no_user_metadata(self): + self._test_no_user_metadata('PUT') + + def test_POST_no_user_metadata(self): + self._test_no_user_metadata('POST') + + 
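For reference, a minimal sketch of how the masked etag values expected by the If-Match/If-None-Match tests below are derived, following the assertions in _test_if_match. It is not part of the patch and the helper name is illustrative only.

import base64
import hashlib
import hmac


def mask_etag_sketch(object_key, plaintext_etag):
    # the plaintext etag is HMAC-SHA256'd with the object key, then
    # base64-encoded and re-quoted, so backends can match on it without
    # ever seeing the plaintext etag
    digest = hmac.new(object_key, plaintext_etag.strip('"'),
                      hashlib.sha256).digest()
    return '"%s"' % base64.b64encode(digest)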
def _test_if_match(self, method, match_header_name): + def do_test(method, plain_etags, expected_plain_etags=None): + env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + match_header_value = ', '.join(plain_etags) + req = Request.blank( + '/v1/a/c/o', environ=env, method=method, + headers={match_header_name: match_header_value}) + app = FakeSwift() + app.register(method, '/v1/a/c/o', HTTPOk, {}) + resp = req.get_response(encrypter.Encrypter(app, {})) + self.assertEqual('200 OK', resp.status) + + self.assertEqual(1, len(app.calls), app.calls) + self.assertEqual(method, app.calls[0][0]) + actual_headers = app.headers[0] + + # verify the alternate etag location has been specified + if match_header_value and match_header_value != '*': + self.assertIn('X-Backend-Etag-Is-At', actual_headers) + self.assertEqual('X-Object-Sysmeta-Crypto-Etag-Mac', + actual_headers['X-Backend-Etag-Is-At']) + + # verify etags have been supplemented with masked values + self.assertIn(match_header_name, actual_headers) + actual_etags = set(actual_headers[match_header_name].split(', ')) + key = fetch_crypto_keys()['object'] + masked_etags = [ + '"%s"' % base64.b64encode(hmac.new( + key, etag.strip('"'), hashlib.sha256).digest()) + for etag in plain_etags if etag not in ('*', '')] + expected_etags = set((expected_plain_etags or plain_etags) + + masked_etags) + self.assertEqual(expected_etags, actual_etags) + # check that the request environ was returned to original state + self.assertEqual(set(plain_etags), + set(req.headers[match_header_name].split(', '))) + + do_test(method, ['']) + do_test(method, ['"an etag"']) + do_test(method, ['"an etag"', '"another_etag"']) + do_test(method, ['*']) + # rfc2616 does not allow wildcard *and* etag but test it anyway + do_test(method, ['*', '"an etag"']) + # etags should be quoted but check we can cope if they are not + do_test( + method, ['*', 'an etag', 'another_etag'], + expected_plain_etags=['*', '"an etag"', '"another_etag"']) + + def test_GET_if_match(self): + self._test_if_match('GET', 'If-Match') + + def test_HEAD_if_match(self): + self._test_if_match('HEAD', 'If-Match') + + def test_GET_if_none_match(self): + self._test_if_match('GET', 'If-None-Match') + + def test_HEAD_if_none_match(self): + self._test_if_match('HEAD', 'If-None-Match') + + def _test_existing_etag_is_at_header(self, method, match_header_name): + # if another middleware has already set X-Backend-Etag-Is-At then + # encrypter should not override that value + env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank( + '/v1/a/c/o', environ=env, method=method, + headers={match_header_name: "an etag", + 'X-Backend-Etag-Is-At': 'X-Object-Sysmeta-Other-Etag'}) + self.app.register(method, '/v1/a/c/o', HTTPOk, {}) + resp = req.get_response(self.encrypter) + self.assertEqual('200 OK', resp.status) + + self.assertEqual(1, len(self.app.calls), self.app.calls) + self.assertEqual(method, self.app.calls[0][0]) + actual_headers = self.app.headers[0] + self.assertIn('X-Backend-Etag-Is-At', actual_headers) + self.assertEqual( + 'X-Object-Sysmeta-Other-Etag,X-Object-Sysmeta-Crypto-Etag-Mac', + actual_headers['X-Backend-Etag-Is-At']) + actual_etags = set(actual_headers[match_header_name].split(', ')) + self.assertIn('"an etag"', actual_etags) + + def test_GET_if_match_with_existing_etag_is_at_header(self): + self._test_existing_etag_is_at_header('GET', 'If-Match') + + def test_HEAD_if_match_with_existing_etag_is_at_header(self): + self._test_existing_etag_is_at_header('HEAD', 'If-Match') + + def 
test_GET_if_none_match_with_existing_etag_is_at_header(self): + self._test_existing_etag_is_at_header('GET', 'If-None-Match') + + def test_HEAD_if_none_match_with_existing_etag_is_at_header(self): + self._test_existing_etag_is_at_header('HEAD', 'If-None-Match') + + def _test_etag_is_at_not_duplicated(self, method): + # verify only one occurrence of X-Object-Sysmeta-Crypto-Etag-Mac in + # X-Backend-Etag-Is-At + key = fetch_crypto_keys()['object'] + env = {CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + req = Request.blank( + '/v1/a/c/o', environ=env, method=method, + headers={'If-Match': '"an etag"', + 'If-None-Match': '"another etag"'}) + self.app.register(method, '/v1/a/c/o', HTTPOk, {}) + resp = req.get_response(self.encrypter) + self.assertEqual('200 OK', resp.status) + + self.assertEqual(1, len(self.app.calls), self.app.calls) + self.assertEqual(method, self.app.calls[0][0]) + actual_headers = self.app.headers[0] + self.assertIn('X-Backend-Etag-Is-At', actual_headers) + self.assertEqual('X-Object-Sysmeta-Crypto-Etag-Mac', + actual_headers['X-Backend-Etag-Is-At']) + + self.assertIn('"%s"' % base64.b64encode( + hmac.new(key, 'an etag', hashlib.sha256).digest()), + actual_headers['If-Match']) + self.assertIn('"another etag"', actual_headers['If-None-Match']) + self.assertIn('"%s"' % base64.b64encode( + hmac.new(key, 'another etag', hashlib.sha256).digest()), + actual_headers['If-None-Match']) + + def test_GET_etag_is_at_not_duplicated(self): + self._test_etag_is_at_not_duplicated('GET') + + def test_HEAD_etag_is_at_not_duplicated(self): + self._test_etag_is_at_not_duplicated('HEAD') + + def test_PUT_response_inconsistent_etag_is_not_replaced(self): + # if response is success but etag does not match the ciphertext md5 + # then verify that we do *not* replace it with the plaintext etag + body = 'FAKE APP' + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(body))} + req = Request.blank('/v1/a/c/o', environ=env, body=body, headers=hdrs) + self.app.register('PUT', '/v1/a/c/o', HTTPCreated, + {'Etag': 'not the ciphertext etag'}) + resp = req.get_response(self.encrypter) + self.assertEqual('201 Created', resp.status) + self.assertEqual('not the ciphertext etag', resp.headers['Etag']) + + def test_PUT_multiseg_no_client_etag(self): + body_key = os.urandom(32) + chunks = ['some', 'chunks', 'of data'] + body = ''.join(chunks) + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys, + 'wsgi.input': FileLikeIter(chunks)} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(body))} + req = Request.blank('/v1/a/c/o', environ=env, headers=hdrs) + self.app.register('PUT', '/v1/a/c/o', HTTPCreated, {}) + + with mock.patch( + 'swift.common.middleware.crypto.crypto_utils.' 
+ 'Crypto.create_random_key', + lambda *args: body_key): + resp = req.get_response(self.encrypter) + + self.assertEqual('201 Created', resp.status) + # verify object is encrypted by getting direct from the app + get_req = Request.blank('/v1/a/c/o', environ={'REQUEST_METHOD': 'GET'}) + self.assertEqual(encrypt(body, body_key, FAKE_IV), + get_req.get_response(self.app).body) + + def test_PUT_multiseg_good_client_etag(self): + body_key = os.urandom(32) + chunks = ['some', 'chunks', 'of data'] + body = ''.join(chunks) + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys, + 'wsgi.input': FileLikeIter(chunks)} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(body)), + 'Etag': md5hex(body)} + req = Request.blank('/v1/a/c/o', environ=env, headers=hdrs) + self.app.register('PUT', '/v1/a/c/o', HTTPCreated, {}) + + with mock.patch( + 'swift.common.middleware.crypto.crypto_utils.' + 'Crypto.create_random_key', + lambda *args: body_key): + resp = req.get_response(self.encrypter) + + self.assertEqual('201 Created', resp.status) + # verify object is encrypted by getting direct from the app + get_req = Request.blank('/v1/a/c/o', environ={'REQUEST_METHOD': 'GET'}) + self.assertEqual(encrypt(body, body_key, FAKE_IV), + get_req.get_response(self.app).body) + + def test_PUT_multiseg_bad_client_etag(self): + chunks = ['some', 'chunks', 'of data'] + body = ''.join(chunks) + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: fetch_crypto_keys, + 'wsgi.input': FileLikeIter(chunks)} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(body)), + 'Etag': 'badclientetag'} + req = Request.blank('/v1/a/c/o', environ=env, headers=hdrs) + self.app.register('PUT', '/v1/a/c/o', HTTPCreated, {}) + resp = req.get_response(self.encrypter) + self.assertEqual('422 Unprocessable Entity', resp.status) + + def test_PUT_missing_key_callback(self): + body = 'FAKE APP' + env = {'REQUEST_METHOD': 'PUT'} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(body))} + req = Request.blank('/v1/a/c/o', environ=env, body=body, headers=hdrs) + resp = req.get_response(self.encrypter) + self.assertEqual('500 Internal Error', resp.status) + self.assertIn('missing callback', + self.encrypter.logger.get_lines_for_level('error')[0]) + self.assertEqual('Unable to retrieve encryption keys.', resp.body) + + def test_PUT_error_in_key_callback(self): + def raise_exc(): + raise Exception('Testing') + + body = 'FAKE APP' + env = {'REQUEST_METHOD': 'PUT', + CRYPTO_KEY_CALLBACK: raise_exc} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(body))} + req = Request.blank('/v1/a/c/o', environ=env, body=body, headers=hdrs) + resp = req.get_response(self.encrypter) + self.assertEqual('500 Internal Error', resp.status) + self.assertIn('from callback: Testing', + self.encrypter.logger.get_lines_for_level('error')[0]) + self.assertEqual('Unable to retrieve encryption keys.', resp.body) + + def test_PUT_encryption_override(self): + # set crypto override to disable encryption. 
+ # simulate another middleware wanting to set footers + other_footers = { + 'Etag': 'other etag', + 'X-Object-Sysmeta-Other': 'other sysmeta', + 'X-Object-Sysmeta-Container-Update-Override-Etag': + 'other override'} + body = 'FAKE APP' + env = {'REQUEST_METHOD': 'PUT', + 'swift.crypto.override': True, + 'swift.callback.update_footers': + lambda footers: footers.update(other_footers)} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(body))} + req = Request.blank('/v1/a/c/o', environ=env, body=body, headers=hdrs) + self.app.register('PUT', '/v1/a/c/o', HTTPCreated, {}) + resp = req.get_response(self.encrypter) + self.assertEqual('201 Created', resp.status) + + # verify that other middleware's footers made it to app + req_hdrs = self.app.headers[0] + for k, v in other_footers.items(): + self.assertEqual(v, req_hdrs[k]) + + # verify object is NOT encrypted by getting direct from the app + get_req = Request.blank('/v1/a/c/o', environ={'REQUEST_METHOD': 'GET'}) + self.assertEqual(body, get_req.get_response(self.app).body) + + def _test_constraints_checking(self, method): + # verify that the check_metadata function is called on PUT and POST + body = 'FAKE APP' + env = {'REQUEST_METHOD': method, + CRYPTO_KEY_CALLBACK: fetch_crypto_keys} + hdrs = {'content-type': 'text/plain', + 'content-length': str(len(body))} + req = Request.blank('/v1/a/c/o', environ=env, body=body, headers=hdrs) + mocked_func = 'swift.common.middleware.crypto.encrypter.check_metadata' + with mock.patch(mocked_func) as mocked: + mocked.side_effect = [HTTPBadRequest('testing')] + resp = req.get_response(self.encrypter) + self.assertEqual('400 Bad Request', resp.status) + self.assertEqual(1, mocked.call_count) + mocked.assert_called_once_with(mock.ANY, 'object') + self.assertEqual(req.headers, + mocked.call_args_list[0][0][0].headers) + + def test_PUT_constraints_checking(self): + self._test_constraints_checking('PUT') + + def test_POST_constraints_checking(self): + self._test_constraints_checking('POST') + + def test_config_true_value_on_disable_encryption(self): + app = FakeSwift() + self.assertFalse(encrypter.Encrypter(app, {}).disable_encryption) + for val in ('true', '1', 'yes', 'on', 't', 'y'): + app = encrypter.Encrypter(app, + {'disable_encryption': val}) + self.assertTrue(app.disable_encryption) + + def test_PUT_app_exception(self): + app = encrypter.Encrypter(FakeAppThatExcepts(HTTPException), {}) + req = Request.blank('/', environ={'REQUEST_METHOD': 'PUT'}) + with self.assertRaises(HTTPException) as catcher: + req.get_response(app) + self.assertEqual(FakeAppThatExcepts.MESSAGE, catcher.exception.body) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/unit/common/middleware/crypto/test_encryption.py b/test/unit/common/middleware/crypto/test_encryption.py new file mode 100644 index 0000000000..e984a5f0ae --- /dev/null +++ b/test/unit/common/middleware/crypto/test_encryption.py @@ -0,0 +1,631 @@ +# Copyright (c) 2015-2016 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import base64 +import hashlib +import hmac +import json +import unittest +import uuid + +from swift.common import storage_policy, constraints +from swift.common.middleware import copy +from swift.common.middleware import crypto +from swift.common.middleware.crypto import keymaster +from swift.common.middleware.crypto.crypto_utils import ( + load_crypto_meta, Crypto) +from swift.common.ring import Ring +from swift.common.swob import Request +from swift.obj import diskfile + +from test.unit import FakeLogger +from test.unit.common.middleware.crypto.crypto_helpers import ( + md5hex, encrypt, TEST_KEYMASTER_CONF) +from test.unit.helpers import setup_servers, teardown_servers + + +class TestCryptoPipelineChanges(unittest.TestCase): + # Tests the consequences of crypto middleware being in/out of the pipeline + # or having encryption disabled for PUT/GET requests on same object. Uses + # real backend servers so that the handling of headers and sysmeta is + # verified to diskfile and back. + _test_context = None + + @classmethod + def setUpClass(cls): + cls._test_context = setup_servers() + cls.proxy_app = cls._test_context["test_servers"][0] + + @classmethod + def tearDownClass(cls): + if cls._test_context is not None: + teardown_servers(cls._test_context) + cls._test_context = None + + def setUp(self): + self.plaintext = 'unencrypted body content' + self.plaintext_etag = md5hex(self.plaintext) + self._setup_crypto_app() + + def _setup_crypto_app(self, disable_encryption=False): + # Set up a pipeline of crypto middleware ending in the proxy app so + # that tests can make requests to either the proxy server directly or + # via the crypto middleware. Make a fresh instance for each test to + # avoid any state coupling. 
+ conf = {'disable_encryption': disable_encryption} + self.encryption = crypto.filter_factory(conf)(self.proxy_app) + self.km = keymaster.KeyMaster(self.encryption, TEST_KEYMASTER_CONF) + self.crypto_app = self.km # for clarity + + def _create_container(self, app, policy_name='one', container_path=None): + if not container_path: + # choose new container name so that the policy can be specified + self.container_name = uuid.uuid4().hex + self.container_path = 'http://foo:8080/v1/a/' + self.container_name + self.object_name = 'o' + self.object_path = self.container_path + '/' + self.object_name + container_path = self.container_path + req = Request.blank( + container_path, method='PUT', + headers={'X-Storage-Policy': policy_name}) + resp = req.get_response(app) + self.assertEqual('201 Created', resp.status) + # sanity check + req = Request.blank( + container_path, method='HEAD', + headers={'X-Storage-Policy': policy_name}) + resp = req.get_response(app) + self.assertEqual(policy_name, resp.headers['X-Storage-Policy']) + + def _put_object(self, app, body): + req = Request.blank(self.object_path, method='PUT', body=body, + headers={'Content-Type': 'application/test'}) + resp = req.get_response(app) + self.assertEqual('201 Created', resp.status) + self.assertEqual(self.plaintext_etag, resp.headers['Etag']) + return resp + + def _post_object(self, app): + req = Request.blank(self.object_path, method='POST', + headers={'Content-Type': 'application/test', + 'X-Object-Meta-Fruit': 'Kiwi'}) + resp = req.get_response(app) + self.assertEqual('202 Accepted', resp.status) + return resp + + def _copy_object(self, app, destination): + req = Request.blank(self.object_path, method='COPY', + headers={'Destination': destination}) + resp = req.get_response(app) + self.assertEqual('201 Created', resp.status) + self.assertEqual(self.plaintext_etag, resp.headers['Etag']) + return resp + + def _check_GET_and_HEAD(self, app, object_path=None): + object_path = object_path or self.object_path + req = Request.blank(object_path, method='GET') + resp = req.get_response(app) + self.assertEqual('200 OK', resp.status) + self.assertEqual(self.plaintext, resp.body) + self.assertEqual('Kiwi', resp.headers['X-Object-Meta-Fruit']) + + req = Request.blank(object_path, method='HEAD') + resp = req.get_response(app) + self.assertEqual('200 OK', resp.status) + self.assertEqual('', resp.body) + self.assertEqual('Kiwi', resp.headers['X-Object-Meta-Fruit']) + + def _check_match_requests(self, method, app, object_path=None): + object_path = object_path or self.object_path + # verify conditional match requests + expected_body = self.plaintext if method == 'GET' else '' + + # If-Match matches + req = Request.blank(object_path, method=method, + headers={'If-Match': '"%s"' % self.plaintext_etag}) + resp = req.get_response(app) + self.assertEqual('200 OK', resp.status) + self.assertEqual(expected_body, resp.body) + self.assertEqual(self.plaintext_etag, resp.headers['Etag']) + self.assertEqual('Kiwi', resp.headers['X-Object-Meta-Fruit']) + + # If-Match wildcard + req = Request.blank(object_path, method=method, + headers={'If-Match': '*'}) + resp = req.get_response(app) + self.assertEqual('200 OK', resp.status) + self.assertEqual(expected_body, resp.body) + self.assertEqual(self.plaintext_etag, resp.headers['Etag']) + self.assertEqual('Kiwi', resp.headers['X-Object-Meta-Fruit']) + + # If-Match does not match + req = Request.blank(object_path, method=method, + headers={'If-Match': '"not the etag"'}) + resp = req.get_response(app) + 
self.assertEqual('412 Precondition Failed', resp.status) + self.assertEqual('', resp.body) + self.assertEqual(self.plaintext_etag, resp.headers['Etag']) + + # If-None-Match matches + req = Request.blank( + object_path, method=method, + headers={'If-None-Match': '"%s"' % self.plaintext_etag}) + resp = req.get_response(app) + self.assertEqual('304 Not Modified', resp.status) + self.assertEqual('', resp.body) + self.assertEqual(self.plaintext_etag, resp.headers['Etag']) + + # If-None-Match wildcard + req = Request.blank(object_path, method=method, + headers={'If-None-Match': '*'}) + resp = req.get_response(app) + self.assertEqual('304 Not Modified', resp.status) + self.assertEqual('', resp.body) + self.assertEqual(self.plaintext_etag, resp.headers['Etag']) + + # If-None-Match does not match + req = Request.blank(object_path, method=method, + headers={'If-None-Match': '"not the etag"'}) + resp = req.get_response(app) + self.assertEqual('200 OK', resp.status) + self.assertEqual(expected_body, resp.body) + self.assertEqual(self.plaintext_etag, resp.headers['Etag']) + self.assertEqual('Kiwi', resp.headers['X-Object-Meta-Fruit']) + + def _check_listing(self, app, expect_mismatch=False, container_path=None): + container_path = container_path or self.container_path + req = Request.blank( + container_path, method='GET', query_string='format=json') + resp = req.get_response(app) + self.assertEqual('200 OK', resp.status) + listing = json.loads(resp.body) + self.assertEqual(1, len(listing)) + self.assertEqual(self.object_name, listing[0]['name']) + self.assertEqual(len(self.plaintext), listing[0]['bytes']) + if expect_mismatch: + self.assertNotEqual(self.plaintext_etag, listing[0]['hash']) + else: + self.assertEqual(self.plaintext_etag, listing[0]['hash']) + + def test_write_with_crypto_and_override_headers(self): + self._create_container(self.proxy_app, policy_name='one') + + def verify_overrides(): + # verify object sysmeta + req = Request.blank( + self.object_path, method='GET') + resp = req.get_response(self.crypto_app) + for k, v in overrides.items(): + self.assertIn(k, resp.headers) + self.assertEqual(overrides[k], resp.headers[k]) + + # check container listing + req = Request.blank( + self.container_path, method='GET', query_string='format=json') + resp = req.get_response(self.crypto_app) + self.assertEqual('200 OK', resp.status) + listing = json.loads(resp.body) + self.assertEqual(1, len(listing)) + self.assertEqual('o', listing[0]['name']) + self.assertEqual( + overrides['x-object-sysmeta-container-update-override-size'], + str(listing[0]['bytes'])) + self.assertEqual( + overrides['x-object-sysmeta-container-update-override-etag'], + listing[0]['hash']) + + # include overrides in headers + overrides = {'x-object-sysmeta-container-update-override-etag': 'foo', + 'x-object-sysmeta-container-update-override-size': + str(len(self.plaintext) + 1)} + req = Request.blank(self.object_path, method='PUT', + body=self.plaintext, headers=overrides.copy()) + resp = req.get_response(self.crypto_app) + self.assertEqual('201 Created', resp.status) + self.assertEqual(self.plaintext_etag, resp.headers['Etag']) + verify_overrides() + + # include overrides in footers + overrides = {'x-object-sysmeta-container-update-override-etag': 'bar', + 'x-object-sysmeta-container-update-override-size': + str(len(self.plaintext) + 2)} + + def callback(footers): + footers.update(overrides) + + req = Request.blank( + self.object_path, method='PUT', body=self.plaintext) + req.environ['swift.callback.update_footers'] = callback + 
resp = req.get_response(self.crypto_app) + self.assertEqual('201 Created', resp.status) + self.assertEqual(self.plaintext_etag, resp.headers['Etag']) + verify_overrides() + + def test_write_with_crypto_read_with_crypto(self): + self._create_container(self.proxy_app, policy_name='one') + self._put_object(self.crypto_app, self.plaintext) + self._post_object(self.crypto_app) + self._check_GET_and_HEAD(self.crypto_app) + self._check_match_requests('GET', self.crypto_app) + self._check_match_requests('HEAD', self.crypto_app) + self._check_listing(self.crypto_app) + + def test_write_with_crypto_read_with_crypto_ec(self): + self._create_container(self.proxy_app, policy_name='ec') + self._put_object(self.crypto_app, self.plaintext) + self._post_object(self.crypto_app) + self._check_GET_and_HEAD(self.crypto_app) + self._check_match_requests('GET', self.crypto_app) + self._check_match_requests('HEAD', self.crypto_app) + self._check_listing(self.crypto_app) + + def test_put_without_crypto_post_with_crypto_read_with_crypto(self): + self._create_container(self.proxy_app, policy_name='one') + self._put_object(self.proxy_app, self.plaintext) + self._post_object(self.crypto_app) + self._check_GET_and_HEAD(self.crypto_app) + self._check_match_requests('GET', self.crypto_app) + self._check_match_requests('HEAD', self.crypto_app) + self._check_listing(self.crypto_app) + + def test_write_without_crypto_read_with_crypto(self): + self._create_container(self.proxy_app, policy_name='one') + self._put_object(self.proxy_app, self.plaintext) + self._post_object(self.proxy_app) + self._check_GET_and_HEAD(self.proxy_app) # sanity check + self._check_GET_and_HEAD(self.crypto_app) + self._check_match_requests('GET', self.proxy_app) # sanity check + self._check_match_requests('GET', self.crypto_app) + self._check_match_requests('HEAD', self.proxy_app) # sanity check + self._check_match_requests('HEAD', self.crypto_app) + self._check_listing(self.crypto_app) + + def test_write_without_crypto_read_with_crypto_ec(self): + self._create_container(self.proxy_app, policy_name='ec') + self._put_object(self.proxy_app, self.plaintext) + self._post_object(self.proxy_app) + self._check_GET_and_HEAD(self.proxy_app) # sanity check + self._check_GET_and_HEAD(self.crypto_app) + self._check_match_requests('GET', self.proxy_app) # sanity check + self._check_match_requests('GET', self.crypto_app) + self._check_match_requests('HEAD', self.proxy_app) # sanity check + self._check_match_requests('HEAD', self.crypto_app) + self._check_listing(self.crypto_app) + + def _check_GET_and_HEAD_not_decrypted(self, app): + req = Request.blank(self.object_path, method='GET') + resp = req.get_response(app) + self.assertEqual('200 OK', resp.status) + self.assertNotEqual(self.plaintext, resp.body) + self.assertEqual('%s' % len(self.plaintext), + resp.headers['Content-Length']) + self.assertNotEqual('Kiwi', resp.headers['X-Object-Meta-Fruit']) + + req = Request.blank(self.object_path, method='HEAD') + resp = req.get_response(app) + self.assertEqual('200 OK', resp.status) + self.assertEqual('', resp.body) + self.assertNotEqual('Kiwi', resp.headers['X-Object-Meta-Fruit']) + + def test_write_with_crypto_read_without_crypto(self): + self._create_container(self.proxy_app, policy_name='one') + self._put_object(self.crypto_app, self.plaintext) + self._post_object(self.crypto_app) + self._check_GET_and_HEAD(self.crypto_app) # sanity check + # without crypto middleware, GET and HEAD returns ciphertext + self._check_GET_and_HEAD_not_decrypted(self.proxy_app) + 
self._check_listing(self.proxy_app, expect_mismatch=True) + + def test_write_with_crypto_read_without_crypto_ec(self): + self._create_container(self.proxy_app, policy_name='ec') + self._put_object(self.crypto_app, self.plaintext) + self._post_object(self.crypto_app) + self._check_GET_and_HEAD(self.crypto_app) # sanity check + # without crypto middleware, GET and HEAD returns ciphertext + self._check_GET_and_HEAD_not_decrypted(self.proxy_app) + self._check_listing(self.proxy_app, expect_mismatch=True) + + def test_disable_encryption_config_option(self): + # check that on disable_encryption = true, object is not encrypted + self._setup_crypto_app(disable_encryption=True) + self._create_container(self.proxy_app, policy_name='one') + self._put_object(self.crypto_app, self.plaintext) + self._post_object(self.crypto_app) + self._check_GET_and_HEAD(self.crypto_app) + # check as if no crypto middleware exists + self._check_GET_and_HEAD(self.proxy_app) + self._check_match_requests('GET', self.crypto_app) + self._check_match_requests('HEAD', self.crypto_app) + self._check_match_requests('GET', self.proxy_app) + self._check_match_requests('HEAD', self.proxy_app) + + def test_write_with_crypto_read_with_disable_encryption_conf(self): + self._create_container(self.proxy_app, policy_name='one') + self._put_object(self.crypto_app, self.plaintext) + self._post_object(self.crypto_app) + self._check_GET_and_HEAD(self.crypto_app) # sanity check + # turn on disable_encryption config option + self._setup_crypto_app(disable_encryption=True) + # GET and HEAD of encrypted objects should still work + self._check_GET_and_HEAD(self.crypto_app) + self._check_listing(self.crypto_app, expect_mismatch=False) + self._check_match_requests('GET', self.crypto_app) + self._check_match_requests('HEAD', self.crypto_app) + + def _test_ondisk_data_after_write_with_crypto(self, policy_name): + policy = storage_policy.POLICIES.get_by_name(policy_name) + self._create_container(self.proxy_app, policy_name=policy_name) + self._put_object(self.crypto_app, self.plaintext) + self._post_object(self.crypto_app) + + # Verify container listing etag is encrypted by direct GET to container + # server. We can use any server for all nodes since they all share same + # devices dir. + cont_server = self._test_context['test_servers'][3] + cont_ring = Ring(self._test_context['testdir'], ring_name='container') + part, nodes = cont_ring.get_nodes('a', self.container_name) + for node in nodes: + req = Request.blank('/%s/%s/a/%s' + % (node['device'], part, self.container_name), + method='GET', query_string='format=json') + resp = req.get_response(cont_server) + listing = json.loads(resp.body) + # sanity checks... 
+ self.assertEqual(1, len(listing)) + self.assertEqual('o', listing[0]['name']) + self.assertEqual('application/test', listing[0]['content_type']) + # verify encrypted etag value + parts = listing[0]['hash'].rsplit(';', 1) + crypto_meta_param = parts[1].strip() + crypto_meta = crypto_meta_param[len('swift_meta='):] + listing_etag_iv = load_crypto_meta(crypto_meta)['iv'] + exp_enc_listing_etag = base64.b64encode( + encrypt(self.plaintext_etag, + self.km.create_key('/a/%s' % self.container_name), + listing_etag_iv)) + self.assertEqual(exp_enc_listing_etag, parts[0]) + + # Verify diskfile data and metadata is encrypted + ring_object = self.proxy_app.get_object_ring(int(policy)) + partition, nodes = ring_object.get_nodes('a', self.container_name, 'o') + conf = {'devices': self._test_context["testdir"], + 'mount_check': 'false'} + df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[policy] + ondisk_data = [] + exp_enc_body = None + for node_index, node in enumerate(nodes): + df = df_mgr.get_diskfile(node['device'], partition, + 'a', self.container_name, 'o', + policy=policy) + with df.open(): + meta = df.get_metadata() + contents = ''.join(df.reader()) + metadata = dict((k.lower(), v) for k, v in meta.items()) + # verify on disk data - body + body_iv = load_crypto_meta( + metadata['x-object-sysmeta-crypto-body-meta'])['iv'] + body_key_meta = load_crypto_meta( + metadata['x-object-sysmeta-crypto-body-meta'])['body_key'] + obj_key = self.km.create_key('/a/%s/o' % self.container_name) + body_key = Crypto().unwrap_key(obj_key, body_key_meta) + exp_enc_body = encrypt(self.plaintext, body_key, body_iv) + ondisk_data.append((node, contents)) + + # verify on disk user metadata + enc_val, meta = metadata[ + 'x-object-transient-sysmeta-crypto-meta-fruit'].split(';') + meta = meta.strip()[len('swift_meta='):] + metadata_iv = load_crypto_meta(meta)['iv'] + exp_enc_meta = base64.b64encode(encrypt('Kiwi', obj_key, + metadata_iv)) + self.assertEqual(exp_enc_meta, enc_val) + self.assertNotIn('x-object-meta-fruit', metadata) + + self.assertIn( + 'x-object-transient-sysmeta-crypto-meta', metadata) + meta = load_crypto_meta( + metadata['x-object-transient-sysmeta-crypto-meta']) + self.assertIn('key_id', meta) + self.assertIn('path', meta['key_id']) + self.assertEqual( + '/a/%s/%s' % (self.container_name, self.object_name), + meta['key_id']['path']) + self.assertIn('v', meta['key_id']) + self.assertEqual('1', meta['key_id']['v']) + self.assertIn('cipher', meta) + self.assertEqual(Crypto.cipher, meta['cipher']) + + # verify etag + actual_enc_etag, _junk, actual_etag_meta = metadata[ + 'x-object-sysmeta-crypto-etag'].partition('; swift_meta=') + etag_iv = load_crypto_meta(actual_etag_meta)['iv'] + exp_enc_etag = base64.b64encode(encrypt(self.plaintext_etag, + obj_key, etag_iv)) + self.assertEqual(exp_enc_etag, actual_enc_etag) + + # verify etag hmac + exp_etag_mac = hmac.new( + obj_key, self.plaintext_etag, digestmod=hashlib.sha256) + exp_etag_mac = base64.b64encode(exp_etag_mac.digest()) + self.assertEqual(exp_etag_mac, + metadata['x-object-sysmeta-crypto-etag-mac']) + + # verify etag override for container updates + override = 'x-object-sysmeta-container-update-override-etag' + parts = metadata[override].rsplit(';', 1) + crypto_meta_param = parts[1].strip() + crypto_meta = crypto_meta_param[len('swift_meta='):] + listing_etag_iv = load_crypto_meta(crypto_meta)['iv'] + cont_key = self.km.create_key('/a/%s' % self.container_name) + exp_enc_listing_etag = base64.b64encode( + encrypt(self.plaintext_etag, cont_key, + 
listing_etag_iv)) + self.assertEqual(exp_enc_listing_etag, parts[0]) + + self._check_GET_and_HEAD(self.crypto_app) + return exp_enc_body, ondisk_data + + def test_ondisk_data_after_write_with_crypto(self): + exp_body, ondisk_data = self._test_ondisk_data_after_write_with_crypto( + policy_name='one') + for node, body in ondisk_data: + self.assertEqual(exp_body, body) + + def test_ondisk_data_after_write_with_crypto_ec(self): + exp_body, ondisk_data = self._test_ondisk_data_after_write_with_crypto( + policy_name='ec') + policy = storage_policy.POLICIES.get_by_name('ec') + for frag_selection in (ondisk_data[:2], ondisk_data[1:]): + frags = [frag for node, frag in frag_selection] + self.assertEqual(exp_body, policy.pyeclib_driver.decode(frags)) + + def _test_copy_encrypted_to_encrypted( + self, src_policy_name, dest_policy_name): + self._create_container(self.proxy_app, policy_name=src_policy_name) + self._put_object(self.crypto_app, self.plaintext) + self._post_object(self.crypto_app) + + copy_crypto_app = copy.ServerSideCopyMiddleware(self.crypto_app, {}) + + dest_container = uuid.uuid4().hex + dest_container_path = 'http://localhost:8080/v1/a/' + dest_container + self._create_container(copy_crypto_app, policy_name=dest_policy_name, + container_path=dest_container_path) + dest_obj_path = dest_container_path + '/o' + dest = '/%s/%s' % (dest_container, 'o') + self._copy_object(copy_crypto_app, dest) + + self._check_GET_and_HEAD(copy_crypto_app, object_path=dest_obj_path) + self._check_listing( + copy_crypto_app, container_path=dest_container_path) + self._check_match_requests( + 'GET', copy_crypto_app, object_path=dest_obj_path) + self._check_match_requests( + 'HEAD', copy_crypto_app, object_path=dest_obj_path) + + def test_copy_encrypted_to_encrypted(self): + self._test_copy_encrypted_to_encrypted('ec', 'ec') + self._test_copy_encrypted_to_encrypted('one', 'ec') + self._test_copy_encrypted_to_encrypted('ec', 'one') + self._test_copy_encrypted_to_encrypted('one', 'one') + + def _test_copy_encrypted_to_unencrypted( + self, src_policy_name, dest_policy_name): + self._create_container(self.proxy_app, policy_name=src_policy_name) + self._put_object(self.crypto_app, self.plaintext) + self._post_object(self.crypto_app) + + # make a pipeline with encryption disabled, use it to copy object + self._setup_crypto_app(disable_encryption=True) + copy_app = copy.ServerSideCopyMiddleware(self.crypto_app, {}) + + dest_container = uuid.uuid4().hex + dest_container_path = 'http://localhost:8080/v1/a/' + dest_container + self._create_container(self.crypto_app, policy_name=dest_policy_name, + container_path=dest_container_path) + dest_obj_path = dest_container_path + '/o' + dest = '/%s/%s' % (dest_container, 'o') + self._copy_object(copy_app, dest) + + self._check_GET_and_HEAD(copy_app, object_path=dest_obj_path) + self._check_GET_and_HEAD(self.proxy_app, object_path=dest_obj_path) + self._check_listing(copy_app, container_path=dest_container_path) + self._check_listing(self.proxy_app, container_path=dest_container_path) + self._check_match_requests( + 'GET', self.proxy_app, object_path=dest_obj_path) + self._check_match_requests( + 'HEAD', self.proxy_app, object_path=dest_obj_path) + + def test_copy_encrypted_to_unencrypted(self): + self._test_copy_encrypted_to_unencrypted('ec', 'ec') + self._test_copy_encrypted_to_unencrypted('one', 'ec') + self._test_copy_encrypted_to_unencrypted('ec', 'one') + self._test_copy_encrypted_to_unencrypted('one', 'one') + + def _test_copy_unencrypted_to_encrypted( + self, 
src_policy_name, dest_policy_name): + self._create_container(self.proxy_app, policy_name=src_policy_name) + self._put_object(self.proxy_app, self.plaintext) + self._post_object(self.proxy_app) + + copy_crypto_app = copy.ServerSideCopyMiddleware(self.crypto_app, {}) + + dest_container = uuid.uuid4().hex + dest_container_path = 'http://localhost:8080/v1/a/' + dest_container + self._create_container(copy_crypto_app, policy_name=dest_policy_name, + container_path=dest_container_path) + dest_obj_path = dest_container_path + '/o' + dest = '/%s/%s' % (dest_container, 'o') + self._copy_object(copy_crypto_app, dest) + + self._check_GET_and_HEAD(copy_crypto_app, object_path=dest_obj_path) + self._check_listing( + copy_crypto_app, container_path=dest_container_path) + self._check_match_requests( + 'GET', copy_crypto_app, object_path=dest_obj_path) + self._check_match_requests( + 'HEAD', copy_crypto_app, object_path=dest_obj_path) + + def test_copy_unencrypted_to_encrypted(self): + self._test_copy_unencrypted_to_encrypted('ec', 'ec') + self._test_copy_unencrypted_to_encrypted('one', 'ec') + self._test_copy_unencrypted_to_encrypted('ec', 'one') + self._test_copy_unencrypted_to_encrypted('one', 'one') + + def test_crypto_max_length_path(self): + # the path is stashed in the key_id in crypto meta; check that a long + # path is ok + self.container_name = 'c' * constraints.MAX_CONTAINER_NAME_LENGTH + self.object_name = 'o' * constraints.MAX_OBJECT_NAME_LENGTH + self.container_path = 'http://foo:8080/v1/a/' + self.container_name + self.object_path = '%s/%s' % (self.container_path, self.object_name) + + self._create_container(self.proxy_app, policy_name='one', + container_path=self.container_path) + + self._put_object(self.crypto_app, self.plaintext) + self._post_object(self.crypto_app) + self._check_GET_and_HEAD(self.crypto_app) + self._check_match_requests('GET', self.crypto_app) + self._check_match_requests('HEAD', self.crypto_app) + self._check_listing(self.crypto_app) + + def test_crypto_UTF8_path(self): + # check that UTF8 path is ok + self.container_name = self.object_name = u'\u010brypto' + self.container_path = 'http://foo:8080/v1/a/' + self.container_name + self.object_path = '%s/%s' % (self.container_path, self.object_name) + + self._create_container(self.proxy_app, policy_name='one', + container_path=self.container_path) + + self._put_object(self.crypto_app, self.plaintext) + self._post_object(self.crypto_app) + self._check_GET_and_HEAD(self.crypto_app) + self._check_match_requests('GET', self.crypto_app) + self._check_match_requests('HEAD', self.crypto_app) + self._check_listing(self.crypto_app) + + +class TestCryptoPipelineChangesFastPost(TestCryptoPipelineChanges): + @classmethod + def setUpClass(cls): + # set proxy config to use fast post + extra_conf = {'object_post_as_copy': 'False'} + cls._test_context = setup_servers(extra_conf=extra_conf) + cls.proxy_app = cls._test_context["test_servers"][0] + + +if __name__ == '__main__': + unittest.main() diff --git a/test/unit/common/middleware/crypto/test_keymaster.py b/test/unit/common/middleware/crypto/test_keymaster.py new file mode 100644 index 0000000000..2f8a1db458 --- /dev/null +++ b/test/unit/common/middleware/crypto/test_keymaster.py @@ -0,0 +1,163 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import base64 +import os + +import unittest + +from swift.common import swob +from swift.common.middleware.crypto import keymaster +from swift.common.middleware.crypto.crypto_utils import CRYPTO_KEY_CALLBACK +from swift.common.swob import Request +from test.unit.common.middleware.helpers import FakeSwift, FakeAppThatExcepts +from test.unit.common.middleware.crypto.crypto_helpers import ( + TEST_KEYMASTER_CONF) + + +def capture_start_response(): + calls = [] + + def start_response(*args): + calls.append(args) + return start_response, calls + + +class TestKeymaster(unittest.TestCase): + + def setUp(self): + super(TestKeymaster, self).setUp() + self.swift = FakeSwift() + self.app = keymaster.KeyMaster(self.swift, TEST_KEYMASTER_CONF) + + def test_object_path(self): + self.verify_keys_for_path( + '/a/c/o', expected_keys=('object', 'container')) + + def test_container_path(self): + self.verify_keys_for_path( + '/a/c', expected_keys=('container',)) + + def verify_keys_for_path(self, path, expected_keys): + put_keys = None + for method, resp_class, status in ( + ('PUT', swob.HTTPCreated, '201'), + ('POST', swob.HTTPAccepted, '202'), + ('GET', swob.HTTPOk, '200'), + ('HEAD', swob.HTTPNoContent, '204')): + resp_headers = {} + self.swift.register( + method, '/v1' + path, resp_class, resp_headers, '') + req = Request.blank( + '/v1' + path, environ={'REQUEST_METHOD': method}) + start_response, calls = capture_start_response() + self.app(req.environ, start_response) + self.assertEqual(1, len(calls)) + self.assertTrue(calls[0][0].startswith(status)) + self.assertNotIn('swift.crypto.override', req.environ) + self.assertIn(CRYPTO_KEY_CALLBACK, req.environ, + '%s not set in env' % CRYPTO_KEY_CALLBACK) + keys = req.environ.get(CRYPTO_KEY_CALLBACK)() + self.assertIn('id', keys) + id = keys.pop('id') + self.assertEqual(path, id['path']) + self.assertEqual('1', id['v']) + self.assertListEqual(sorted(expected_keys), sorted(keys.keys()), + '%s %s got keys %r, but expected %r' + % (method, path, keys.keys(), expected_keys)) + if put_keys is not None: + # check all key sets were consistent for this path + self.assertDictEqual(put_keys, keys) + else: + put_keys = keys + return put_keys + + def test_key_uniqueness(self): + # a rudimentary check that different keys are made for different paths + ref_path_parts = ('a1', 'c1', 'o1') + path = '/' + '/'.join(ref_path_parts) + ref_keys = self.verify_keys_for_path( + path, expected_keys=('object', 'container')) + + # for same path and for each differing path check that keys are unique + # when path to object or container is unique and vice-versa + for path_parts in [(a, c, o) for a in ('a1', 'a2') + for c in ('c1', 'c2') + for o in ('o1', 'o2')]: + path = '/' + '/'.join(path_parts) + keys = self.verify_keys_for_path( + path, expected_keys=('object', 'container')) + # object keys should only be equal when complete paths are equal + self.assertEqual(path_parts == ref_path_parts, + keys['object'] == ref_keys['object'], + 'Path %s keys:\n%s\npath %s keys\n%s' % + (ref_path_parts, ref_keys, path_parts, keys)) + # container keys should only be equal when paths to 
container are + # equal + self.assertEqual(path_parts[:2] == ref_path_parts[:2], + keys['container'] == ref_keys['container'], + 'Path %s keys:\n%s\npath %s keys\n%s' % + (ref_path_parts, ref_keys, path_parts, keys)) + + def test_filter(self): + factory = keymaster.filter_factory(TEST_KEYMASTER_CONF) + self.assertTrue(callable(factory)) + self.assertTrue(callable(factory(self.swift))) + + def test_app_exception(self): + app = keymaster.KeyMaster( + FakeAppThatExcepts(), TEST_KEYMASTER_CONF) + req = Request.blank('/', environ={'REQUEST_METHOD': 'PUT'}) + start_response, _ = capture_start_response() + self.assertRaises(Exception, app, req.environ, start_response) + + def test_root_secret(self): + for secret in (os.urandom(32), os.urandom(33), os.urandom(50)): + encoded_secret = base64.b64encode(secret) + try: + app = keymaster.KeyMaster( + self.swift, {'encryption_root_secret': + bytes(encoded_secret)}) + self.assertEqual(secret, app.root_secret) + except AssertionError as err: + self.fail(str(err) + ' for secret %s' % secret) + try: + app = keymaster.KeyMaster( + self.swift, {'encryption_root_secret': + unicode(encoded_secret)}) + self.assertEqual(secret, app.root_secret) + except AssertionError as err: + self.fail(str(err) + ' for secret %s' % secret) + + def test_invalid_root_secret(self): + for secret in (bytes(base64.b64encode(os.urandom(31))), # too short + unicode(base64.b64encode(os.urandom(31))), + u'?' * 44, b'?' * 44, # not base64 + u'a' * 45, b'a' * 45, # bad padding + 99, None): + conf = {'encryption_root_secret': secret} + try: + with self.assertRaises(ValueError) as err: + keymaster.KeyMaster(self.swift, conf) + self.assertEqual( + 'encryption_root_secret option in proxy-server.conf ' + 'must be a base64 encoding of at least 32 raw bytes', + err.exception.message) + except AssertionError as err: + self.fail(str(err) + ' for conf %s' % str(conf)) + + +if __name__ == '__main__': + unittest.main() From f36bc513c5e0029b90207d7a2dec81965eed8300 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Tue, 7 Jun 2016 15:08:54 +0100 Subject: [PATCH 7/7] Add encryption overview doc Include a note in container-sync docs pointing to specific configuration needed to be compatible with encryption. Also remove the sample encryption root secret from proxy-server.conf-sample and in-process test setup. Remove encryption middleware from the default proxy pipeline. Change-Id: Ibceac485813f3ac819a53e644995749735592a55 --- doc/source/development_middleware.rst | 2 + doc/source/index.rst | 1 + doc/source/overview_container_sync.rst | 6 + doc/source/overview_encryption.rst | 472 +++++++++++++++++++++++++ etc/proxy-server.conf-sample | 8 +- test/functional/__init__.py | 6 - 6 files changed, 483 insertions(+), 12 deletions(-) create mode 100644 doc/source/overview_encryption.rst diff --git a/doc/source/development_middleware.rst b/doc/source/development_middleware.rst index b6dac83289..6fef62e22e 100644 --- a/doc/source/development_middleware.rst +++ b/doc/source/development_middleware.rst @@ -281,6 +281,8 @@ individual items of user metadata is not supported. In cases where middleware needs to store its own metadata with a POST request, it may use Object Transient Sysmeta. +.. 
_transient_sysmeta: + ^^^^^^^^^^^^^^^^^^^^^^^^ Object Transient-Sysmeta ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/source/index.rst b/doc/source/index.rst index c648d0af4f..4784d91337 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -57,6 +57,7 @@ Overview and Concepts cors crossdomain overview_erasure_code + overview_encryption overview_backing_store ring_background associated_projects diff --git a/doc/source/overview_container_sync.rst b/doc/source/overview_container_sync.rst index e69ec2743e..9947fc3d10 100644 --- a/doc/source/overview_container_sync.rst +++ b/doc/source/overview_container_sync.rst @@ -18,6 +18,12 @@ synchronization key. your manifest file and your segment files are synced if they happen to be in different containers. +.. note:: + + If you are using encryption middleware in the cluster from which objects + are being synced, then you should follow the instructions to configure + :ref:`container_sync_client_config` to be compatible with encryption. + -------------------------- Configuring Container Sync -------------------------- diff --git a/doc/source/overview_encryption.rst b/doc/source/overview_encryption.rst new file mode 100644 index 0000000000..6aa24636c6 --- /dev/null +++ b/doc/source/overview_encryption.rst @@ -0,0 +1,472 @@ +================= +Object Encryption +================= + +Swift supports the optional encryption of object data at rest on storage nodes. +The encryption of object data is intended to mitigate the risk of users' data +being read if an unauthorised party were to gain physical access to a disk. + +.. note:: + + Swift's data-at-rest encryption accepts plaintext object data from the + client, encrypts it in the cluster, and stores the encrypted data. This + protects object data from inadvertently being exposed if a data drive + leaves the Swift cluster. If a user wishes to ensure that the plaintext + data is always encrypted while in transit and in storage, it is strongly + recommended that the data be encrypted before sending it to the Swift + cluster. Encrypting on the client side is the only way to ensure that the + data is fully encrypted for its entire lifecycle. + +Encryption of data at rest is implemented by middleware that may be included in +the proxy server WSGI pipeline. The feature is internal to a Swift cluster and +not exposed through the API. Clients are unaware that data is encrypted by this +feature internally to the Swift service; internally encrypted data should never +be returned to clients via the Swift API. + +The following data are encrypted while at rest in Swift: + +* Object content i.e. the content of an object PUT request's body +* The entity tag (ETag) of objects that have non-zero content +* All custom user object metadata values i.e. metadata sent using + X-Object-Meta- prefixed headers with PUT or POST requests + +Any data or metadata not included in the list above are not encrypted, +including: + +* Account, container and object names +* Account and container custom user metadata values +* All custom user metadata names +* Object Content-Type values +* Object size +* System metadata + +.. note:: + + This feature is intended to provide `confidentiality` of data that is at + rest i.e. to protect user data from being read by an attacker that gains + access to disks on which object data is stored. + + This feature is not intended to prevent undetectable `modification` + of user data at rest. 
+ + This feature is not intended to protect against an attacker that gains + access to Swift's internal network connections, or gains access to key + material or is able to modify the Swift code running on Swift nodes. + +.. _encryption_deployment: + +------------------------ +Deployment and operation +------------------------ + +Encryption is deployed by adding two middleware filters to the proxy +server WSGI pipeline and including their respective filter configuration +sections in the `proxy-server.conf` file. :ref:`Additional steps +<container_sync_client_config>` are required if the container sync feature is +being used. + +The `keymaster` and `encryption` middleware filters must be to the right of all +other middleware in the pipeline apart from the final proxy-logging middleware, +and in the order shown in this example:: + + <other middleware> keymaster encryption proxy-logging proxy-server + + [filter:keymaster] + use = egg:swift#keymaster + encryption_root_secret = your_secret + + [filter:encryption] + use = egg:swift#encryption + # disable_encryption = False + +See the `proxy-server.conf-sample` file for further details on the middleware +configuration options. + +The keymaster config option ``encryption_root_secret`` MUST be set to a value +of at least 44 valid base-64 characters before the middleware is used and +should be consistent across all proxy servers. The minimum length of 44 has +been chosen because it is the length of a base-64 encoded 32 byte value. + +.. note:: + + The ``encryption_root_secret`` option holds the master secret key used for + encryption. The security of all encrypted data critically depends on this + key and it should therefore be set to a high-entropy value. For example, a + suitable ``encryption_root_secret`` may be obtained by base-64 encoding a + 32 byte (or longer) value generated by a cryptographically secure random + number generator. + + The ``encryption_root_secret`` value is necessary to recover any encrypted + data from the storage system, and therefore, it must be guarded against + accidental loss. Its value (and consequently, the proxy-server.conf file) + should not be stored on any disk that is in any account, container or + object ring. + +One method for generating a suitable value for ``encryption_root_secret`` is to +use the ``openssl`` command line tool:: + + openssl rand -base64 32 + +Once deployed, the encryption filter will by default encrypt object data and +metadata when handling PUT and POST requests and decrypt object data and +metadata when handling GET and HEAD requests. COPY requests are transformed +into GET and PUT requests by the :ref:`copy` middleware before reaching the +encryption middleware and as a result object data and metadata is decrypted and +re-encrypted when copied. + +Upgrade Considerations +---------------------- + +When upgrading an existing cluster to deploy encryption, the following sequence +of steps is recommended: + +#. Upgrade all object servers +#. Upgrade all proxy servers +#. Add keymaster and encryption middlewares to every proxy server's middleware + pipeline with the encryption ``disable_encryption`` option set to ``True`` + and the keymaster ``encryption_root_secret`` value set as described above. +#. If required, follow the steps for :ref:`container_sync_client_config`. +#. Finally, change the encryption ``disable_encryption`` option to ``False`` + +Objects that existed in the cluster prior to the keymaster and encryption +middlewares being deployed are still readable with GET and HEAD requests. 
The +content of those objects will not be encrypted unless they are written again by +a PUT or COPY request. Any user metadata of those objects will not be encrypted +unless it is written again by a PUT, POST or COPY request. + +Disabling Encryption +-------------------- + +Once deployed, the keymaster and encryption middlewares should not be removed +from the pipeline. To do so will cause encrypted object data and/or metadata to +be returned in response to GET or HEAD requests for objects that were +previously encrypted. + +Encryption of inbound object data may be disabled by setting the encryption +``disable_encryption`` option to ``True``, in which case existing encrypted +objects will remain encrypted but new data written with PUT, POST or COPY +requests will not be encrypted. The keymaster and encryption middlewares should +remain in the pipeline even when encryption of new objects is not required. The +encryption middleware is needed to handle GET requests for objects that may +have been previously encrypted. The keymaster is needed to provide keys for +those requests. + +.. _container_sync_client_config: + +Container sync configuration +---------------------------- + +If container sync is being used then the keymaster and encryption middlewares +must be added to the container sync internal client pipeline. The following +configuration steps are required: + +#. Create a custom internal client configuration file for container sync (if + one is not already in use) based on the sample file + `internal-client.conf-sample`. For example, copy + `internal-client.conf-sample` to `/etc/swift/container-sync-client.conf`. +#. Modify this file to include the middlewares in the pipeline in + the same way as described above for the proxy server. +#. Modify the container-sync section of all container server config files to + point to this internal client config file using the + ``internal_client_conf_path`` option. For example:: + + internal_client_conf_path = /etc/swift/container-sync-client.conf + +.. note:: + + The ``encryption_root_secret`` value is necessary to recover any encrypted + data from the storage system, and therefore, it must be guarded against + accidental loss. Its value (and consequently, the custom internal client + configuration file) should not be stored on any disk that is in any + account, container or object ring. + +.. note:: + + These container sync configuration steps will be necessary for container + sync probe tests to pass if the encryption middlewares are included in the + proxy pipeline of a test cluster. + +-------------- +Implementation +-------------- + +Encryption scheme +----------------- + +Plaintext data is encrypted to ciphertext using the AES cipher with 256-bit +keys implemented by the python `cryptography package +`_. The cipher is used in counter +(CTR) mode so that any byte or range of bytes in the ciphertext may be +decrypted independently of any other bytes in the ciphertext. This enables very +simple handling of ranged GETs. + +In general an item of unencrypted data, ``plaintext``, is transformed to an +item of encrypted data, ``ciphertext``:: + + ciphertext = E(plaintext, k, iv) + +where ``E`` is the encryption function, ``k`` is an encryption key and ``iv`` +is a unique initialization vector (IV) chosen for each encryption context. For +example, the object body is one encryption context with a randomly chosen IV. 
+The IV is stored as metadata of the encrypted item so that it is available for +decryption:: + + plaintext = D(ciphertext, k, iv) + +where ``D`` is the decryption function. + +The implementation of CTR mode follows `NIST SP800-38A +`_, and the +full IV passed to the encryption or decryption function serves as the initial +counter block. + +In general any encrypted item has accompanying crypto-metadata that describes +the IV and the cipher algorithm used for the encryption:: + + crypto_metadata = {"iv": <16 byte value>, + "cipher": "AES_CTR_256"} + +This crypto-metadata is stored either with the ciphertext (for user +metadata and etags) or as a separate header (for object bodies). + +Key management +-------------- + +A keymaster middleware is responsible for providing the keys required for each +encryption and decryption operation. Two keys are required when handling object +requests: a `container key` that is uniquely associated with the container path +and an `object key` that is uniquely associated with the object path. These +keys are made available to the encryption middleware via a callback function +that the keymaster installs in the WSGI request environ. + +The current keymaster implementation derives container and object keys from the +``encryption_root_secret`` in a deterministic way by constructing a SHA256 +HMAC using the ``encryption_root_secret`` as a key and the container or object +path as a message, for example:: + + object_key = HMAC(encryption_root_secret, "/a/c/o") + +Other strategies for providing object and container keys may be employed by +future implementations of alternative keymaster middleware. + +During each object PUT, a random key is generated to encrypt the object body. +This random key is then encrypted using the object key provided by the +keymaster. This makes it safe to store the encrypted random key alongside the +encrypted object data and metadata. + +This process of `key wrapping` enables more efficient re-keying events when the +object key may need to be replaced and consequently any data encrypted using +that key must be re-encrypted. Key wrapping minimizes the amount of data +encrypted using those keys to just other randomly chosen keys which can be +re-wrapped efficiently without needing to re-encrypt the larger amounts of data +that were encrypted using the random keys. + +.. note:: + + Re-keying is not currently implemented. Key wrapping is implemented + in anticipation of future re-keying operations. + + +Encryption middleware +--------------------- + +The encryption middleware is composed of an `encrypter` component and a +`decrypter` component. + +Encrypter operation +^^^^^^^^^^^^^^^^^^^ + +Custom user metadata +++++++++++++++++++++ + +The encrypter encrypts each item of custom user metadata using the object key +provided by the keymaster and an IV that is randomly chosen for that metadata +item. The encrypted values are stored as :ref:`transient_sysmeta` with +associated crypto-metadata appended to the encrypted value. For example:: + + X-Object-Meta-Private1: value1 + X-Object-Meta-Private2: value2 + +are transformed to:: + + X-Object-Transient-Sysmeta-Crypto-Meta-Private1: + E(value1, object_key, header_iv_1); swift_meta={"iv": header_iv_1, + "cipher": "AES_CTR_256"} + X-Object-Transient-Sysmeta-Crypto-Meta-Private2: + E(value2, object_key, header_iv_2); swift_meta={"iv": header_iv_2, + "cipher": "AES_CTR_256"} + +The unencrypted custom user metadata headers are removed. 
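+
+The following is a minimal sketch, not the actual keymaster or encrypter code,
+of how the key derivation and user metadata encryption described above could be
+expressed with the python ``cryptography`` package. The helper names
+``derive_key`` and ``encrypt_metadata_value`` are invented for this example,
+and the real middleware serializes and url-quotes the appended crypto-metadata
+slightly differently::
+
+    import base64
+    import hashlib
+    import hmac
+    import json
+    import os
+
+    from cryptography.hazmat.backends import default_backend
+    from cryptography.hazmat.primitives.ciphers import (
+        Cipher, algorithms, modes)
+
+    def derive_key(root_secret, path):
+        # object_key = HMAC(encryption_root_secret, "/a/c/o")
+        return hmac.new(root_secret, path, hashlib.sha256).digest()
+
+    def encrypt_metadata_value(value, key):
+        # AES-256 in CTR mode with a randomly chosen 16 byte IV
+        iv = os.urandom(16)
+        encryptor = Cipher(algorithms.AES(key), modes.CTR(iv),
+                           backend=default_backend()).encryptor()
+        ciphertext = encryptor.update(value) + encryptor.finalize()
+        # the crypto-metadata describing the IV and cipher is appended
+        # to the base64 encoded ciphertext
+        swift_meta = json.dumps({'iv': base64.b64encode(iv),
+                                 'cipher': 'AES_CTR_256'})
+        return '%s; swift_meta=%s' % (base64.b64encode(ciphertext),
+                                      swift_meta)
+
+    object_key = derive_key(os.urandom(32), '/a/c/o')
+    print(encrypt_metadata_value('value1', object_key))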
+ +Object body ++++++++++++ + +Encryption of an object body is performed using a randomly chosen body key +and a randomly chosen IV:: + + body_ciphertext = E(body_plaintext, body_key, body_iv) + +The body_key is wrapped using the object key provided by the keymaster and a +randomly chosen IV:: + + wrapped_body_key = E(body_key, object_key, body_key_iv) + +The encrypter stores the associated crypto-metadata in a system metadata +header:: + + X-Object-Sysmeta-Crypto-Body-Meta: + {"iv": body_iv, + "cipher": "AES_CTR_256", + "body_key": {"key": wrapped_body_key, + "iv": body_key_iv}} + +Note that in this case there is an extra item of crypto-metadata which stores +the wrapped body key and its IV. + +Entity tag +++++++++++ + +While encrypting the object body the encrypter also calculates the ETag (md5 +digest) of the plaintext body. This value is encrypted using the object key +provided by the keymaster and a randomly chosen IV, and saved as an item of +system metadata, with associated crypto-metadata appended to the encrypted +value:: + + X-Object-Sysmeta-Crypto-Etag: + E(md5(plaintext), object_key, etag_iv); swift_meta={"iv": etag_iv, + "cipher": "AES_CTR_256"} + +The encrypter also forces an encrypted version of the plaintext ETag to be sent +with container updates by adding an update override header to the PUT request. +The associated crypto-metadata is appended to the encrypted ETag value of this +update override header:: + + X-Object-Sysmeta-Container-Update-Override-Etag: + E(md5(plaintext), container_key, override_etag_iv); + meta={"iv": override_etag_iv, "cipher": "AES_CTR_256"} + +The container key is used for this encryption so that the decrypter is able +to decrypt the ETags in container listings when handling a container request, +since object keys may not be available in that context. + +Since the plaintext ETag value is only known once the encrypter has completed +processing the entire object body, the ``X-Object-Sysmeta-Crypto-Etag`` and +``X-Object-Sysmeta-Container-Update-Override-Etag`` headers are sent after the +encrypted object body using the proxy server's support for request footers. + +.. _conditional_requests: + +Conditional Requests +++++++++++++++++++++ + +In general, an object server evaluates conditional requests with +``If[-None]-Match`` headers by comparing values listed in an +``If[-None]-Match`` header against the ETag that is stored in the object +metadata. This is not possible when the ETag stored in object metadata has been +encrypted. The encrypter therefore calculates an HMAC using the object key and +the ETag while handling object PUT requests, and stores this under the metadata +key ``X-Object-Sysmeta-Crypto-Etag-Mac``:: + + X-Object-Sysmeta-Crypto-Etag-Mac: HMAC(object_key, md5(plaintext)) + +Like other ETag-related metadata, this is sent after the encrypted object body +using the proxy server's support for request footers. + +The encrypter similarly calculates an HMAC for each ETag value included in +``If[-None]-Match`` headers of conditional GET or HEAD requests, and appends +these to the ``If[-None]-Match`` header. The encrypter also sets the +``X-Backend-Etag-Is-At`` header to point to the previously stored +``X-Object-Sysmeta-Crypto-Etag-Mac`` metadata so that the object server +evaluates the conditional request by comparing the HMAC values included in the +``If[-None]-Match`` with the value stored under +``X-Object-Sysmeta-Crypto-Etag-Mac``. 
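+
+In sketch form, and assuming the same HMAC-SHA256 construction that is used
+for key derivation, the values being compared relate as follows (illustrative
+only; how the digest is encoded is an implementation detail, and a
+header-level example follows below)::
+
+  import hashlib
+  import hmac
+  import os
+
+  object_key = os.urandom(32)  # provided by the keymaster in practice
+  plaintext_etag = hashlib.md5(b'the object body').hexdigest()
+
+  # stored as X-Object-Sysmeta-Crypto-Etag-Mac at PUT time
+  etag_mac = hmac.new(object_key, plaintext_etag.encode('ascii'),
+                      hashlib.sha256).hexdigest()
+
+  # recomputed for each If[-None]-Match value at GET or HEAD time and
+  # compared by the object server against the stored value
+  match_etag = plaintext_etag
+  assert hmac.new(object_key, match_etag.encode('ascii'),
+                  hashlib.sha256).hexdigest() == etag_mac
+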
+For example, given a conditional request
+with header::
+
+  If-Match: match_etag
+
+the encrypter would transform the request headers to include::
+
+  If-Match: match_etag,HMAC(object_key, match_etag)
+  X-Backend-Etag-Is-At: X-Object-Sysmeta-Crypto-Etag-Mac
+
+This enables the object server to perform an encrypted comparison to check
+whether the ETags match, without leaking the ETag itself or leaking information
+about the object body.
+
+Decrypter operation
+^^^^^^^^^^^^^^^^^^^
+
+For each GET or HEAD request to an object, the decrypter inspects the response
+for encrypted items (revealed by crypto-metadata headers), and if any are
+discovered then it will:
+
+#. Fetch the object and container keys from the keymaster via its callback
+#. Decrypt the ``X-Object-Sysmeta-Crypto-Etag`` value
+#. Decrypt the ``X-Object-Sysmeta-Container-Update-Override-Etag`` value
+#. Decrypt metadata header values using the object key
+#. Decrypt the wrapped body key found in ``X-Object-Sysmeta-Crypto-Body-Meta``
+#. Decrypt the body using the body key
+
+For each GET request to a container that would include ETags in its response
+body, the decrypter will:
+
+#. GET the response body with the container listing
+#. Fetch the container key from the keymaster via its callback
+#. Decrypt any encrypted ETag entries in the container listing using the
+   container key
+
+
+Impact on other Swift services and features
+-------------------------------------------
+
+Encryption has no impact on :ref:`versioned_writes` other than that any
+previously unencrypted objects will be encrypted as they are copied to or from
+the versions container. Keymaster and encryption middlewares should be placed
+after ``versioned_writes`` in the proxy server pipeline, as described in
+:ref:`encryption_deployment`.
+
+`Container Sync` uses an internal client to GET objects that are to be sync'd.
+This internal client must be configured to use the keymaster and encryption
+middlewares as described :ref:`above `.
+
+Encryption has no impact on the `object-auditor` service. Since the ETag
+header saved with the object at rest is the md5 sum of the encrypted object
+body, the auditor will verify that encrypted data is valid.
+
+Encryption has no impact on the `object-expirer` service. ``X-Delete-At`` and
+``X-Delete-After`` headers are not encrypted.
+
+Encryption has no impact on the `object-replicator` and `object-reconstructor`
+services. These services are unaware of the object or EC fragment data being
+encrypted.
+
+Encryption has no impact on the `container-reconciler` service. The
+`container-reconciler` uses an internal client to move objects between
+different policy rings. The destination object has the same URL as the source
+object and the object is moved without re-encryption.
+
+
+Considerations for developers
+-----------------------------
+
+Developers should be aware that keymaster and encryption middlewares rely on
+the path of an object remaining unchanged. The included keymaster derives keys
+for containers and objects based on their paths and the
+``encryption_root_secret``. The keymaster does not rely on object metadata to
+inform its generation of keys for GET and HEAD requests because when handling
+:ref:`conditional_requests` it is required to provide the object key before any
+metadata has been read from the object.
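+
+The consequence of deriving keys purely from paths can be seen in a short
+sketch (a hypothetical ``derive_key`` helper using the same HMAC-SHA256
+construction described under key management above)::
+
+  import hashlib
+  import hmac
+  import os
+
+  def derive_key(root_secret, path):
+      return hmac.new(root_secret, path, hashlib.sha256).digest()
+
+  root_secret = os.urandom(32)  # stands in for encryption_root_secret
+
+  # a relocated object would derive a different key, so ciphertext written
+  # under the original path could no longer be decrypted
+  assert (derive_key(root_secret, b'/a/c/o') !=
+          derive_key(root_secret, b'/a/c/o-relocated'))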
+ +Developers should therefore give careful consideration to any new features that +would relocate object data and metadata within a Swift cluster by means that do +not cause the object data and metadata to pass through the encryption +middlewares in the proxy pipeline and be re-encrypted. + +The crypto-metadata associated with each encrypted item does include some +`key_id` metadata that is provided by the keymaster and contains the path used +to derive keys. This `key_id` metadata is persisted in anticipation of future +scenarios when it may be necessary to decrypt an object that has been relocated +without re-encrypting, in which case the metadata could be used to derive the +keys that were used for encryption. However, this alone is not sufficient to +handle conditional requests and to decrypt container listings where objects +have been relocated, and further work will be required to solve those issues. diff --git a/etc/proxy-server.conf-sample b/etc/proxy-server.conf-sample index aebb872787..517a9c29ad 100644 --- a/etc/proxy-server.conf-sample +++ b/etc/proxy-server.conf-sample @@ -79,7 +79,7 @@ bind_port = 8080 [pipeline:main] # This sample pipeline uses tempauth and is used for SAIO dev work and # testing. See below for a pipeline using keystone. -pipeline = catch_errors gatekeeper healthcheck proxy-logging cache container_sync bulk tempurl ratelimit tempauth copy container-quotas account-quotas slo dlo versioned_writes keymaster encryption proxy-logging proxy-server +pipeline = catch_errors gatekeeper healthcheck proxy-logging cache container_sync bulk tempurl ratelimit tempauth copy container-quotas account-quotas slo dlo versioned_writes proxy-logging proxy-server # The following pipeline shows keystone integration. Comment out the one # above and uncomment this one. Additional steps for integrating keystone are @@ -781,11 +781,7 @@ use = egg:swift#keymaster # be obtained by base-64 encoding a 32 byte (or longer) value generated by a # cryptographically secure random number generator. Changing the root secret is # likely to result in data loss. -# TODO - STOP SETTING THIS DEFAULT! This is only here while work -# continues on the feature/crypto branch. Later, this will be added -# to the devstack proxy-config so that gate tests can pass. -# base64 encoding of "dontEverUseThisIn_PRODUCTION_xxxxxxxxxxxxxxx" -encryption_root_secret = ZG9udEV2ZXJVc2VUaGlzSW5fUFJPRFVDVElPTl94eHh4eHh4eHh4eHh4eHg= +encryption_root_secret = changeme [filter:encryption] use = egg:swift#encryption diff --git a/test/functional/__init__.py b/test/functional/__init__.py index 0bf324f85d..52be849bfa 100644 --- a/test/functional/__init__.py +++ b/test/functional/__init__.py @@ -361,12 +361,6 @@ def in_process_setup(the_object_server=object_server): 'allow_account_management': 'true', 'account_autocreate': 'true', 'allow_versions': 'True', - # TODO - Remove encryption_root_secret - this is only necessary while - # encryption middleware is in the default proxy pipeline in - # proxy-server.conf-sample - # base64 encoding of "dontEverUseThisIn_PRODUCTION_xxxxxxxxxxxxxxx" - 'encryption_root_secret': - 'ZG9udEV2ZXJVc2VUaGlzSW5fUFJPRFVDVElPTl94eHh4eHh4eHh4eHh4eHg=', # Below are values used by the functional test framework, as well as # by the various in-process swift servers 'auth_host': '127.0.0.1',