2013-09-20 01:00:54 +08:00
|
|
|
# Copyright (c) 2010-2012 OpenStack Foundation
|
2012-11-01 16:14:58 -07:00
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
|
|
# implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
2014-04-16 17:16:57 -07:00
|
|
|
import itertools

from collections import defaultdict

import unittest

from mock import patch

from swift.proxy.controllers.base import headers_to_container_info, \
    headers_to_account_info, headers_to_object_info, get_container_info, \
    get_cache_key, get_account_info, get_info, get_object_info, \
    Controller, GetOrHeadHandler, bytes_to_skip
from swift.common.swob import Request, HTTPException, RESPONSE_REASONS
from swift.common import exceptions
from swift.common.utils import split_path
from swift.common.header_key_dict import HeaderKeyDict
from swift.common.http import is_success
from swift.common.storage_policy import StoragePolicy
from test.unit import fake_http_connect, FakeRing, FakeMemcache
from swift.proxy import server as proxy_server
from swift.common.request_helpers import get_sys_meta_prefix

from test.unit import patch_policies
|
|
|
|
|
2013-07-23 16:41:45 -07:00
|
|
|
|
2014-04-16 17:16:57 -07:00
|
|
|
class FakeResponse(object):
    """Minimal stand-in for a backend response used by the fakes below.

    Holds a status, a body, and a headers dict; subclasses supply
    ``base_headers`` stock headers that are merged into the response's
    headers only when the status is a success (2xx).
    """

    # Stock headers merged in on success; subclasses override this.
    base_headers = {}

    def __init__(self, status_int=200, headers=None, body=''):
        # status_int: HTTP status code for the fake response
        # headers: optional dict of response headers
        # body: response body (iterated by FakeApp, so a str works)
        self.status_int = status_int
        self._headers = headers or {}
        self.body = body

    @property
    def headers(self):
        # Only successful responses carry the stock backend headers;
        # error responses return just what the caller supplied.
        if is_success(self.status_int):
            self._headers.update(self.base_headers)
        return self._headers
|
2013-02-08 11:48:26 +01:00
|
|
|
|
|
|
|
|
2014-04-16 17:16:57 -07:00
|
|
|
class AccountResponse(FakeResponse):
    """Fake account response carrying stock account-stat headers."""

    base_headers = {
        'x-account-container-count': 333,
        'x-account-object-count': 1000,
        'x-account-bytes-used': 6666,
    }
|
|
|
|
|
|
|
|
|
|
|
|
class ContainerResponse(FakeResponse):
    """Fake container response carrying stock container-stat headers."""

    base_headers = {
        'x-container-object-count': 1000,
        'x-container-bytes-used': 6666,
    }
|
2013-02-08 11:48:26 +01:00
|
|
|
|
|
|
|
|
2014-04-16 17:16:57 -07:00
|
|
|
class ObjectResponse(FakeResponse):
    """Fake object response carrying stock object headers."""

    base_headers = {
        'content-length': 5555,
        'content-type': 'text/plain'
    }
|
|
|
|
|
|
|
|
|
|
|
|
class DynamicResponseFactory(object):
    """Produces account/container/object fake responses on demand.

    Statuses are drawn from the sequence given at construction time; when
    none is given, every response is a 200. ``stats`` counts how many
    responses of each type ('account', 'container', 'obj') were produced,
    which tests use to assert how often the app was hit.
    """

    def __init__(self, *statuses):
        if statuses:
            # Consume the caller's statuses in order, one per response.
            self.statuses = iter(statuses)
        else:
            # Default: an endless supply of 200s.
            self.statuses = itertools.repeat(200)
        self.stats = defaultdict(int)

    # Maps a request-path depth to the response class to instantiate.
    response_type = {
        'obj': ObjectResponse,
        'container': ContainerResponse,
        'account': AccountResponse,
    }

    def _get_response(self, type_):
        # Build one response of the given type and count it.
        self.stats[type_] += 1
        class_ = self.response_type[type_]
        return class_(next(self.statuses))

    def get_response(self, environ):
        """Return a fake response appropriate to environ's PATH_INFO.

        The path is split as /version/account[/container[/obj]]; the
        deepest present component decides the response type. The parsed
        names are attached to the response for later inspection.
        """
        (version, account, container, obj) = split_path(
            environ['PATH_INFO'], 2, 4, True)
        if obj:
            resp = self._get_response('obj')
        elif container:
            resp = self._get_response('container')
        else:
            resp = self._get_response('account')
        resp.account = account
        resp.container = container
        resp.obj = obj
        return resp
|
|
|
|
|
|
|
|
|
|
|
|
class FakeApp(object):
    """WSGI app stub that answers with responses from a factory.

    Captures every environ it is called with (``captured_envs``) so tests
    can inspect the backend requests that get_*_info generated.
    """

    # Cache validity periods the proxy normally supplies; constants here.
    recheck_container_existence = 30
    recheck_account_existence = 30

    def __init__(self, response_factory=None, statuses=None):
        # Either take a ready-made factory or build one from statuses.
        self.responses = response_factory or \
            DynamicResponseFactory(*statuses or [])
        self.captured_envs = []

    def __call__(self, environ, start_response):
        # Record the request, then synthesize a response for it.
        self.captured_envs.append(environ)
        response = self.responses.get_response(environ)
        reason = RESPONSE_REASONS[response.status_int][0]
        start_response('%d %s' % (response.status_int, reason),
                       [(k, v) for k, v in response.headers.items()])
        # WSGI bodies are iterables; iterating a str yields its chars.
        return iter(response.body)
|
|
|
|
|
|
|
|
|
|
|
|
class FakeCache(FakeMemcache):
    """FakeMemcache variant supporting a stub value and pre-seeded keys.

    If ``stub`` is truthy, every get() returns it regardless of key;
    otherwise lookups fall through to the normal backing store, which may
    be pre-populated via keyword arguments.
    """

    def __init__(self, stub=None, **pre_cached):
        super(FakeCache, self).__init__()
        if pre_cached:
            self.store.update(pre_cached)
        self.stub = stub

    def get(self, key):
        # NOTE(review): a falsy stub (e.g. {} or 0) falls through to the
        # store — intentional here since stub defaults to None.
        return self.stub or self.store.get(key)
|
2012-11-01 16:14:58 -07:00
|
|
|
|
|
|
|
|
2014-05-27 01:17:13 -07:00
|
|
|
@patch_policies([StoragePolicy(0, 'zero', True, object_ring=FakeRing())])
|
2012-11-01 16:14:58 -07:00
|
|
|
class TestFuncs(unittest.TestCase):
|
2013-03-30 15:55:29 +03:00
|
|
|
def setUp(self):
|
|
|
|
self.app = proxy_server.Application(None, FakeMemcache(),
|
|
|
|
account_ring=FakeRing(),
|
2014-05-27 01:17:13 -07:00
|
|
|
container_ring=FakeRing())
|
2013-03-30 15:55:29 +03:00
|
|
|
|
|
|
|
def test_get_info(self):
|
2014-04-16 17:16:57 -07:00
|
|
|
app = FakeApp()
|
2013-03-30 15:55:29 +03:00
|
|
|
# Do a non cached call to account
|
|
|
|
env = {}
|
2014-04-16 17:16:57 -07:00
|
|
|
info_a = get_info(app, env, 'a')
|
2013-03-30 15:55:29 +03:00
|
|
|
# Check that you got proper info
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(info_a['status'], 200)
|
|
|
|
self.assertEqual(info_a['bytes'], 6666)
|
|
|
|
self.assertEqual(info_a['total_object_count'], 1000)
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
|
2014-04-16 17:16:57 -07:00
|
|
|
# Make sure the app was called
|
|
|
|
self.assertEqual(app.responses.stats['account'], 1)
|
2013-03-30 15:55:29 +03:00
|
|
|
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
# Make sure the return value matches get_account_info
|
|
|
|
account_info = get_account_info({'PATH_INFO': '/v1/a'}, app)
|
|
|
|
self.assertEqual(info_a, account_info)
|
|
|
|
|
2013-03-30 15:55:29 +03:00
|
|
|
# Do an env cached call to account
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
app.responses.stats['account'] = 0
|
|
|
|
app.responses.stats['container'] = 0
|
|
|
|
|
2014-04-16 17:16:57 -07:00
|
|
|
info_a = get_info(app, env, 'a')
|
2013-03-30 15:55:29 +03:00
|
|
|
# Check that you got proper info
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(info_a['status'], 200)
|
|
|
|
self.assertEqual(info_a['bytes'], 6666)
|
|
|
|
self.assertEqual(info_a['total_object_count'], 1000)
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
|
2014-04-16 17:16:57 -07:00
|
|
|
# Make sure the app was NOT called AGAIN
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
self.assertEqual(app.responses.stats['account'], 0)
|
2013-03-30 15:55:29 +03:00
|
|
|
|
|
|
|
# This time do env cached call to account and non cached to container
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
app.responses.stats['account'] = 0
|
|
|
|
app.responses.stats['container'] = 0
|
|
|
|
|
2014-04-16 17:16:57 -07:00
|
|
|
info_c = get_info(app, env, 'a', 'c')
|
2013-03-30 15:55:29 +03:00
|
|
|
# Check that you got proper info
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(info_c['status'], 200)
|
|
|
|
self.assertEqual(info_c['bytes'], 6666)
|
|
|
|
self.assertEqual(info_c['object_count'], 1000)
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
# Make sure the app was called for container but not account
|
|
|
|
self.assertEqual(app.responses.stats['account'], 0)
|
2014-04-16 17:16:57 -07:00
|
|
|
self.assertEqual(app.responses.stats['container'], 1)
|
2013-03-30 15:55:29 +03:00
|
|
|
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
# This time do a non-cached call to account then non-cached to
|
2013-08-31 23:13:15 -04:00
|
|
|
# container
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
app.responses.stats['account'] = 0
|
|
|
|
app.responses.stats['container'] = 0
|
2014-04-16 17:16:57 -07:00
|
|
|
app = FakeApp()
|
2013-08-31 23:13:15 -04:00
|
|
|
env = {} # abandon previous call to env
|
2014-04-16 17:16:57 -07:00
|
|
|
info_c = get_info(app, env, 'a', 'c')
|
2013-03-30 15:55:29 +03:00
|
|
|
# Check that you got proper info
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(info_c['status'], 200)
|
|
|
|
self.assertEqual(info_c['bytes'], 6666)
|
|
|
|
self.assertEqual(info_c['object_count'], 1000)
|
2014-04-16 17:16:57 -07:00
|
|
|
# check app calls both account and container
|
|
|
|
self.assertEqual(app.responses.stats['account'], 1)
|
|
|
|
self.assertEqual(app.responses.stats['container'], 1)
|
2013-03-30 15:55:29 +03:00
|
|
|
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
# This time do an env-cached call to container while account is not
|
2013-08-31 23:13:15 -04:00
|
|
|
# cached
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
app.responses.stats['account'] = 0
|
|
|
|
app.responses.stats['container'] = 0
|
2014-04-16 17:16:57 -07:00
|
|
|
info_c = get_info(app, env, 'a', 'c')
|
2013-03-30 15:55:29 +03:00
|
|
|
# Check that you got proper info
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(info_a['status'], 200)
|
|
|
|
self.assertEqual(info_c['bytes'], 6666)
|
|
|
|
self.assertEqual(info_c['object_count'], 1000)
|
2013-03-30 15:55:29 +03:00
|
|
|
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
# no additional calls were made
|
|
|
|
self.assertEqual(app.responses.stats['account'], 0)
|
|
|
|
self.assertEqual(app.responses.stats['container'], 0)
|
2013-03-30 15:55:29 +03:00
|
|
|
|
2013-08-27 18:00:04 -07:00
|
|
|
def test_get_container_info_swift_source(self):
    """get_container_info forwards swift_source to every backend request.

    On a cold cache the call hits both the account and the container
    backends, so two captured request environments should carry the
    'MC' source tag.
    """
    fake_app = FakeApp()
    request = Request.blank("/v1/a/c", environ={'swift.cache': FakeCache()})
    get_container_info(request.environ, fake_app, swift_source='MC')
    observed_sources = [env['swift.source']
                        for env in fake_app.captured_envs]
    self.assertEqual(observed_sources, ['MC', 'MC'])
|
2013-08-27 18:00:04 -07:00
|
|
|
|
|
|
|
def test_get_object_info_swift_source(self):
    """get_object_info tags its single backend request with swift_source."""
    fake_app = FakeApp()
    request = Request.blank("/v1/a/c/o",
                            environ={'swift.cache': FakeCache()})
    get_object_info(request.environ, fake_app, swift_source='LU')
    observed_sources = [env['swift.source']
                        for env in fake_app.captured_envs]
    self.assertEqual(observed_sources, ['LU'])
|
2013-08-27 18:00:04 -07:00
|
|
|
|
2013-02-08 11:48:26 +01:00
|
|
|
def test_get_container_info_no_cache(self):
|
|
|
|
req = Request.blank("/v1/AUTH_account/cont",
|
|
|
|
environ={'swift.cache': FakeCache({})})
|
2014-04-16 17:16:57 -07:00
|
|
|
resp = get_container_info(req.environ, FakeApp())
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(resp['storage_policy'], '0')
|
|
|
|
self.assertEqual(resp['bytes'], 6666)
|
|
|
|
self.assertEqual(resp['object_count'], 1000)
|
2013-02-08 11:48:26 +01:00
|
|
|
|
2014-04-16 17:16:57 -07:00
|
|
|
def test_get_container_info_no_account(self):
|
|
|
|
responses = DynamicResponseFactory(404, 200)
|
|
|
|
app = FakeApp(responses)
|
|
|
|
req = Request.blank("/v1/AUTH_does_not_exist/cont")
|
|
|
|
info = get_container_info(req.environ, app)
|
|
|
|
self.assertEqual(info['status'], 0)
|
|
|
|
|
|
|
|
def test_get_container_info_no_auto_account(self):
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
responses = DynamicResponseFactory(200)
|
2014-04-16 17:16:57 -07:00
|
|
|
app = FakeApp(responses)
|
|
|
|
req = Request.blank("/v1/.system_account/cont")
|
|
|
|
info = get_container_info(req.environ, app)
|
|
|
|
self.assertEqual(info['status'], 200)
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(info['bytes'], 6666)
|
|
|
|
self.assertEqual(info['object_count'], 1000)
|
2014-04-16 17:16:57 -07:00
|
|
|
|
2013-02-08 11:48:26 +01:00
|
|
|
def test_get_container_info_cache(self):
|
2014-04-16 17:16:57 -07:00
|
|
|
cache_stub = {
|
|
|
|
'status': 404, 'bytes': 3333, 'object_count': 10,
|
|
|
|
'versions': u"\u1F4A9"}
|
2013-02-08 11:48:26 +01:00
|
|
|
req = Request.blank("/v1/account/cont",
|
2014-04-16 17:16:57 -07:00
|
|
|
environ={'swift.cache': FakeCache(cache_stub)})
|
|
|
|
resp = get_container_info(req.environ, FakeApp())
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(resp['storage_policy'], '0')
|
|
|
|
self.assertEqual(resp['bytes'], 3333)
|
|
|
|
self.assertEqual(resp['object_count'], 10)
|
|
|
|
self.assertEqual(resp['status'], 404)
|
|
|
|
self.assertEqual(resp['versions'], "\xe1\xbd\x8a\x39")
|
2013-02-08 11:48:26 +01:00
|
|
|
|
|
|
|
def test_get_container_info_env(self):
|
2016-04-27 13:31:11 -05:00
|
|
|
cache_key = get_cache_key("account", "cont")
|
Make info caching work across subrequests
Previously, if you called get_account_info, get_container_info, or
get_object_info, then the results of that call would be cached in the
WSGI environment as top-level keys. This is okay, except that if you,
in middleware, copy the WSGI environment and then make a subrequest
using the copy, information retrieved in the subrequest is cached
only in the copy and not in the original. This can mean lots of extra
trips to memcache for, say, SLO validation where the segments are in
another container; the object HEAD ends up getting container info for
the segment container, but then the next object HEAD gets it again.
This commit moves the cache for get_*_info into a dictionary at
environ['swift.infocache']; this way, you can shallow-copy the request
environment and still get the benefits from the cache.
Change-Id: I3481b38b41c33cd1e39e19baab56193c5f9bf6ac
2016-01-21 13:19:30 -08:00
|
|
|
req = Request.blank(
|
|
|
|
"/v1/account/cont",
|
2016-04-27 13:31:11 -05:00
|
|
|
environ={'swift.infocache': {cache_key: {'bytes': 3867}},
|
Make info caching work across subrequests
Previously, if you called get_account_info, get_container_info, or
get_object_info, then the results of that call would be cached in the
WSGI environment as top-level keys. This is okay, except that if you,
in middleware, copy the WSGI environment and then make a subrequest
using the copy, information retrieved in the subrequest is cached
only in the copy and not in the original. This can mean lots of extra
trips to memcache for, say, SLO validation where the segments are in
another container; the object HEAD ends up getting container info for
the segment container, but then the next object HEAD gets it again.
This commit moves the cache for get_*_info into a dictionary at
environ['swift.infocache']; this way, you can shallow-copy the request
environment and still get the benefits from the cache.
Change-Id: I3481b38b41c33cd1e39e19baab56193c5f9bf6ac
2016-01-21 13:19:30 -08:00
|
|
|
'swift.cache': FakeCache({})})
|
2013-02-08 11:48:26 +01:00
|
|
|
resp = get_container_info(req.environ, 'xxx')
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(resp['bytes'], 3867)
|
2013-02-08 11:48:26 +01:00
|
|
|
|
2013-08-27 18:00:04 -07:00
|
|
|
def test_get_account_info_swift_source(self):
|
2014-04-16 17:16:57 -07:00
|
|
|
app = FakeApp()
|
|
|
|
req = Request.blank("/v1/a", environ={'swift.cache': FakeCache()})
|
|
|
|
get_account_info(req.environ, app, swift_source='MC')
|
Make info caching work across subrequests
Previously, if you called get_account_info, get_container_info, or
get_object_info, then the results of that call would be cached in the
WSGI environment as top-level keys. This is okay, except that if you,
in middleware, copy the WSGI environment and then make a subrequest
using the copy, information retrieved in the subrequest is cached
only in the copy and not in the original. This can mean lots of extra
trips to memcache for, say, SLO validation where the segments are in
another container; the object HEAD ends up getting container info for
the segment container, but then the next object HEAD gets it again.
This commit moves the cache for get_*_info into a dictionary at
environ['swift.infocache']; this way, you can shallow-copy the request
environment and still get the benefits from the cache.
Change-Id: I3481b38b41c33cd1e39e19baab56193c5f9bf6ac
2016-01-21 13:19:30 -08:00
|
|
|
self.assertEqual([e['swift.source'] for e in app.captured_envs],
|
|
|
|
['MC'])
|
|
|
|
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
def test_get_account_info_swift_owner(self):
|
|
|
|
app = FakeApp()
|
|
|
|
req = Request.blank("/v1/a", environ={'swift.cache': FakeCache()})
|
|
|
|
get_account_info(req.environ, app)
|
|
|
|
self.assertEqual([e['swift_owner'] for e in app.captured_envs],
|
|
|
|
[True])
|
|
|
|
|
Make info caching work across subrequests
Previously, if you called get_account_info, get_container_info, or
get_object_info, then the results of that call would be cached in the
WSGI environment as top-level keys. This is okay, except that if you,
in middleware, copy the WSGI environment and then make a subrequest
using the copy, information retrieved in the subrequest is cached
only in the copy and not in the original. This can mean lots of extra
trips to memcache for, say, SLO validation where the segments are in
another container; the object HEAD ends up getting container info for
the segment container, but then the next object HEAD gets it again.
This commit moves the cache for get_*_info into a dictionary at
environ['swift.infocache']; this way, you can shallow-copy the request
environment and still get the benefits from the cache.
Change-Id: I3481b38b41c33cd1e39e19baab56193c5f9bf6ac
2016-01-21 13:19:30 -08:00
|
|
|
def test_get_account_info_infocache(self):
|
|
|
|
app = FakeApp()
|
|
|
|
ic = {}
|
|
|
|
req = Request.blank("/v1/a", environ={'swift.cache': FakeCache(),
|
|
|
|
'swift.infocache': ic})
|
|
|
|
get_account_info(req.environ, app)
|
|
|
|
got_infocaches = [e['swift.infocache'] for e in app.captured_envs]
|
|
|
|
self.assertEqual(1, len(got_infocaches))
|
|
|
|
self.assertIs(ic, got_infocaches[0])
|
2013-08-27 18:00:04 -07:00
|
|
|
|
2013-02-08 11:48:26 +01:00
|
|
|
def test_get_account_info_no_cache(self):
|
2014-04-16 17:16:57 -07:00
|
|
|
app = FakeApp()
|
2013-02-08 11:48:26 +01:00
|
|
|
req = Request.blank("/v1/AUTH_account",
|
|
|
|
environ={'swift.cache': FakeCache({})})
|
2014-04-16 17:16:57 -07:00
|
|
|
resp = get_account_info(req.environ, app)
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(resp['bytes'], 6666)
|
|
|
|
self.assertEqual(resp['total_object_count'], 1000)
|
2013-02-08 11:48:26 +01:00
|
|
|
|
|
|
|
def test_get_account_info_cache(self):
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
# Works with fake apps that return ints in the headers
|
2013-02-08 11:48:26 +01:00
|
|
|
cached = {'status': 404,
|
|
|
|
'bytes': 3333,
|
|
|
|
'total_object_count': 10}
|
|
|
|
req = Request.blank("/v1/account/cont",
|
|
|
|
environ={'swift.cache': FakeCache(cached)})
|
2014-04-16 17:16:57 -07:00
|
|
|
resp = get_account_info(req.environ, FakeApp())
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(resp['bytes'], 3333)
|
|
|
|
self.assertEqual(resp['total_object_count'], 10)
|
|
|
|
self.assertEqual(resp['status'], 404)
|
2013-02-08 11:48:26 +01:00
|
|
|
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
# Works with strings too, like you get when parsing HTTP headers
|
|
|
|
# that came in through a socket from the account server
|
2013-03-30 15:55:29 +03:00
|
|
|
cached = {'status': 404,
|
|
|
|
'bytes': '3333',
|
|
|
|
'container_count': '234',
|
|
|
|
'total_object_count': '10',
|
|
|
|
'meta': {}}
|
|
|
|
req = Request.blank("/v1/account/cont",
|
|
|
|
environ={'swift.cache': FakeCache(cached)})
|
2014-04-16 17:16:57 -07:00
|
|
|
resp = get_account_info(req.environ, FakeApp())
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(resp['status'], 404)
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
self.assertEqual(resp['bytes'], 3333)
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(resp['container_count'], 234)
|
|
|
|
self.assertEqual(resp['meta'], {})
|
Fix up get_account_info and get_container_info
get_account_info used to work like this:
* make an account HEAD request
* ignore the response
* get the account info by digging around in the request environment,
where it had been deposited by elves or something
Not actually elves, but the proxy's GETorHEAD_base method would take
the HEAD response and cache it in the response environment, which was
the same object as the request environment, thus enabling
get_account_info to find it.
This was extraordinarily brittle. If a WSGI middleware were to
shallow-copy the request environment, then any middlewares to its left
could not use get_account_info, as the left middleware's request
environment would no longer be identical to the response environment
down in GETorHEAD_base.
Now, get_account_info works like this:
* make an account HEAD request.
* if the account info is in the request environment, return it. This
is an optimization to avoid a double-set in memcached.
* else, compute the account info from the response headers, store it
in caches, and return it.
This is much easier to think about; get_account_info can get and cache
account info all on its own; the cache check and cache set are right
next to each other.
All the above is true for get_container_info as well.
get_info() is still around, but it's just a shim. It was trying to
unify get_account_info and get_container_info to exploit the
commonalities, but the number of times that "if container:" showed up
in get_info and its helpers really indicated that something was
wrong. I'd rather have two functions with some duplication than one
function with no duplication but a bunch of "if container:" branches.
Other things of note:
* a HEAD request to a deleted account returns 410, but
get_account_info would return 404 since the 410 came from the
account controller *after* GETorHEAD_base ran. Now
get_account_info returns 410 as well.
* cache validity period (recheck_account_existence and
recheck_container_existence) is now communicated to
get_account_info via an X-Backend header. This way,
get_account_info doesn't need a reference to the
swift.proxy.server.Application object.
* both logged swift_source values are now correct for
get_container_info calls; before, on a cold cache,
get_container_info would call get_account_info but not pass along
swift_source, resulting in get_account_info logging "GET_INFO" as
the source. Amusingly, there was a unit test asserting this bogus
behavior.
* callers that modify the return value of get_account_info or of
get_container_info don't modify what's stored in swift.infocache.
* get_account_info on an account that *can* be autocreated but has
not been will return a 200, same as a HEAD request. The old
behavior was a 404 from get_account_info but a 200 from
HEAD. Callers can tell the difference by looking at
info['account_really_exists'] if they need to know the difference
(there is one call site that needs to know, in container
PUT). Note: this is for all accounts when the proxy's
"account_autocreate" setting is on.
Change-Id: I5167714025ec7237f7e6dd4759c2c6eb959b3fca
2016-02-11 15:51:45 -08:00
|
|
|
self.assertEqual(resp['total_object_count'], 10)
|
2013-03-30 15:55:29 +03:00
|
|
|
|
2013-02-08 11:48:26 +01:00
|
|
|
def test_get_account_info_env(self):
|
2016-04-27 13:31:11 -05:00
|
|
|
cache_key = get_cache_key("account")
|
Make info caching work across subrequests
Previously, if you called get_account_info, get_container_info, or
get_object_info, then the results of that call would be cached in the
WSGI environment as top-level keys. This is okay, except that if you,
in middleware, copy the WSGI environment and then make a subrequest
using the copy, information retrieved in the subrequest is cached
only in the copy and not in the original. This can mean lots of extra
trips to memcache for, say, SLO validation where the segments are in
another container; the object HEAD ends up getting container info for
the segment container, but then the next object HEAD gets it again.
This commit moves the cache for get_*_info into a dictionary at
environ['swift.infocache']; this way, you can shallow-copy the request
environment and still get the benefits from the cache.
Change-Id: I3481b38b41c33cd1e39e19baab56193c5f9bf6ac
2016-01-21 13:19:30 -08:00
|
|
|
req = Request.blank(
|
|
|
|
"/v1/account",
|
2016-04-27 13:31:11 -05:00
|
|
|
environ={'swift.infocache': {cache_key: {'bytes': 3867}},
|
Make info caching work across subrequests
Previously, if you called get_account_info, get_container_info, or
get_object_info, then the results of that call would be cached in the
WSGI environment as top-level keys. This is okay, except that if you,
in middleware, copy the WSGI environment and then make a subrequest
using the copy, information retrieved in the subrequest is cached
only in the copy and not in the original. This can mean lots of extra
trips to memcache for, say, SLO validation where the segments are in
another container; the object HEAD ends up getting container info for
the segment container, but then the next object HEAD gets it again.
This commit moves the cache for get_*_info into a dictionary at
environ['swift.infocache']; this way, you can shallow-copy the request
environment and still get the benefits from the cache.
Change-Id: I3481b38b41c33cd1e39e19baab56193c5f9bf6ac
2016-01-21 13:19:30 -08:00
|
|
|
'swift.cache': FakeCache({})})
|
2013-02-08 11:48:26 +01:00
|
|
|
resp = get_account_info(req.environ, 'xxx')
|
2015-08-06 10:01:17 -05:00
|
|
|
self.assertEqual(resp['bytes'], 3867)
|
2013-02-08 11:48:26 +01:00
|
|
|
|
2013-07-16 16:39:23 +02:00
|
|
|
def test_get_object_info_env(self):
    # Info already present in swift.infocache must be served straight
    # from the cache: the app argument is a bogus string, so any
    # attempt to hit the backend would fail loudly.
    info = {
        'status': 200,
        'length': 3333,
        'type': 'application/json',
        'meta': {},
    }
    key = get_cache_key("account", "cont", "obj")
    env = {'swift.infocache': {key: info},
           'swift.cache': FakeCache({})}
    req = Request.blank("/v1/account/cont/obj", environ=env)
    resp = get_object_info(req.environ, 'xxx')
    self.assertEqual(resp['length'], 3333)
    self.assertEqual(resp['type'], 'application/json')
|
2013-07-16 16:39:23 +02:00
|
|
|
|
|
|
|
def test_get_object_info_no_env(self):
    # With nothing cached, get_object_info must make exactly one
    # backend request, and only for the object -- no account or
    # container lookups.
    fake_app = FakeApp()
    req = Request.blank("/v1/account/cont/obj",
                        environ={'swift.cache': FakeCache({})})
    resp = get_object_info(req.environ, fake_app)
    for server_type, count in (('account', 0),
                               ('container', 0),
                               ('obj', 1)):
        self.assertEqual(fake_app.responses.stats[server_type], count)
    self.assertEqual(resp['length'], 5555)
    self.assertEqual(resp['type'], 'text/plain')
|
2013-07-16 16:39:23 +02:00
|
|
|
|
2015-07-04 17:08:32 -05:00
|
|
|
def test_options(self):
    # A CORS preflight OPTIONS from a whitelisted origin succeeds.
    controller = Controller(self.app)
    controller.account_name = 'a'
    controller.container_name = 'c'
    origin = 'http://m.com'
    self.app.cors_allow_origin = [origin]
    req = Request.blank(
        '/v1/a/c/o',
        environ={'swift.cache': FakeCache()},
        headers={'Origin': origin,
                 'Access-Control-Request-Method': 'GET'})

    with patch('swift.proxy.controllers.base.'
               'http_connect', fake_http_connect(200)):
        resp = controller.OPTIONS(req)
    self.assertEqual(resp.status_int, 200)
|
|
|
|
|
2015-10-22 10:14:29 -05:00
|
|
|
def test_options_with_null_allow_origin(self):
    # A container CORS rule of '*' lets any Origin preflight through.
    controller = Controller(self.app)
    controller.account_name = 'a'
    controller.container_name = 'c'

    def stub_container_info(*args):
        # Stand-in for the real container_info lookup.
        return {'cors': {'allow_origin': '*'}}

    controller.container_info = stub_container_info
    req = Request.blank(
        '/v1/a/c/o',
        environ={'swift.cache': FakeCache()},
        headers={'Origin': '*',
                 'Access-Control-Request-Method': 'GET'})

    with patch('swift.proxy.controllers.base.'
               'http_connect', fake_http_connect(200)):
        resp = controller.OPTIONS(req)
    self.assertEqual(resp.status_int, 200)
|
|
|
|
|
2015-07-04 17:08:32 -05:00
|
|
|
def test_options_unauthorized(self):
    # An Origin not present in cors_allow_origin must get a 401.
    controller = Controller(self.app)
    controller.account_name = 'a'
    controller.container_name = 'c'
    self.app.cors_allow_origin = ['http://NOT_IT']
    req = Request.blank(
        '/v1/a/c/o',
        environ={'swift.cache': FakeCache()},
        headers={'Origin': 'http://m.com',
                 'Access-Control-Request-Method': 'GET'})

    with patch('swift.proxy.controllers.base.'
               'http_connect', fake_http_connect(200)):
        resp = controller.OPTIONS(req)
    self.assertEqual(resp.status_int, 401)
|
|
|
|
|
2012-11-01 16:14:58 -07:00
|
|
|
def test_headers_to_container_info_missing(self):
    """A 404 with no headers yields a 404 status and null ACLs."""
    resp = headers_to_container_info({}, 404)
    self.assertEqual(resp['status'], 404)
    # assertIsNone is the idiomatic unittest check and produces a
    # clearer failure message than assertEqual(..., None).
    self.assertIsNone(resp['read_acl'])
    self.assertIsNone(resp['write_acl'])
|
2012-11-01 16:14:58 -07:00
|
|
|
|
|
|
|
def test_headers_to_container_info_meta(self):
    # User metadata is collected case-insensitively under 'meta' with
    # the X-Container-Meta- prefix stripped.
    headers = {'X-Container-Meta-Whatevs': 14,
               'x-container-meta-somethingelse': 0}
    info = headers_to_container_info(headers.items(), 200)
    meta = info['meta']
    self.assertEqual(len(meta), 2)
    self.assertEqual(meta['whatevs'], 14)
    self.assertEqual(meta['somethingelse'], 0)
|
2012-11-01 16:14:58 -07:00
|
|
|
|
2013-12-03 22:02:39 +00:00
|
|
|
def test_headers_to_container_info_sys_meta(self):
    # System metadata is collected under 'sysmeta' with the sysmeta
    # prefix stripped, case-insensitively.
    prefix = get_sys_meta_prefix('container')
    headers = {'%sWhatevs' % prefix: 14,
               '%ssomethingelse' % prefix: 0}
    info = headers_to_container_info(headers.items(), 200)
    sysmeta = info['sysmeta']
    self.assertEqual(len(sysmeta), 2)
    self.assertEqual(sysmeta['whatevs'], 14)
    self.assertEqual(sysmeta['somethingelse'], 0)
|
2013-12-03 22:02:39 +00:00
|
|
|
|
2012-11-01 16:14:58 -07:00
|
|
|
def test_headers_to_container_info_values(self):
    # Well-known container headers map onto their info keys.
    headers = {
        'x-container-read': 'readvalue',
        'x-container-write': 'writevalue',
        'x-container-sync-key': 'keyvalue',
        'x-container-meta-access-control-allow-origin': 'here',
    }
    info = headers_to_container_info(headers.items(), 200)
    self.assertEqual(info['read_acl'], 'readvalue')
    self.assertEqual(info['write_acl'], 'writevalue')
    self.assertEqual(info['cors']['allow_origin'], 'here')

    # An unrecognized header must not change the parsed result.
    headers['x-unused-header'] = 'blahblahblah'
    self.assertEqual(
        info, headers_to_container_info(headers.items(), 200))
|
2013-02-08 11:48:26 +01:00
|
|
|
|
2015-10-22 10:14:29 -05:00
|
|
|
def test_container_info_without_req(self):
    # Without an originating request (and hence no cache to consult),
    # container_info falls back to a record whose status is 0.
    controller = Controller(self.app)
    controller.account_name = 'a'
    controller.container_name = 'c'

    info = controller.container_info(controller.account_name,
                                     controller.container_name)
    self.assertEqual(info['status'], 0)
|
|
|
|
|
2013-02-08 11:48:26 +01:00
|
|
|
def test_headers_to_account_info_missing(self):
    """A 404 with no headers yields a 404 status and null counters."""
    resp = headers_to_account_info({}, 404)
    self.assertEqual(resp['status'], 404)
    # assertIsNone is the idiomatic unittest check and produces a
    # clearer failure message than assertEqual(..., None).
    self.assertIsNone(resp['bytes'])
    self.assertIsNone(resp['container_count'])
|
2013-02-08 11:48:26 +01:00
|
|
|
|
|
|
|
def test_headers_to_account_info_meta(self):
    # User metadata is collected case-insensitively under 'meta' with
    # the X-Account-Meta- prefix stripped.
    headers = {'X-Account-Meta-Whatevs': 14,
               'x-account-meta-somethingelse': 0}
    info = headers_to_account_info(headers.items(), 200)
    meta = info['meta']
    self.assertEqual(len(meta), 2)
    self.assertEqual(meta['whatevs'], 14)
    self.assertEqual(meta['somethingelse'], 0)
|
2013-02-08 11:48:26 +01:00
|
|
|
|
2013-12-03 22:02:39 +00:00
|
|
|
def test_headers_to_account_info_sys_meta(self):
    # System metadata is collected under 'sysmeta' with the sysmeta
    # prefix stripped, case-insensitively.
    prefix = get_sys_meta_prefix('account')
    headers = {'%sWhatevs' % prefix: 14,
               '%ssomethingelse' % prefix: 0}
    info = headers_to_account_info(headers.items(), 200)
    sysmeta = info['sysmeta']
    self.assertEqual(len(sysmeta), 2)
    self.assertEqual(sysmeta['whatevs'], 14)
    self.assertEqual(sysmeta['somethingelse'], 0)
|
2013-12-03 22:02:39 +00:00
|
|
|
|
2013-02-08 11:48:26 +01:00
|
|
|
def test_headers_to_account_info_values(self):
    # Well-known account headers map onto their info keys.
    headers = {
        'x-account-object-count': '10',
        'x-account-container-count': '20',
    }
    info = headers_to_account_info(headers.items(), 200)
    self.assertEqual(info['total_object_count'], '10')
    self.assertEqual(info['container_count'], '20')

    # An unrecognized header must not change the parsed result.
    headers['x-unused-header'] = 'blahblahblah'
    self.assertEqual(
        info, headers_to_account_info(headers.items(), 200))
|
2013-07-16 16:39:23 +02:00
|
|
|
|
|
|
|
def test_headers_to_object_info_missing(self):
    """A 404 with no headers yields a 404 status and null fields."""
    resp = headers_to_object_info({}, 404)
    self.assertEqual(resp['status'], 404)
    # assertIsNone is the idiomatic unittest check and produces a
    # clearer failure message than assertEqual(..., None).
    self.assertIsNone(resp['length'])
    self.assertIsNone(resp['etag'])
|
2013-07-16 16:39:23 +02:00
|
|
|
|
|
|
|
def test_headers_to_object_info_meta(self):
    # User metadata is collected case-insensitively under 'meta' with
    # the X-Object-Meta- prefix stripped.
    headers = {'X-Object-Meta-Whatevs': 14,
               'x-object-meta-somethingelse': 0}
    info = headers_to_object_info(headers.items(), 200)
    meta = info['meta']
    self.assertEqual(len(meta), 2)
    self.assertEqual(meta['whatevs'], 14)
    self.assertEqual(meta['somethingelse'], 0)
|
2013-07-16 16:39:23 +02:00
|
|
|
|
2015-02-09 18:16:25 -06:00
|
|
|
def test_headers_to_object_info_sys_meta(self):
    # System metadata is collected under 'sysmeta' with the sysmeta
    # prefix stripped, case-insensitively.
    prefix = get_sys_meta_prefix('object')
    headers = {'%sWhatevs' % prefix: 14,
               '%ssomethingelse' % prefix: 0}
    info = headers_to_object_info(headers.items(), 200)
    sysmeta = info['sysmeta']
    self.assertEqual(len(sysmeta), 2)
    self.assertEqual(sysmeta['whatevs'], 14)
    self.assertEqual(sysmeta['somethingelse'], 0)
|
2015-02-09 18:16:25 -06:00
|
|
|
|
2013-07-16 16:39:23 +02:00
|
|
|
def test_headers_to_object_info_values(self):
    # Content-Length and Content-Type map to 'length' and 'type'.
    headers = {
        'content-length': '1024',
        'content-type': 'application/json',
    }
    info = headers_to_object_info(headers.items(), 200)
    self.assertEqual(info['length'], '1024')
    self.assertEqual(info['type'], 'application/json')

    # An unrecognized header must not change the parsed result.
    headers['x-unused-header'] = 'blahblahblah'
    self.assertEqual(
        info, headers_to_object_info(headers.items(), 200))
|
2013-10-30 21:43:35 +00:00
|
|
|
|
Foundational support for PUT and GET of erasure-coded objects
This commit makes it possible to PUT an object into Swift and have it
stored using erasure coding instead of replication, and also to GET
the object back from Swift at a later time.
This works by splitting the incoming object into a number of segments,
erasure-coding each segment in turn to get fragments, then
concatenating the fragments into fragment archives. Segments are 1 MiB
in size, except the last, which is between 1 B and 1 MiB.
+====================================================================+
| object data |
+====================================================================+
|
+------------------------+----------------------+
| | |
v v v
+===================+ +===================+ +==============+
| segment 1 | | segment 2 | ... | segment N |
+===================+ +===================+ +==============+
| |
| |
v v
/=========\ /=========\
| pyeclib | | pyeclib | ...
\=========/ \=========/
| |
| |
+--> fragment A-1 +--> fragment A-2
| |
| |
| |
| |
| |
+--> fragment B-1 +--> fragment B-2
| |
| |
... ...
Then, object server A gets the concatenation of fragment A-1, A-2,
..., A-N, so its .data file looks like this (called a "fragment archive"):
+=====================================================================+
| fragment A-1 | fragment A-2 | ... | fragment A-N |
+=====================================================================+
Since this means that the object server never sees the object data as
the client sent it, we have to do a few things to ensure data
integrity.
First, the proxy has to check the Etag if the client provided it; the
object server can't do it since the object server doesn't see the raw
data.
Second, if the client does not provide an Etag, the proxy computes it
and uses the MIME-PUT mechanism to provide it to the object servers
after the object body. Otherwise, the object would not have an Etag at
all.
Third, the proxy computes the MD5 of each fragment archive and sends
it to the object server using the MIME-PUT mechanism. With replicated
objects, the proxy checks that the Etags from all the object servers
match, and if they don't, returns a 500 to the client. This mitigates
the risk of data corruption in one of the proxy --> object connections,
and signals to the client when it happens. With EC objects, we can't
use that same mechanism, so we must send the checksum with each
fragment archive to get comparable protection.
On the GET path, the inverse happens: the proxy connects to a bunch of
object servers (M of them, for an M+K scheme), reads one fragment at a
time from each fragment archive, decodes those fragments into a
segment, and serves the segment to the client.
When an object server dies partway through a GET response, any
partially-fetched fragment is discarded, the resumption point is wound
back to the nearest fragment boundary, and the GET is retried with the
next object server.
GET requests for a single byterange work; GET requests for multiple
byteranges do not.
There are a number of things _not_ included in this commit. Some of
them are listed here:
* multi-range GET
* deferred cleanup of old .data files
* durability (daemon to reconstruct missing archives)
Co-Authored-By: Alistair Coles <alistair.coles@hp.com>
Co-Authored-By: Thiago da Silva <thiago@redhat.com>
Co-Authored-By: John Dickinson <me@not.mn>
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Co-Authored-By: Tushar Gohad <tushar.gohad@intel.com>
Co-Authored-By: Paul Luse <paul.e.luse@intel.com>
Co-Authored-By: Christian Schwede <christian.schwede@enovance.com>
Co-Authored-By: Yuan Zhou <yuan.zhou@intel.com>
Change-Id: I9c13c03616489f8eab7dcd7c5f21237ed4cb6fd2
2014-10-22 13:18:34 -07:00
|
|
|
def test_base_have_quorum(self):
    """Spot-check have_quorum() across mixes of statuses and sizes."""
    base = Controller(self.app)
    # assertTrue/assertFalse give clearer failure output than
    # comparing against literal booleans with assertEqual.
    self.assertFalse(base.have_quorum([201, 404], 3))
    self.assertTrue(base.have_quorum([201, 201], 4))
    self.assertFalse(base.have_quorum([201], 4))
    self.assertTrue(base.have_quorum([201, 201, 404, 404], 4))
    self.assertFalse(base.have_quorum([201, 302, 418, 503], 4))
    self.assertTrue(base.have_quorum([201, 503, 503, 201], 4))
    self.assertTrue(base.have_quorum([201, 201], 3))
    self.assertTrue(base.have_quorum([404, 404], 3))
    self.assertTrue(base.have_quorum([201, 201], 2))
    self.assertTrue(base.have_quorum([201, 404], 2))
    self.assertTrue(base.have_quorum([404, 404], 2))
    self.assertTrue(base.have_quorum([201, 404, 201, 201], 4))
|
2013-11-04 17:06:06 +00:00
|
|
|
|
Treat 404s as 204 on object delete in proxy
This change adds an optional overrides map to _make_request method
in the base Controller class.
def make_requests(self, req, ring, part, method, path, headers,
query_string='', overrides=None)
Which will be passed on the the best_response method. If set and
no quorum it reached, the override map is used to attempt to find
quorum.
The overrides map is in the form:
{ <response>: <override response>, .. }
The ObjectController, in the DELETE method now passes an override map
to make_requests method in the base Controller class in the form of:
{ 404: 204 }
Statuses/responses that have been overridden are used in calculation
of the quorum but never returned to the user. They are replaced by:
(STATUS, '', '', '')
And left out of the search for best response.
Change-Id: Ibf969eac3a09d67668d5275e808ed626152dd7eb
Closes-Bug: 1318375
2014-08-14 14:39:18 +10:00
|
|
|
def test_best_response_overrides(self):
    # Overridden statuses count toward quorum but are never returned
    # to the client; only genuine responses can be the best response.
    ctrl = Controller(self.app)
    server_type = "Base DELETE"
    req = Request.blank('/v1/a/c/o', method='DELETE')
    statuses, reasons, headers, bodies = zip(
        (302, 'Found', '', 'The resource has moved temporarily.'),
        (100, 'Continue', '', ''),
        (404, 'Not Found', '', 'Custom body'))

    def pick(overrides):
        # Run best_response with the given override map.
        return ctrl.best_response(req, statuses, reasons, bodies,
                                  server_type, headers=headers,
                                  overrides=overrides)

    # Overridden responses alone must not be able to form a quorum.
    self.assertEqual(pick({302: 204, 100: 204}).status,
                     '503 Service Unavailable')

    # Overriding just the 100 produces a 404 quorum; the genuine 404
    # (with its custom body) is the response that comes back.
    resp = pick({100: 404})
    self.assertEqual(resp.status, '404 Not Found')
    self.assertEqual(resp.body, 'Custom body')
|
|
|
|
|
2013-11-04 17:06:06 +00:00
|
|
|
def test_range_fast_forward(self):
    req = Request.blank('/')

    def handler_for(range_headers):
        # Fresh handler with the given backend Range headers.
        return GetOrHeadHandler(None, req, None, None, None, None,
                                range_headers)

    # No Range header: resuming after 50 bytes asks for the rest.
    handler = handler_for({})
    handler.fast_forward(50)
    self.assertEqual(handler.backend_headers['Range'], 'bytes=50-')

    # Bounded range: the start edge advances, the end stays fixed.
    handler = handler_for({'Range': 'bytes=23-50'})
    handler.fast_forward(20)
    self.assertEqual(handler.backend_headers['Range'], 'bytes=43-50')
    self.assertRaises(HTTPException,
                      handler.fast_forward, 80)
    self.assertRaises(exceptions.RangeAlreadyComplete,
                      handler.fast_forward, 8)

    # Open-ended range: only the start edge moves.
    handler = handler_for({'Range': 'bytes=23-'})
    handler.fast_forward(20)
    self.assertEqual(handler.backend_headers['Range'], 'bytes=43-')

    # Suffix range: the requested tail shrinks by what was consumed.
    handler = handler_for({'Range': 'bytes=-100'})
    handler.fast_forward(20)
    self.assertEqual(handler.backend_headers['Range'], 'bytes=-80')
    self.assertRaises(HTTPException,
                      handler.fast_forward, 100)
    self.assertRaises(exceptions.RangeAlreadyComplete,
                      handler.fast_forward, 80)

    # A fully-satisfied one-byte range cannot be fast-forwarded past.
    handler = handler_for({'Range': 'bytes=0-0'})
    self.assertRaises(exceptions.RangeAlreadyComplete,
                      handler.fast_forward, 1)
|
2013-12-03 22:02:39 +00:00
|
|
|
|
Fix download resumption after getting no data.
When the proxy is handling an object GET response and an object server
fails to send data in a timely fashion, the proxy can pick up where it
left off with another object server; other than a pause in the
download, the client doesn't even know anything happened.
However, if the proxy received the GET response headers but no data,
it would resume at the wrong spot. In particular, for an N-byte
object, it would ask the second object server for the last N-1 bytes
(or equivalently, all but the first byte). For a replicated storage
policy, this would result in the client getting an abbreviated
download, while for an EC storage policy, the proxy would 500 after
trying to decode a fragment set with a bogus fragment in it.
This commit fixes the resumption logic to ask for all N bytes of the
object from the second object server.
Change-Id: Ib9e28c3dceaded1708e7a30844b534566c7a320c
2016-04-14 20:01:38 -07:00
|
|
|
def test_range_fast_forward_after_data_timeout(self):
    # After learning an object's size from Content-Range but reading
    # zero bytes of data, a resumed request (fast_forward(0)) must
    # re-ask for the entire learned span.
    req = Request.blank('/')

    def check(range_headers, learned, expected_range):
        handler = GetOrHeadHandler(None, req, None, None, None, None,
                                   range_headers)
        handler.learn_size_from_content_range(*learned)
        handler.fast_forward(0)
        self.assertEqual(handler.backend_headers['Range'],
                         expected_range)

    # 1000-byte object, no Range: re-request all 1000 bytes.
    check({}, (0, 999, 1000), 'bytes=0-999')
    # Degenerate one-byte object: every index is zero.
    check({}, (0, 0, 1), 'bytes=0-0')
    # Suffix range is rewritten to the absolute offsets learned from
    # the Content-Range header.
    check({'Range': 'bytes=-100'}, (900, 999, 1000), 'bytes=900-999')
|
|
|
|
|
2013-12-03 22:02:39 +00:00
|
|
|
def test_transfer_headers_with_sysmeta(self):
    # Valid sysmeta headers transfer (any case); a bare sysmeta
    # prefix with no name is dropped.
    controller = Controller(self.app)
    good_hdrs = {'x-base-sysmeta-foo': 'ok',
                 'X-Base-sysmeta-Bar': 'also ok'}
    bad_hdrs = {'x-base-sysmeta-': 'too short'}
    src_hdrs = dict(good_hdrs)
    src_hdrs.update(bad_hdrs)
    dst_hdrs = HeaderKeyDict()
    controller.transfer_headers(src_hdrs, dst_hdrs)
    self.assertEqual(HeaderKeyDict(good_hdrs), dst_hdrs)
|
|
|
|
|
|
|
|
def test_generate_request_headers(self):
    """transfer=True copies user metadata, honors x-remove-* deletions
    (empty value), and drops unrelated headers."""
    base = Controller(self.app)
    src_headers = {'x-remove-base-meta-owner': 'x',
                   'x-base-meta-size': '151M',
                   'new-owner': 'Kun'}
    req = Request.blank('/v1/a/c/o', headers=src_headers)
    dst_headers = base.generate_request_headers(req, transfer=True)
    expected_headers = {'x-base-meta-owner': '',
                        'x-base-meta-size': '151M',
                        'connection': 'close'}
    for k, v in expected_headers.items():
        # assertIn/assertNotIn name the missing key in the failure
        # message, unlike assertTrue(k in ...); also matches the
        # style used by test_generate_request_headers_with_no_orig_req.
        self.assertIn(k, dst_headers)
        self.assertEqual(v, dst_headers[k])
    self.assertNotIn('new-owner', dst_headers)
|
|
|
|
|
|
|
|
def test_generate_request_headers_with_sysmeta(self):
    """Valid sysmeta is forwarded (lowercased keys); a bare sysmeta
    prefix with no name is dropped."""
    base = Controller(self.app)
    good_hdrs = {'x-base-sysmeta-foo': 'ok',
                 'X-Base-sysmeta-Bar': 'also ok'}
    bad_hdrs = {'x-base-sysmeta-': 'too short'}
    hdrs = dict(good_hdrs)
    hdrs.update(bad_hdrs)
    req = Request.blank('/v1/a/c/o', headers=hdrs)
    dst_headers = base.generate_request_headers(req, transfer=True)
    for k, v in good_hdrs.items():
        # assertIn/assertNotIn name the offending key on failure,
        # unlike assertTrue/assertFalse on a bare 'in' expression.
        self.assertIn(k.lower(), dst_headers)
        self.assertEqual(v, dst_headers[k.lower()])
    for k, v in bad_hdrs.items():
        self.assertNotIn(k.lower(), dst_headers)
|
Foundational support for PUT and GET of erasure-coded objects
This commit makes it possible to PUT an object into Swift and have it
stored using erasure coding instead of replication, and also to GET
the object back from Swift at a later time.
This works by splitting the incoming object into a number of segments,
erasure-coding each segment in turn to get fragments, then
concatenating the fragments into fragment archives. Segments are 1 MiB
in size, except the last, which is between 1 B and 1 MiB.
+====================================================================+
| object data |
+====================================================================+
|
+------------------------+----------------------+
| | |
v v v
+===================+ +===================+ +==============+
| segment 1 | | segment 2 | ... | segment N |
+===================+ +===================+ +==============+
| |
| |
v v
/=========\ /=========\
| pyeclib | | pyeclib | ...
\=========/ \=========/
| |
| |
+--> fragment A-1 +--> fragment A-2
| |
| |
| |
| |
| |
+--> fragment B-1 +--> fragment B-2
| |
| |
... ...
Then, object server A gets the concatenation of fragment A-1, A-2,
..., A-N, so its .data file looks like this (called a "fragment archive"):
+=====================================================================+
| fragment A-1 | fragment A-2 | ... | fragment A-N |
+=====================================================================+
Since this means that the object server never sees the object data as
the client sent it, we have to do a few things to ensure data
integrity.
First, the proxy has to check the Etag if the client provided it; the
object server can't do it since the object server doesn't see the raw
data.
Second, if the client does not provide an Etag, the proxy computes it
and uses the MIME-PUT mechanism to provide it to the object servers
after the object body. Otherwise, the object would not have an Etag at
all.
Third, the proxy computes the MD5 of each fragment archive and sends
it to the object server using the MIME-PUT mechanism. With replicated
objects, the proxy checks that the Etags from all the object servers
match, and if they don't, returns a 500 to the client. This mitigates
the risk of data corruption in one of the proxy --> object connections,
and signals to the client when it happens. With EC objects, we can't
use that same mechanism, so we must send the checksum with each
fragment archive to get comparable protection.
On the GET path, the inverse happens: the proxy connects to a bunch of
object servers (M of them, for an M+K scheme), reads one fragment at a
time from each fragment archive, decodes those fragments into a
segment, and serves the segment to the client.
When an object server dies partway through a GET response, any
partially-fetched fragment is discarded, the resumption point is wound
back to the nearest fragment boundary, and the GET is retried with the
next object server.
GET requests for a single byterange work; GET requests for multiple
byteranges do not.
There are a number of things _not_ included in this commit. Some of
them are listed here:
* multi-range GET
* deferred cleanup of old .data files
* durability (daemon to reconstruct missing archives)
Co-Authored-By: Alistair Coles <alistair.coles@hp.com>
Co-Authored-By: Thiago da Silva <thiago@redhat.com>
Co-Authored-By: John Dickinson <me@not.mn>
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Co-Authored-By: Tushar Gohad <tushar.gohad@intel.com>
Co-Authored-By: Paul Luse <paul.e.luse@intel.com>
Co-Authored-By: Christian Schwede <christian.schwede@enovance.com>
Co-Authored-By: Yuan Zhou <yuan.zhou@intel.com>
Change-Id: I9c13c03616489f8eab7dcd7c5f21237ed4cb6fd2
2014-10-22 13:18:34 -07:00
|
|
|
|
2015-10-22 10:14:29 -05:00
|
|
|
def test_generate_request_headers_with_no_orig_req(self):
    # With no originating request, 'additional' headers still flow
    # through and Referer defaults to the empty string.
    controller = Controller(self.app)
    extra = {'x-remove-base-meta-owner': 'x',
             'x-base-meta-size': '151M',
             'new-owner': 'Kun'}
    dst_headers = controller.generate_request_headers(None,
                                                      additional=extra)
    for key, value in {'x-base-meta-size': '151M',
                       'connection': 'close'}.items():
        self.assertIn(key, dst_headers)
        self.assertEqual(value, dst_headers[key])
    self.assertEqual('', dst_headers['Referer'])
|
|
|
|
|
Foundational support for PUT and GET of erasure-coded objects
This commit makes it possible to PUT an object into Swift and have it
stored using erasure coding instead of replication, and also to GET
the object back from Swift at a later time.
This works by splitting the incoming object into a number of segments,
erasure-coding each segment in turn to get fragments, then
concatenating the fragments into fragment archives. Segments are 1 MiB
in size, except the last, which is between 1 B and 1 MiB.
+====================================================================+
| object data |
+====================================================================+
|
+------------------------+----------------------+
| | |
v v v
+===================+ +===================+ +==============+
| segment 1 | | segment 2 | ... | segment N |
+===================+ +===================+ +==============+
| |
| |
v v
/=========\ /=========\
| pyeclib | | pyeclib | ...
\=========/ \=========/
| |
| |
+--> fragment A-1 +--> fragment A-2
| |
| |
| |
| |
| |
+--> fragment B-1 +--> fragment B-2
| |
| |
... ...
Then, object server A gets the concatenation of fragment A-1, A-2,
..., A-N, so its .data file looks like this (called a "fragment archive"):
+=====================================================================+
| fragment A-1 | fragment A-2 | ... | fragment A-N |
+=====================================================================+
Since this means that the object server never sees the object data as
the client sent it, we have to do a few things to ensure data
integrity.
First, the proxy has to check the Etag if the client provided it; the
object server can't do it since the object server doesn't see the raw
data.
Second, if the client does not provide an Etag, the proxy computes it
and uses the MIME-PUT mechanism to provide it to the object servers
after the object body. Otherwise, the object would not have an Etag at
all.
Third, the proxy computes the MD5 of each fragment archive and sends
it to the object server using the MIME-PUT mechanism. With replicated
objects, the proxy checks that the Etags from all the object servers
match, and if they don't, returns a 500 to the client. This mitigates
the risk of data corruption in one of the proxy --> object connections,
and signals to the client when it happens. With EC objects, we can't
use that same mechanism, so we must send the checksum with each
fragment archive to get comparable protection.
On the GET path, the inverse happens: the proxy connects to a bunch of
object servers (M of them, for an M+K scheme), reads one fragment at a
time from each fragment archive, decodes those fragments into a
segment, and serves the segment to the client.
When an object server dies partway through a GET response, any
partially-fetched fragment is discarded, the resumption point is wound
back to the nearest fragment boundary, and the GET is retried with the
next object server.
GET requests for a single byterange work; GET requests for multiple
byteranges do not.
There are a number of things _not_ included in this commit. Some of
them are listed here:
* multi-range GET
* deferred cleanup of old .data files
* durability (daemon to reconstruct missing archives)
Co-Authored-By: Alistair Coles <alistair.coles@hp.com>
Co-Authored-By: Thiago da Silva <thiago@redhat.com>
Co-Authored-By: John Dickinson <me@not.mn>
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Co-Authored-By: Tushar Gohad <tushar.gohad@intel.com>
Co-Authored-By: Paul Luse <paul.e.luse@intel.com>
Co-Authored-By: Christian Schwede <christian.schwede@enovance.com>
Co-Authored-By: Yuan Zhou <yuan.zhou@intel.com>
Change-Id: I9c13c03616489f8eab7dcd7c5f21237ed4cb6fd2
2014-10-22 13:18:34 -07:00
|
|
|
def test_client_chunk_size(self):
    """The GET handler must re-chunk ragged server reads into even
    client_chunk_size pieces for the client."""

    class FakeSource(object):
        # Minimal stand-in for an object-server response object.

        def __init__(self, chunks):
            self.chunks = list(chunks)
            self.status = 200

        def read(self, _read_size):
            # Hand back one queued piece per call; '' signals EOF.
            if not self.chunks:
                return ''
            return self.chunks.pop(0)

        def getheader(self, header):
            if header.lower() == "content-length":
                return str(sum(len(c) for c in self.chunks))

        def getheaders(self):
            return [('content-length', self.getheader('content-length'))]

    # 25 bytes total, delivered in ragged pieces.
    source = FakeSource((
        'abcd', '1234', 'abc', 'd1', '234abcd1234abcd1', '2'))
    req = Request.blank('/v1/a/c/o')
    node = {}
    handler = GetOrHeadHandler(self.app, req, None, None, None, None, {},
                               client_chunk_size=8)

    app_iter = handler._make_app_iter(req, node, source)
    client_chunks = list(app_iter)
    # Three full 8-byte chunks plus a 1-byte-short tail.
    self.assertEqual(client_chunks, [
        'abcd1234', 'abcd1234', 'abcd1234', 'abcd12'])
def test_client_chunk_size_resuming(self):
    """A mid-stream ChunkReadTimeout must trigger a resumed GET from the
    next source, picking up at the client-chunk boundary."""

    class FakeSource(object):
        # Source whose read() raises ChunkReadTimeout on a None chunk.

        def __init__(self, chunks):
            self.chunks = list(chunks)
            self.status = 200

        def read(self, _read_size):
            if not self.chunks:
                return ''
            chunk = self.chunks.pop(0)
            if chunk is None:
                raise exceptions.ChunkReadTimeout()
            return chunk

        def getheader(self, header):
            # None entries are timeout markers, not body bytes, so they
            # are excluded from the advertised length.
            if header.lower() == "content-length":
                return str(sum(len(c) for c in self.chunks
                               if c is not None))

        def getheaders(self):
            return [('content-length', self.getheader('content-length'))]

    node = {'ip': '1.2.3.4', 'port': 6200, 'device': 'sda'}

    # source1 dies partway through; source2 supplies the remainder.
    source1 = FakeSource(['abcd', '1234', 'abc', None])
    source2 = FakeSource(['efgh5678'])
    req = Request.blank('/v1/a/c/o')
    handler = GetOrHeadHandler(
        self.app, req, 'Object', None, None, None, {},
        client_chunk_size=8)

    app_iter = handler._make_app_iter(req, node, source1)
    with patch.object(handler, '_get_source_and_node',
                      lambda: (source2, node)):
        client_chunks = list(app_iter)
    self.assertEqual(client_chunks, ['abcd1234', 'efgh5678'])
def test_client_chunk_size_resuming_chunked(self):
    """Resume-after-timeout must also work when the backend response uses
    chunked transfer-encoding (no content-length header)."""

    class FakeChunkedSource(object):
        # Chunked-transfer source; a None chunk raises ChunkReadTimeout.

        def __init__(self, chunks):
            self.chunks = list(chunks)
            self.status = 200
            self.headers = {'transfer-encoding': 'chunked',
                            'content-type': 'text/plain'}

        def read(self, _read_size):
            if not self.chunks:
                return ''
            chunk = self.chunks.pop(0)
            if chunk is None:
                raise exceptions.ChunkReadTimeout()
            return chunk

        def getheader(self, header):
            return self.headers.get(header.lower())

        def getheaders(self):
            return self.headers

    node = {'ip': '1.2.3.4', 'port': 6200, 'device': 'sda'}

    # source1 dies partway through; source2 supplies the remainder.
    source1 = FakeChunkedSource(['abcd', '1234', 'abc', None])
    source2 = FakeChunkedSource(['efgh5678'])
    req = Request.blank('/v1/a/c/o')
    handler = GetOrHeadHandler(
        self.app, req, 'Object', None, None, None, {},
        client_chunk_size=8)

    app_iter = handler._make_app_iter(req, node, source1)
    with patch.object(handler, '_get_source_and_node',
                      lambda: (source2, node)):
        client_chunks = list(app_iter)
    self.assertEqual(client_chunks, ['abcd1234', 'efgh5678'])
Foundational support for PUT and GET of erasure-coded objects
This commit makes it possible to PUT an object into Swift and have it
stored using erasure coding instead of replication, and also to GET
the object back from Swift at a later time.
This works by splitting the incoming object into a number of segments,
erasure-coding each segment in turn to get fragments, then
concatenating the fragments into fragment archives. Segments are 1 MiB
in size, except the last, which is between 1 B and 1 MiB.
+====================================================================+
| object data |
+====================================================================+
|
+------------------------+----------------------+
| | |
v v v
+===================+ +===================+ +==============+
| segment 1 | | segment 2 | ... | segment N |
+===================+ +===================+ +==============+
| |
| |
v v
/=========\ /=========\
| pyeclib | | pyeclib | ...
\=========/ \=========/
| |
| |
+--> fragment A-1 +--> fragment A-2
| |
| |
| |
| |
| |
+--> fragment B-1 +--> fragment B-2
| |
| |
... ...
Then, object server A gets the concatenation of fragment A-1, A-2,
..., A-N, so its .data file looks like this (called a "fragment archive"):
+=====================================================================+
| fragment A-1 | fragment A-2 | ... | fragment A-N |
+=====================================================================+
Since this means that the object server never sees the object data as
the client sent it, we have to do a few things to ensure data
integrity.
First, the proxy has to check the Etag if the client provided it; the
object server can't do it since the object server doesn't see the raw
data.
Second, if the client does not provide an Etag, the proxy computes it
and uses the MIME-PUT mechanism to provide it to the object servers
after the object body. Otherwise, the object would not have an Etag at
all.
Third, the proxy computes the MD5 of each fragment archive and sends
it to the object server using the MIME-PUT mechanism. With replicated
objects, the proxy checks that the Etags from all the object servers
match, and if they don't, returns a 500 to the client. This mitigates
the risk of data corruption in one of the proxy --> object connections,
and signals to the client when it happens. With EC objects, we can't
use that same mechanism, so we must send the checksum with each
fragment archive to get comparable protection.
On the GET path, the inverse happens: the proxy connects to a bunch of
object servers (M of them, for an M+K scheme), reads one fragment at a
time from each fragment archive, decodes those fragments into a
segment, and serves the segment to the client.
When an object server dies partway through a GET response, any
partially-fetched fragment is discarded, the resumption point is wound
back to the nearest fragment boundary, and the GET is retried with the
next object server.
GET requests for a single byterange work; GET requests for multiple
byteranges do not.
There are a number of things _not_ included in this commit. Some of
them are listed here:
* multi-range GET
* deferred cleanup of old .data files
* durability (daemon to reconstruct missing archives)
Co-Authored-By: Alistair Coles <alistair.coles@hp.com>
Co-Authored-By: Thiago da Silva <thiago@redhat.com>
Co-Authored-By: John Dickinson <me@not.mn>
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Co-Authored-By: Tushar Gohad <tushar.gohad@intel.com>
Co-Authored-By: Paul Luse <paul.e.luse@intel.com>
Co-Authored-By: Christian Schwede <christian.schwede@enovance.com>
Co-Authored-By: Yuan Zhou <yuan.zhou@intel.com>
Change-Id: I9c13c03616489f8eab7dcd7c5f21237ed4cb6fd2
2014-10-22 13:18:34 -07:00
|
|
|
def test_bytes_to_skip(self):
|
|
|
|
# if you start at the beginning, skip nothing
|
|
|
|
self.assertEqual(bytes_to_skip(1024, 0), 0)
|
|
|
|
|
|
|
|
# missed the first 10 bytes, so we've got 1014 bytes of partial
|
|
|
|
# record
|
|
|
|
self.assertEqual(bytes_to_skip(1024, 10), 1014)
|
|
|
|
|
|
|
|
# skipped some whole records first
|
|
|
|
self.assertEqual(bytes_to_skip(1024, 4106), 1014)
|
|
|
|
|
|
|
|
# landed on a record boundary
|
|
|
|
self.assertEqual(bytes_to_skip(1024, 1024), 0)
|
|
|
|
self.assertEqual(bytes_to_skip(1024, 2048), 0)
|
|
|
|
|
|
|
|
# big numbers
|
|
|
|
self.assertEqual(bytes_to_skip(2 ** 20, 2 ** 32), 0)
|
|
|
|
self.assertEqual(bytes_to_skip(2 ** 20, 2 ** 32 + 1), 2 ** 20 - 1)
|
|
|
|
self.assertEqual(bytes_to_skip(2 ** 20, 2 ** 32 + 2 ** 19), 2 ** 19)
|
|
|
|
|
|
|
|
# odd numbers
|
|
|
|
self.assertEqual(bytes_to_skip(123, 0), 0)
|
|
|
|
self.assertEqual(bytes_to_skip(123, 23), 100)
|
|
|
|
self.assertEqual(bytes_to_skip(123, 247), 122)
|
|
|
|
|
|
|
|
# prime numbers
|
|
|
|
self.assertEqual(bytes_to_skip(11, 7), 4)
|
|
|
|
self.assertEqual(bytes_to_skip(97, 7873823), 55)
|