2013-09-20 01:00:54 +08:00
|
|
|
# Copyright (c) 2010-2012 OpenStack Foundation
|
2013-09-07 16:29:15 +02:00
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
|
|
# implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
2010-07-12 17:03:45 -05:00
|
|
|
""" Swift tests """
|
|
|
|
|
2015-07-28 21:03:05 +05:30
|
|
|
from __future__ import print_function
|
2010-11-11 16:41:07 -06:00
|
|
|
import os
|
2012-04-30 16:38:15 -04:00
|
|
|
import copy
|
Adding StatsD logging to Swift.
Documentation, including a list of metrics reported and their semantics,
is in the Admin Guide in a new section, "Reporting Metrics to StatsD".
An optional "metric prefix" may be configured which will be prepended to
every metric name sent to StatsD.
Here is the rationale for doing a deep integration like this versus only
sending metrics to StatsD in middleware. It's the only way to report
some internal activities of Swift in a real-time manner. So to have one
way of reporting to StatsD and one place/style of configuration, even
some things (like, say, timing of PUT requests into the proxy-server)
which could be logged via middleware are consistently logged the same
way (deep integration via the logger delegate methods).
When log_statsd_host is configured, get_logger() injects a
swift.common.utils.StatsdClient object into the logger as
logger.statsd_client. Then a set of delegate methods on LogAdapter
either pass through to the StatsdClient object or become no-ops. This
allows StatsD logging to look like:
self.logger.increment('some.metric.here')
and do the right thing in all cases and with no messy conditional logic.
I wanted to use the pystatsd module for the StatsD client, but the
version on PyPi is lagging the git repo (and is missing both the prefix
functionality and timing_since() method). So I wrote my
swift.common.utils.StatsdClient. The interface is the same as
pystatsd.Client, but the code was written from scratch. It's pretty
simple, and the tests I added cover it. This also frees Swift from an
optional dependency on the pystatsd module, making this feature easier
to enable.
There's test coverage for the new code and all existing tests continue
to pass.
Refactored out _one_audit_pass() method in swift/account/auditor.py and
swift/container/auditor.py.
Fixed some misc. PEP8 violations.
Misc test cleanups and refactorings (particularly the way "fake logging"
is handled).
Change-Id: Ie968a9ae8771f59ee7591e2ae11999c44bfe33b2
2012-04-01 16:47:08 -07:00
|
|
|
import logging
|
2018-05-22 16:17:12 -05:00
|
|
|
import logging.handlers
|
2013-11-18 11:41:58 -08:00
|
|
|
import sys
|
2014-04-28 19:22:51 -07:00
|
|
|
from contextlib import contextmanager, closing
|
2021-10-20 12:30:09 -07:00
|
|
|
from collections import defaultdict
|
|
|
|
try:
|
|
|
|
from collections.abc import Iterable
|
|
|
|
except ImportError:
|
|
|
|
from collections import Iterable # py2
|
2015-03-31 22:35:37 -07:00
|
|
|
import itertools
|
2014-05-27 16:57:25 -07:00
|
|
|
from numbers import Number
|
2010-11-11 16:41:07 -06:00
|
|
|
from tempfile import NamedTemporaryFile
|
2013-08-30 21:37:07 -07:00
|
|
|
import time
|
2015-03-31 22:35:37 -07:00
|
|
|
import eventlet
|
2017-06-02 17:47:25 -07:00
|
|
|
from eventlet import greenpool, debug as eventlet_debug
|
2010-07-12 17:03:45 -05:00
|
|
|
from eventlet.green import socket
|
Add checksum to object extended attributes
Currently, our integrity checking for objects is pretty weak when it
comes to object metadata. If the extended attributes on a .data or
.meta file get corrupted in such a way that we can still unpickle it,
we don't have anything that detects that.
This could be especially bad with encrypted etags; if the encrypted
etag (X-Object-Sysmeta-Crypto-Etag or whatever it is) gets some bits
flipped, then we'll cheerfully decrypt the cipherjunk into plainjunk,
then send it to the client. Net effect is that the client sees a GET
response with an ETag that doesn't match the MD5 of the object *and*
Swift has no way of detecting and quarantining this object.
Note that, with an unencrypted object, if the ETag metadatum gets
mangled, then the object will be quarantined by the object server or
auditor, whichever notices first.
As part of this commit, I also ripped out some mocking of
getxattr/setxattr in tests. It appears to be there to allow unit tests
to run on systems where /tmp doesn't support xattrs. However, since
the mock is keyed off of inode number and inode numbers get re-used,
there's lots of leakage between different test runs. On a real FS,
unlinking a file and then creating a new one of the same name will
also reset the xattrs; this isn't the case with the mock.
The mock was pretty old; Ubuntu 12.04 and up all support xattrs in
/tmp, and recent Red Hat / CentOS releases do too. The xattr mock was
added in 2011; maybe it was to support Ubuntu Lucid Lynx?
Bonus: now you can pause a test with the debugger, inspect its files
in /tmp, and actually see the xattrs along with the data.
Since this patch now uses a real filesystem for testing filesystem
operations, tests are skipped if the underlying filesystem does not
support setting xattrs (eg tmpfs or more than 4k of xattrs on ext4).
References to "/tmp" have been replaced with calls to
tempfile.gettempdir(). This will allow setting the TMPDIR envvar in
test setup and getting an XFS filesystem instead of ext4 or tmpfs.
THIS PATCH SIGNIFICANTLY CHANGES TESTING ENVIRONMENTS
With this patch, every test environment will require TMPDIR to be
using a filesystem that supports at least 4k of extended attributes.
Neither ext4 nor tmpfs supports this. XFS is recommended.
So why all the SkipTests? Why not simply raise an error? We still need
the tests to run on the base image for OpenStack's CI system. Since
we were previously mocking out xattr, there wasn't a problem, but we
also weren't actually testing anything. This patch adds functionality
to validate xattr data, so we need to drop the mock.
`test.unit.skip_if_no_xattrs()` is also imported into `test.functional`
so that functional tests can import it from the functional test
namespace.
The related OpenStack CI infrastructure changes are made in
https://review.openstack.org/#/c/394600/.
Co-Authored-By: John Dickinson <me@not.mn>
Change-Id: I98a37c0d451f4960b7a12f648e4405c6c6716808
2016-06-30 16:52:58 -07:00
|
|
|
from tempfile import mkdtemp, mkstemp, gettempdir
|
2011-02-11 13:18:19 -06:00
|
|
|
from shutil import rmtree
|
2016-06-07 10:35:18 +00:00
|
|
|
import signal
|
2016-06-29 03:32:09 -05:00
|
|
|
import json
|
EC Fragment Duplication - Foundational Global EC Cluster Support
This patch enables efficient PUT/GET for globally distributed clusters[1].
Problem:
Erasure coding can reduce the amount of data actually stored compared to
the replicated model. For example, with ec_k=6, ec_m=3 the stored data
is 1.5x the size of the original data, which is smaller than 3x replicated.
However, unlike replication, erasure coding requires availability of at
least some ec_k fragments of the total ec_k + ec_m fragments to service
read (e.g. 6 of 9 in the case above). As such, if we stored the
EC object into a swift cluster on 2 geographically distributed data
centers which have the same volume of disks, it is likely the fragments
will be stored evenly (about 4 and 5) so we still need to access a
faraway data center to decode the original object. In addition, if one
of the data centers was lost in a disaster, the stored objects will be
lost forever, and we have to cry a lot. To ensure highly durable
storage, you would think of making *more* parity fragments (e.g.
ec_k=6, ec_m=10); unfortunately this causes *significant* performance
degradation due to the cost of the mathematical calculation for erasure
coding encode/decode.
How this resolves the problem:
EC Fragment Duplication extends on the initial solution to add *more*
fragments from which to rebuild an object similar to the solution
described above. The difference is making *copies* of encoded fragments.
With experimental results[1][2], employing small ec_k and ec_m shows
enough performance to store/retrieve objects.
On PUT:
- Encode incoming object with small ec_k and ec_m <- faster!
- Make duplicated copies of the encoded fragments. The # of copies
are determined by 'ec_duplication_factor' in swift.conf
- Store all fragments in Swift Global EC Cluster
The duplicated fragments increase pressure on existing requirements
when decoding objects in service to a read request. All fragments are
stored with their X-Object-Sysmeta-Ec-Frag-Index. In this change, the
X-Object-Sysmeta-Ec-Frag-Index represents the actual fragment index
encoded by PyECLib, there *will* be duplicates. Anytime we must decode
the original object data, we must only consider the ec_k fragments as
unique according to their X-Object-Sysmeta-Ec-Frag-Index. On decode no
duplicate X-Object-Sysmeta-Ec-Frag-Index may be used when decoding an
object, duplicate X-Object-Sysmeta-Ec-Frag-Index should be expected and
avoided if possible.
On GET:
This patch includes the following changes:
- Change GET Path to sort primary nodes grouping as subsets, so that
each subset will include unique fragments
- Change Reconstructor to be more aware of possibly duplicate fragments
For example, with this change, a policy could be configured such that
swift.conf:
ec_num_data_fragments = 2
ec_num_parity_fragments = 1
ec_duplication_factor = 2
(object ring must have 6 replicas)
At Object-Server:
node index (from object ring): 0 1 2 3 4 5 <- keep node index for
reconstruct decision
X-Object-Sysmeta-Ec-Frag-Index: 0 1 2 0 1 2 <- each object keeps actual
fragment index for
backend (PyEClib)
Additional improvements to Global EC Cluster Support will require
features such as Composite Rings, and more efficient fragment
rebalance/reconstruction.
1: http://goo.gl/IYiNPk (Swift Design Spec Repository)
2: http://goo.gl/frgj6w (Slide Share for OpenStack Summit Tokyo)
Doc-Impact
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Change-Id: Idd155401982a2c48110c30b480966a863f6bd305
2015-08-06 01:06:47 -07:00
|
|
|
import random
|
Add checksum to object extended attributes
Currently, our integrity checking for objects is pretty weak when it
comes to object metadata. If the extended attributes on a .data or
.meta file get corrupted in such a way that we can still unpickle it,
we don't have anything that detects that.
This could be especially bad with encrypted etags; if the encrypted
etag (X-Object-Sysmeta-Crypto-Etag or whatever it is) gets some bits
flipped, then we'll cheerfully decrypt the cipherjunk into plainjunk,
then send it to the client. Net effect is that the client sees a GET
response with an ETag that doesn't match the MD5 of the object *and*
Swift has no way of detecting and quarantining this object.
Note that, with an unencrypted object, if the ETag metadatum gets
mangled, then the object will be quarantined by the object server or
auditor, whichever notices first.
As part of this commit, I also ripped out some mocking of
getxattr/setxattr in tests. It appears to be there to allow unit tests
to run on systems where /tmp doesn't support xattrs. However, since
the mock is keyed off of inode number and inode numbers get re-used,
there's lots of leakage between different test runs. On a real FS,
unlinking a file and then creating a new one of the same name will
also reset the xattrs; this isn't the case with the mock.
The mock was pretty old; Ubuntu 12.04 and up all support xattrs in
/tmp, and recent Red Hat / CentOS releases do too. The xattr mock was
added in 2011; maybe it was to support Ubuntu Lucid Lynx?
Bonus: now you can pause a test with the debugger, inspect its files
in /tmp, and actually see the xattrs along with the data.
Since this patch now uses a real filesystem for testing filesystem
operations, tests are skipped if the underlying filesystem does not
support setting xattrs (eg tmpfs or more than 4k of xattrs on ext4).
References to "/tmp" have been replaced with calls to
tempfile.gettempdir(). This will allow setting the TMPDIR envvar in
test setup and getting an XFS filesystem instead of ext4 or tmpfs.
THIS PATCH SIGNIFICANTLY CHANGES TESTING ENVIRONMENTS
With this patch, every test environment will require TMPDIR to be
using a filesystem that supports at least 4k of extended attributes.
Neither ext4 nor tmpfs supports this. XFS is recommended.
So why all the SkipTests? Why not simply raise an error? We still need
the tests to run on the base image for OpenStack's CI system. Since
we were previously mocking out xattr, there wasn't a problem, but we
also weren't actually testing anything. This patch adds functionality
to validate xattr data, so we need to drop the mock.
`test.unit.skip_if_no_xattrs()` is also imported into `test.functional`
so that functional tests can import it from the functional test
namespace.
The related OpenStack CI infrastructure changes are made in
https://review.openstack.org/#/c/394600/.
Co-Authored-By: John Dickinson <me@not.mn>
Change-Id: I98a37c0d451f4960b7a12f648e4405c6c6716808
2016-06-30 16:52:58 -07:00
|
|
|
import errno
|
|
|
|
import xattr
|
2019-10-14 14:03:16 -07:00
|
|
|
from io import BytesIO
|
2022-07-07 17:27:49 +10:00
|
|
|
from uuid import uuid4
|
Add checksum to object extended attributes
Currently, our integrity checking for objects is pretty weak when it
comes to object metadata. If the extended attributes on a .data or
.meta file get corrupted in such a way that we can still unpickle it,
we don't have anything that detects that.
This could be especially bad with encrypted etags; if the encrypted
etag (X-Object-Sysmeta-Crypto-Etag or whatever it is) gets some bits
flipped, then we'll cheerfully decrypt the cipherjunk into plainjunk,
then send it to the client. Net effect is that the client sees a GET
response with an ETag that doesn't match the MD5 of the object *and*
Swift has no way of detecting and quarantining this object.
Note that, with an unencrypted object, if the ETag metadatum gets
mangled, then the object will be quarantined by the object server or
auditor, whichever notices first.
As part of this commit, I also ripped out some mocking of
getxattr/setxattr in tests. It appears to be there to allow unit tests
to run on systems where /tmp doesn't support xattrs. However, since
the mock is keyed off of inode number and inode numbers get re-used,
there's lots of leakage between different test runs. On a real FS,
unlinking a file and then creating a new one of the same name will
also reset the xattrs; this isn't the case with the mock.
The mock was pretty old; Ubuntu 12.04 and up all support xattrs in
/tmp, and recent Red Hat / CentOS releases do too. The xattr mock was
added in 2011; maybe it was to support Ubuntu Lucid Lynx?
Bonus: now you can pause a test with the debugger, inspect its files
in /tmp, and actually see the xattrs along with the data.
Since this patch now uses a real filesystem for testing filesystem
operations, tests are skipped if the underlying filesystem does not
support setting xattrs (eg tmpfs or more than 4k of xattrs on ext4).
References to "/tmp" have been replaced with calls to
tempfile.gettempdir(). This will allow setting the TMPDIR envvar in
test setup and getting an XFS filesystem instead of ext4 or tmpfs.
THIS PATCH SIGNIFICANTLY CHANGES TESTING ENVIRONMENTS
With this patch, every test environment will require TMPDIR to be
using a filesystem that supports at least 4k of extended attributes.
Neither ext4 nor tmpfs supports this. XFS is recommended.
So why all the SkipTests? Why not simply raise an error? We still need
the tests to run on the base image for OpenStack's CI system. Since
we were previously mocking out xattr, there wasn't a problem, but we
also weren't actually testing anything. This patch adds functionality
to validate xattr data, so we need to drop the mock.
`test.unit.skip_if_no_xattrs()` is also imported into `test.functional`
so that functional tests can import it from the functional test
namespace.
The related OpenStack CI infrastructure changes are made in
https://review.openstack.org/#/c/394600/.
Co-Authored-By: John Dickinson <me@not.mn>
Change-Id: I98a37c0d451f4960b7a12f648e4405c6c6716808
2016-06-30 16:52:58 -07:00
|
|
|
|
2019-04-15 21:44:18 -07:00
|
|
|
import six
|
2018-05-22 16:17:12 -05:00
|
|
|
import six.moves.cPickle as pickle
|
|
|
|
from six.moves import range
|
|
|
|
from six.moves.http_client import HTTPException
|
2024-04-02 14:33:29 -07:00
|
|
|
from six.moves import configparser
|
2018-05-22 16:17:12 -05:00
|
|
|
|
2023-06-23 13:21:02 +01:00
|
|
|
from swift.common import storage_policy, swob, utils, exceptions
|
2022-01-06 08:45:16 -08:00
|
|
|
from swift.common.memcached import MemcacheConnectionError
|
2018-11-29 01:31:13 -06:00
|
|
|
from swift.common.storage_policy import (StoragePolicy, ECStoragePolicy,
|
|
|
|
VALID_EC_TYPES)
|
2024-01-04 05:06:32 +00:00
|
|
|
from swift.common.utils import Timestamp, md5, close_if_possible
|
2012-04-30 16:38:15 -04:00
|
|
|
from test import get_config
|
2021-01-22 14:21:23 -06:00
|
|
|
from test.debug_logger import FakeLogger
|
2016-03-02 10:28:51 +00:00
|
|
|
from swift.common.header_key_dict import HeaderKeyDict
|
2017-06-20 11:17:33 +01:00
|
|
|
from swift.common.ring import Ring, RingData, RingBuilder
|
EC Fragment Duplication - Foundational Global EC Cluster Support
This patch enables efficient PUT/GET for globally distributed clusters[1].
Problem:
Erasure coding can reduce the amount of data actually stored compared to
the replicated model. For example, with ec_k=6, ec_m=3 the stored data
is 1.5x the size of the original data, which is smaller than 3x replicated.
However, unlike replication, erasure coding requires availability of at
least some ec_k fragments of the total ec_k + ec_m fragments to service
read (e.g. 6 of 9 in the case above). As such, if we stored the
EC object into a swift cluster on 2 geographically distributed data
centers which have the same volume of disks, it is likely the fragments
will be stored evenly (about 4 and 5) so we still need to access a
faraway data center to decode the original object. In addition, if one
of the data centers was lost in a disaster, the stored objects will be
lost forever, and we have to cry a lot. To ensure highly durable
storage, you would think of making *more* parity fragments (e.g.
ec_k=6, ec_m=10); unfortunately this causes *significant* performance
degradation due to the cost of the mathematical calculation for erasure
coding encode/decode.
How this resolves the problem:
EC Fragment Duplication extends on the initial solution to add *more*
fragments from which to rebuild an object similar to the solution
described above. The difference is making *copies* of encoded fragments.
With experimental results[1][2], employing small ec_k and ec_m shows
enough performance to store/retrieve objects.
On PUT:
- Encode incoming object with small ec_k and ec_m <- faster!
- Make duplicated copies of the encoded fragments. The # of copies
are determined by 'ec_duplication_factor' in swift.conf
- Store all fragments in Swift Global EC Cluster
The duplicated fragments increase pressure on existing requirements
when decoding objects in service to a read request. All fragments are
stored with their X-Object-Sysmeta-Ec-Frag-Index. In this change, the
X-Object-Sysmeta-Ec-Frag-Index represents the actual fragment index
encoded by PyECLib, there *will* be duplicates. Anytime we must decode
the original object data, we must only consider the ec_k fragments as
unique according to their X-Object-Sysmeta-Ec-Frag-Index. On decode no
duplicate X-Object-Sysmeta-Ec-Frag-Index may be used when decoding an
object, duplicate X-Object-Sysmeta-Ec-Frag-Index should be expected and
avoided if possible.
On GET:
This patch includes the following changes:
- Change GET Path to sort primary nodes grouping as subsets, so that
each subset will include unique fragments
- Change Reconstructor to be more aware of possibly duplicate fragments
For example, with this change, a policy could be configured such that
swift.conf:
ec_num_data_fragments = 2
ec_num_parity_fragments = 1
ec_duplication_factor = 2
(object ring must have 6 replicas)
At Object-Server:
node index (from object ring): 0 1 2 3 4 5 <- keep node index for
reconstruct decision
X-Object-Sysmeta-Ec-Frag-Index: 0 1 2 0 1 2 <- each object keeps actual
fragment index for
backend (PyEClib)
Additional improvements to Global EC Cluster Support will require
features such as Composite Rings, and more efficient fragment
rebalance/reconstruction.
1: http://goo.gl/IYiNPk (Swift Design Spec Repository)
2: http://goo.gl/frgj6w (Slide Share for OpenStack Summit Tokyo)
Doc-Impact
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Change-Id: Idd155401982a2c48110c30b480966a863f6bd305
2015-08-06 01:06:47 -07:00
|
|
|
from swift.obj import server
|
2018-11-29 01:31:13 -06:00
|
|
|
|
2014-03-17 12:18:25 -07:00
|
|
|
import functools
|
2014-04-28 19:22:51 -07:00
|
|
|
from gzip import GzipFile
|
|
|
|
import mock as mocklib
|
2015-03-31 22:35:37 -07:00
|
|
|
import inspect
|
2023-02-16 23:57:08 -08:00
|
|
|
from unittest import SkipTest
|
2015-03-31 22:35:37 -07:00
|
|
|
|
2020-04-03 10:44:25 -07:00
|
|
|
|
2020-09-11 16:28:11 -04:00
|
|
|
# Hex digest of an md5 that was never fed any data (presumably the ETag of
# an empty object body -- confirm against swift.common.utils.md5).
EMPTY_ETAG = md5(usedforsecurity=False).hexdigest()

# Only override the hash path suffix when the running program's name does
# not start with 'swift', i.e. when this module was not pulled in by one of
# the swift executables.
if not os.path.basename(sys.argv[0]).startswith('swift'):
    # this is the one and only place HASH_PATH_SUFFIX gets patched
    utils.HASH_PATH_SUFFIX = b'endcap'

# Erasure-code backends the tests are willing to use, most preferred first.
EC_TYPE_PREFERENCE = [
    'liberasurecode_rs_vand',
    'jerasure_rs_vand',
]

# Pick the first preferred backend that is reported as valid; bail out of
# the whole process if none of them is.
for eclib_name in EC_TYPE_PREFERENCE:
    if eclib_name not in VALID_EC_TYPES:
        continue
    DEFAULT_TEST_EC_TYPE = eclib_name
    break
else:
    raise SystemExit(
        'ERROR: unable to find suitable PyECLib type'
        ' (none of %r found in %r)' % (EC_TYPE_PREFERENCE, VALID_EC_TYPES))
|
|
|
|
|
|
|
|
|
2015-03-31 22:35:37 -07:00
|
|
|
def patch_policies(thing_or_policies=None, legacy_only=False,
                   with_ec_default=False, fake_ring_args=None):
    """
    Build a PatchPolicies for tests, or apply one to the given thing.

    :param thing_or_policies: either the policies to patch in (an Iterable
        or a StoragePolicyCollection), or the thing to be wrapped, or
        nothing at all.
    :param legacy_only: when true, the default policy set is a single
        policy 0 named 'legacy'.
    :param with_ec_default: when true (and legacy_only is not), the default
        policy set is an EC policy 0 named 'ec' plus a policy 1 named 'unu'.
    :param fake_ring_args: passed through to PatchPolicies; when falsy and
        a default policy set is used, per-policy defaults are supplied.

    :returns: a PatchPolicies instance when given policies or nothing;
        otherwise the result of calling the PatchPolicies instance on
        ``thing_or_policies``.
    """
    explicit_policy_types = (
        Iterable, storage_policy.StoragePolicyCollection)
    if isinstance(thing_or_policies, explicit_policy_types):
        # caller handed us the policies themselves
        return PatchPolicies(thing_or_policies, fake_ring_args=fake_ring_args)

    # No explicit policies: choose one of the canned policy sets together
    # with the matching fake ring arguments (one entry per policy).
    if legacy_only:
        policies = [StoragePolicy(0, name='legacy', is_default=True)]
        ring_args = [{}]
    elif with_ec_default:
        ec_policy = ECStoragePolicy(
            0, name='ec', is_default=True,
            ec_type=DEFAULT_TEST_EC_TYPE, ec_ndata=10,
            ec_nparity=4, ec_segment_size=4096)
        policies = [ec_policy, StoragePolicy(1, name='unu')]
        # 10 data + 4 parity fragments -> 14 replicas for the EC ring
        ring_args = [{'replicas': 14}, {}]
    else:
        policies = [
            StoragePolicy(0, name='nulo', is_default=True),
            StoragePolicy(1, name='unu'),
        ]
        ring_args = [{}, {}]

    patcher = PatchPolicies(
        policies, fake_ring_args=fake_ring_args or ring_args)

    if thing_or_policies:
        # we were given the thing itself; hand back the wrapped thing
        # rather than the decorator
        return patcher(thing_or_policies)
    return patcher
|
2014-03-17 12:18:25 -07:00
|
|
|
|
|
|
|
|
|
|
|
class PatchPolicies(object):
    """
    Temporarily replace the global ``storage_policy._POLICIES`` collection.

    An instance can be applied as a class decorator (it wraps the class's
    ``setUp``), as a function/method decorator, or used directly as a
    context manager.

    Why not mock.patch?  In my case, when used as a decorator on the class it
    seemed to patch setUp at the wrong time (i.e. in setUp the global wasn't
    patched yet)
    """

    def __init__(self, policies, fake_ring_args=None):
        """
        :param policies: a ``StoragePolicyCollection``, or anything the
                         ``StoragePolicyCollection`` constructor accepts
        :param fake_ring_args: optional list paired positionally with the
                               policies; every entry that is not None is a
                               dict of keyword arguments used to build a
                               ``FakeRing`` for the matching policy.  A
                               missing or empty value means "one None per
                               policy", i.e. no rings are replaced.
        """
        if isinstance(policies, storage_policy.StoragePolicyCollection):
            self.policies = policies
        else:
            self.policies = storage_policy.StoragePolicyCollection(policies)
        self.fake_ring_args = fake_ring_args or [None] * len(self.policies)

    def _setup_rings(self):
        """
        Our tests tend to use the policies rings like their own personal
        playground - which can be a problem in the particular case of a
        patched TestCase class where the FakeRing objects are scoped in the
        call to the patch_policies wrapper outside of the TestCase instance
        which can lead to some bled state.

        To help tests get better isolation without having to think about it,
        here we're capturing the args required to *build* new FakeRing
        instances so we can ensure each test method gets a clean ring setup.

        The TestCase can always "tweak" these fresh rings in setUp - or if
        they'd prefer to get the same "reset" behavior with custom FakeRing's
        they can pass in their own fake_ring_args to patch_policies instead of
        setting the object_ring on the policy definitions.
        """
        # zip() stops at the shorter sequence, so surplus policies (or
        # surplus args) are silently left alone.
        for policy, fake_ring_arg in zip(self.policies, self.fake_ring_args):
            if fake_ring_arg is not None:
                policy.object_ring = FakeRing(**fake_ring_arg)

    def __call__(self, thing):
        """
        Decorate ``thing``: classes get their ``setUp`` wrapped, anything
        else is treated as a callable and wrapped in the context manager.
        """
        if isinstance(thing, type):
            return self._patch_class(thing)
        else:
            return self._patch_method(thing)

    def _patch_class(self, cls):
        """
        Creating a new class that inherits from decorated class is the more
        common way I've seen class decorators done - but it seems to cause
        infinite recursion when super is called from inside methods in the
        decorated class.

        Instead ``cls.setUp`` is replaced in place and the same class object
        is returned.
        """

        orig_setUp = cls.setUp

        def unpatch_cleanup(cls_self):
            # Only undo the patch if this test instance still owns it.
            if cls_self._policies_patched:
                self.__exit__(None, None, None)
                cls_self._policies_patched = False

        def setUp(cls_self):
            # The flag keeps us from entering twice for one test instance
            # (which would overwrite the saved original policies).
            if not getattr(cls_self, '_policies_patched', False):
                self.__enter__()
                cls_self._policies_patched = True
                # Registered before orig_setUp runs so the patch is undone
                # even if the original setUp raises.
                cls_self.addCleanup(unpatch_cleanup, cls_self)
            orig_setUp(cls_self)

        cls.setUp = setUp

        return cls

    def _patch_method(self, f):
        """
        Return a wrapper that runs ``f`` with the policies patched.
        """
        @functools.wraps(f)
        def mywrapper(*args, **kwargs):
            with self:
                return f(*args, **kwargs)
        return mywrapper

    def __enter__(self):
        """
        Install ``self.policies`` as ``storage_policy._POLICIES`` and build
        fresh rings.

        The previous collection is remembered on ``self._orig_POLICIES``;
        note that entering the same instance again before exiting overwrites
        that saved value.  If building the rings fails the original
        collection is restored before the error propagates.

        :returns: this ``PatchPolicies`` instance, so that
                  ``with PatchPolicies(...) as patcher`` binds something
                  useful
        """
        self._orig_POLICIES = storage_policy._POLICIES
        storage_policy._POLICIES = self.policies
        try:
            self._setup_rings()
        except:  # noqa
            # Deliberately bare: whatever went wrong, put the global back
            # and let the original exception continue.
            self.__exit__(None, None, None)
            raise
        return self

    def __exit__(self, *args):
        """
        Restore the collection saved by ``__enter__``.

        Returns None, so exceptions raised inside the ``with`` body are never
        suppressed.
        """
        storage_policy._POLICIES = self._orig_POLICIES
|
2010-07-12 17:03:45 -05:00
|
|
|
|
2013-08-31 22:36:58 -04:00
|
|
|
|
2014-04-28 19:22:51 -07:00
|
|
|
class FakeRing(Ring):
|
2013-03-30 15:55:29 +03:00
|
|
|
|
2014-11-13 16:40:05 -08:00
|
|
|
def __init__(self, replicas=3, max_more_nodes=0, part_power=0,
|
2021-07-06 16:32:08 +10:00
|
|
|
base_port=1000, separate_replication=False,
|
|
|
|
next_part_power=None, reload_time=15):
|
2020-03-26 15:32:42 -04:00
|
|
|
self.serialized_path = '/foo/bar/object.ring.gz'
|
2014-11-13 16:40:05 -08:00
|
|
|
self._base_port = base_port
|
|
|
|
self.max_more_nodes = max_more_nodes
|
|
|
|
self._part_shift = 32 - part_power
|
EC Fragment Duplication - Foundational Global EC Cluster Support
This patch enables efficent PUT/GET for global distributed cluster[1].
Problem:
Erasure coding has the capability to decrease the amout of actual stored
data less then replicated model. For example, ec_k=6, ec_m=3 parameter
can be 1.5x of the original data which is smaller than 3x replicated.
However, unlike replication, erasure coding requires availability of at
least some ec_k fragments of the total ec_k + ec_m fragments to service
read (e.g. 6 of 9 in the case above). As such, if we stored the
EC object into a swift cluster on 2 geographically distributed data
centers which have the same volume of disks, it is likely the fragments
will be stored evenly (about 4 and 5) so we still need to access a
faraway data center to decode the original object. In addition, if one
of the data centers was lost in a disaster, the stored objects will be
lost forever, and we have to cry a lot. To ensure highly durable
storage, you would think of making *more* parity fragments (e.g.
ec_k=6, ec_m=10), unfortunately this causes *significant* performance
degradation due to the cost of mathmetical caluculation for erasure
coding encode/decode.
How this resolves the problem:
EC Fragment Duplication extends on the initial solution to add *more*
fragments from which to rebuild an object similar to the solution
described above. The difference is making *copies* of encoded fragments.
With experimental results[1][2], employing small ec_k and ec_m shows
enough performance to store/retrieve objects.
On PUT:
- Encode incomming object with small ec_k and ec_m <- faster!
- Make duplicated copies of the encoded fragments. The # of copies
are determined by 'ec_duplication_factor' in swift.conf
- Store all fragments in Swift Global EC Cluster
The duplicated fragments increase pressure on existing requirements
when decoding objects in service to a read request. All fragments are
stored with their X-Object-Sysmeta-Ec-Frag-Index. In this change, the
X-Object-Sysmeta-Ec-Frag-Index represents the actual fragment index
encoded by PyECLib, there *will* be duplicates. Anytime we must decode
the original object data, we must only consider the ec_k fragments as
unique according to their X-Object-Sysmeta-Ec-Frag-Index. On decode no
duplicate X-Object-Sysmeta-Ec-Frag-Index may be used when decoding an
object, duplicate X-Object-Sysmeta-Ec-Frag-Index should be expected and
avoided if possible.
On GET:
This patch inclues following changes:
- Change GET Path to sort primary nodes grouping as subsets, so that
each subset will includes unique fragments
- Change Reconstructor to be more aware of possibly duplicate fragments
For example, with this change, a policy could be configured such that
swift.conf:
ec_num_data_fragments = 2
ec_num_parity_fragments = 1
ec_duplication_factor = 2
(object ring must have 6 replicas)
At Object-Server:
node index (from object ring): 0 1 2 3 4 5 <- keep node index for
reconstruct decision
X-Object-Sysmeta-Ec-Frag-Index: 0 1 2 0 1 2 <- each object keeps actual
fragment index for
backend (PyEClib)
Additional improvements to Global EC Cluster Support will require
features such as Composite Rings, and more efficient fragment
rebalance/reconstruction.
1: http://goo.gl/IYiNPk (Swift Design Spec Repository)
2: http://goo.gl/frgj6w (Slide Share for OpenStack Summit Tokyo)
Doc-Impact
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Change-Id: Idd155401982a2c48110c30b480966a863f6bd305
2015-08-06 01:06:47 -07:00
|
|
|
self._init_device_char()
|
2020-06-15 17:09:15 -07:00
|
|
|
self.separate_replication = separate_replication
|
2013-03-30 15:55:29 +03:00
|
|
|
# 9 total nodes (6 more past the initial 3) is the cap, no matter if
|
|
|
|
# this is set higher, or R^2 for R replicas
|
2021-07-06 16:32:08 +10:00
|
|
|
self.reload_time = reload_time
|
2014-04-28 19:22:51 -07:00
|
|
|
self.set_replicas(replicas)
|
2021-07-06 16:32:08 +10:00
|
|
|
self._next_part_power = next_part_power
|
2014-04-28 19:22:51 -07:00
|
|
|
self._reload()
|
|
|
|
|
2017-06-02 17:47:25 -07:00
|
|
|
def has_changed(self):
    """
    Always report "unchanged".

    The real ring decides this by calling getmtime on serialized_path, a
    file our fake does not have, and by leaning on the real _reload, which
    we replace.  So there is nothing to check here.
    """
    return False
|
|
|
|
|
2014-04-28 19:22:51 -07:00
|
|
|
def _reload(self):
|
|
|
|
self._rtime = time.time()
|
|
|
|
|
EC Fragment Duplication - Foundational Global EC Cluster Support
This patch enables efficient PUT/GET for globally distributed clusters[1].
Problem:
Erasure coding can reduce the amount of data actually stored compared to
the replicated model. For example, with ec_k=6, ec_m=3 the stored data
can be 1.5x of the original data, which is smaller than 3x replicated.
However, unlike replication, erasure coding requires availability of at
least some ec_k fragments of the total ec_k + ec_m fragments to service
read (e.g. 6 of 9 in the case above). As such, if we stored the
EC object into a swift cluster on 2 geographically distributed data
centers which have the same volume of disks, it is likely the fragments
will be stored evenly (about 4 and 5) so we still need to access a
faraway data center to decode the original object. In addition, if one
of the data centers was lost in a disaster, the stored objects will be
lost forever, and we have to cry a lot. To ensure highly durable
storage, you would think of making *more* parity fragments (e.g.
ec_k=6, ec_m=10), unfortunately this causes *significant* performance
degradation due to the cost of mathematical calculation for erasure
coding encode/decode.
How this resolves the problem:
EC Fragment Duplication extends on the initial solution to add *more*
fragments from which to rebuild an object similar to the solution
described above. The difference is making *copies* of encoded fragments.
With experimental results[1][2], employing small ec_k and ec_m shows
enough performance to store/retrieve objects.
On PUT:
- Encode incoming object with small ec_k and ec_m <- faster!
- Make duplicated copies of the encoded fragments. The # of copies
are determined by 'ec_duplication_factor' in swift.conf
- Store all fragments in Swift Global EC Cluster
The duplicated fragments increase pressure on existing requirements
when decoding objects in service to a read request. All fragments are
stored with their X-Object-Sysmeta-Ec-Frag-Index. In this change, the
X-Object-Sysmeta-Ec-Frag-Index represents the actual fragment index
encoded by PyECLib, there *will* be duplicates. Anytime we must decode
the original object data, we must only consider the ec_k fragments as
unique according to their X-Object-Sysmeta-Ec-Frag-Index. On decode no
duplicate X-Object-Sysmeta-Ec-Frag-Index may be used when decoding an
object, duplicate X-Object-Sysmeta-Ec-Frag-Index should be expected and
avoided if possible.
On GET:
This patch includes the following changes:
- Change GET Path to sort primary nodes grouping as subsets, so that
each subset will include unique fragments
- Change Reconstructor to be more aware of possibly duplicate fragments
For example, with this change, a policy could be configured such that
swift.conf:
ec_num_data_fragments = 2
ec_num_parity_fragments = 1
ec_duplication_factor = 2
(object ring must have 6 replicas)
At Object-Server:
node index (from object ring): 0 1 2 3 4 5 <- keep node index for
reconstruct decision
X-Object-Sysmeta-Ec-Frag-Index: 0 1 2 0 1 2 <- each object keeps actual
fragment index for
backend (PyEClib)
Additional improvements to Global EC Cluster Support will require
features such as Composite Rings, and more efficient fragment
rebalance/reconstruction.
1: http://goo.gl/IYiNPk (Swift Design Spec Repository)
2: http://goo.gl/frgj6w (Slide Share for OpenStack Summit Tokyo)
Doc-Impact
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Change-Id: Idd155401982a2c48110c30b480966a863f6bd305
2015-08-06 01:06:47 -07:00
|
|
|
@property
def device_char(self):
    """
    Next device name from the cycling iterator.

    Every read advances ``self._device_char_iter``, so two consecutive
    accesses yield two different names.
    """
    name = next(self._device_char_iter)
    return name
|
|
|
|
|
|
|
|
def _init_device_char(self):
|
|
|
|
self._device_char_iter = itertools.cycle(
|
|
|
|
['sd%s' % chr(ord('a') + x) for x in range(26)])
|
|
|
|
|
2018-12-03 14:22:59 -08:00
|
|
|
def add_node(self, dev):
    """
    Append a device to ``self._devs``.

    :param dev: JSON-serializable device dict; a copy that has been
                serialized and parsed again is what actually gets stored
    """
    # round trip through json to ensure unicode like real rings
    serialized = json.dumps(dev)
    round_tripped = json.loads(serialized)
    self._devs.append(round_tripped)
|
|
|
|
|
2013-03-30 15:55:29 +03:00
|
|
|
def set_replicas(self, replicas):
    """
    Throw away the current devices and create ``replicas`` fresh ones.

    Device ``n`` gets ip ``10.0.0.n`` and port ``self._base_port + n``.
    When ``self.separate_replication`` is set its replication endpoint is
    ``10.0.1.n`` on that port plus 100; otherwise the replication endpoint
    mirrors the primary one.  Zones cycle over 3 values, regions over 2,
    and device names restart from the beginning of the name generator.

    :param replicas: number of devices to create; also stored as
                     ``self.replicas``
    """
    self.replicas = replicas
    self._devs = []
    self._init_device_char()
    for dev_id in range(self.replicas):
        ip = '10.0.0.%s' % dev_id
        port = self._base_port + dev_id
        # Start from the "shared endpoint" case and override if needed.
        repl_ip, repl_port = ip, port
        if self.separate_replication:
            repl_ip = '10.0.1.%s' % dev_id
            repl_port = port + 100
        self.add_node({
            'ip': ip,
            'replication_ip': repl_ip,
            'port': port,
            'replication_port': repl_port,
            'device': self.device_char,
            'zone': dev_id % 3,
            'region': dev_id % 2,
            'id': dev_id,
            'weight': 1,
        })
|
2013-03-30 15:55:29 +03:00
|
|
|
|
|
|
|
@property
def replica_count(self):
    """The value last stored in ``self.replicas``."""
    count = self.replicas
    return count
|
|
|
|
|
2014-04-28 19:22:51 -07:00
|
|
|
def _get_part_nodes(self, part):
|
Foundational support for PUT and GET of erasure-coded objects
This commit makes it possible to PUT an object into Swift and have it
stored using erasure coding instead of replication, and also to GET
the object back from Swift at a later time.
This works by splitting the incoming object into a number of segments,
erasure-coding each segment in turn to get fragments, then
concatenating the fragments into fragment archives. Segments are 1 MiB
in size, except the last, which is between 1 B and 1 MiB.
+====================================================================+
| object data |
+====================================================================+
|
+------------------------+----------------------+
| | |
v v v
+===================+ +===================+ +==============+
| segment 1 | | segment 2 | ... | segment N |
+===================+ +===================+ +==============+
| |
| |
v v
/=========\ /=========\
| pyeclib | | pyeclib | ...
\=========/ \=========/
| |
| |
+--> fragment A-1 +--> fragment A-2
| |
| |
| |
| |
| |
+--> fragment B-1 +--> fragment B-2
| |
| |
... ...
Then, object server A gets the concatenation of fragment A-1, A-2,
..., A-N, so its .data file looks like this (called a "fragment archive"):
+=====================================================================+
| fragment A-1 | fragment A-2 | ... | fragment A-N |
+=====================================================================+
Since this means that the object server never sees the object data as
the client sent it, we have to do a few things to ensure data
integrity.
First, the proxy has to check the Etag if the client provided it; the
object server can't do it since the object server doesn't see the raw
data.
Second, if the client does not provide an Etag, the proxy computes it
and uses the MIME-PUT mechanism to provide it to the object servers
after the object body. Otherwise, the object would not have an Etag at
all.
Third, the proxy computes the MD5 of each fragment archive and sends
it to the object server using the MIME-PUT mechanism. With replicated
objects, the proxy checks that the Etags from all the object servers
match, and if they don't, returns a 500 to the client. This mitigates
the risk of data corruption in one of the proxy --> object connections,
and signals to the client when it happens. With EC objects, we can't
use that same mechanism, so we must send the checksum with each
fragment archive to get comparable protection.
On the GET path, the inverse happens: the proxy connects to a bunch of
object servers (M of them, for an M+K scheme), reads one fragment at a
time from each fragment archive, decodes those fragments into a
segment, and serves the segment to the client.
When an object server dies partway through a GET response, any
partially-fetched fragment is discarded, the resumption point is wound
back to the nearest fragment boundary, and the GET is retried with the
next object server.
GET requests for a single byterange work; GET requests for multiple
byteranges do not.
There are a number of things _not_ included in this commit. Some of
them are listed here:
* multi-range GET
* deferred cleanup of old .data files
* durability (daemon to reconstruct missing archives)
Co-Authored-By: Alistair Coles <alistair.coles@hp.com>
Co-Authored-By: Thiago da Silva <thiago@redhat.com>
Co-Authored-By: John Dickinson <me@not.mn>
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Co-Authored-By: Tushar Gohad <tushar.gohad@intel.com>
Co-Authored-By: Paul Luse <paul.e.luse@intel.com>
Co-Authored-By: Christian Schwede <christian.schwede@enovance.com>
Co-Authored-By: Yuan Zhou <yuan.zhou@intel.com>
Change-Id: I9c13c03616489f8eab7dcd7c5f21237ed4cb6fd2
2014-10-22 13:18:34 -07:00
|
|
|
return [dict(node, index=i) for i, node in enumerate(list(self._devs))]
|
2013-03-30 15:55:29 +03:00
|
|
|
|
2013-06-13 11:24:29 -07:00
|
|
|
def get_more_nodes(self, part):
    """
    Generate fake handoff node dicts.

    Device ids run from ``self.replicas`` up to (but not including)
    ``self.replicas + self.max_more_nodes``; ``part`` is not used.

    :param part: the partition (ignored by this fake)
    :returns: a generator of device dicts, each carrying a zero-based
              ``handoff_index``
    """
    first_id = self.replicas
    end_id = self.replicas + self.max_more_nodes
    for handoff_index, dev_id in enumerate(range(first_id, end_id)):
        ip = '10.0.0.%s' % dev_id
        port = self._base_port + dev_id
        repl_ip, repl_port = ip, port
        if self.separate_replication:
            # replication traffic gets its own subnet and a shifted port
            repl_ip = '10.0.1.%s' % dev_id
            repl_port = port + 100
        yield {'ip': ip,
               'replication_ip': repl_ip,
               'port': port,
               'replication_port': repl_port,
               'device': 'sda',
               'zone': dev_id % 3,
               'region': dev_id % 2,
               'id': dev_id,
               'handoff_index': handoff_index}
|
2013-03-30 15:55:29 +03:00
|
|
|
|
|
|
|
|
2014-04-28 19:22:51 -07:00
|
|
|
def write_fake_ring(path, *devs):
    """
    Pretty much just a two node, two replica, 2 part power ring...

    :param path: where to write the gzipped, pickled ring data
    :param devs: either nothing, or exactly two dicts of overrides that
                 are applied to the first and second device respectively
    """
    ring_devs = [
        {'id': 0, 'zone': 0, 'device': 'sda1', 'ip': '127.0.0.1',
         'port': 6200},
        {'id': 1, 'zone': 0, 'device': 'sdb1', 'ip': '127.0.0.1',
         'port': 6200},
    ]
    # the unpacking enforces "no overrides, or one override per device"
    first_updates, second_updates = devs or ({}, {})
    for dev, updates in zip(ring_devs, (first_updates, second_updates)):
        dev.update(updates)

    with closing(GzipFile(path, 'wb')) as f:
        # two replicas of four partitions (part_shift 30 == part power 2)
        ring_data = RingData([[0, 1, 0, 1], [1, 0, 1, 0]], ring_devs, 30)
        pickle.dump(ring_data, f)
|
|
|
|
|
|
|
|
|
2017-06-20 11:17:33 +01:00
|
|
|
def write_stub_builder(tmpdir, region=1, name=''):
    """
    Pretty much just a three node, three replica, 8 part power builder...

    :param tmpdir: a place to write the builder, be sure to clean it up!
    :param region: an integer, fills in region and ip
    :param name: the name of the builder (the file is NAME.builder);
                 defaults to the region number
    :returns: a tuple of (builder, path to the saved builder file)
    """
    builder_name = name or str(region)
    replica_count = 3
    builder = RingBuilder(8, replica_count, 1)
    for index in range(replica_count):
        # every device lives on the same ip/zone; only the name differs
        builder.add_dev({
            'weight': 100,
            'region': '%d' % region,
            'zone': '1',
            'ip': '10.0.0.%d' % region,
            'port': '3600',
            'device': 'sdb%d' % index,
        })
    builder.rebalance()
    builder_file = os.path.join(tmpdir, '%s.builder' % builder_name)
    builder.save(builder_file)
    return builder, builder_file
|
|
|
|
|
|
|
|
|
2015-03-31 22:35:37 -07:00
|
|
|
class FabricatedRing(Ring):
    """
    When a FakeRing just won't do - you can fabricate one to meet
    your tests needs.
    """

    def __init__(self, replicas=6, devices=8, nodes=4, port=6200,
                 part_power=4):
        """
        :param replicas: number of replicas in the assignment table
        :param devices: number of devices to fabricate
        :param nodes: number of distinct ips the devices are spread over
        :param port: port (and replication port) given to every device
        :param part_power: stored as ``_part_shift = 32 - part_power``
        """
        self.devices = devices
        self.nodes = nodes
        self.port = port
        self.replicas = replicas
        self._part_shift = 32 - part_power
        self._reload()

    def has_changed(self):
        """Always report the ring as unchanged."""
        return False

    def _reload(self, *args, **kwargs):
        """
        Fabricate the devices and assignment table; only the first call
        builds anything, later calls just refresh ``_rtime``.
        """
        self._rtime = time.time() * 2
        if hasattr(self, '_replica2part2dev_id'):
            return

        devs = []
        for dev_id in range(self.devices):
            # devices are spread round-robin across the fake nodes
            node_ip = '10.0.0.%d' % (dev_id % self.nodes)
            devs.append({
                'region': 1,
                'zone': 1,
                'weight': 1.0,
                'id': dev_id,
                'device': 'sda%d' % dev_id,
                'ip': node_ip,
                'replication_ip': node_ip,
                'port': self.port,
                'replication_port': self.port,
            })
        self._devs = devs

        # NOTE(review): part_power is not set in this class; presumably a
        # property inherited from Ring - confirm
        part_count = 2 ** self.part_power
        table = self._replica2part2dev_id = [
            [None] * part_count for _ in range(self.replicas)
        ]
        # hand out device ids in order, partition by partition
        next_dev_id = itertools.cycle(range(self.devices))
        for part in range(part_count):
            for replica in range(self.replicas):
                table[replica][part] = next(next_dev_id)
        self._update_bookkeeping()
|
2015-03-31 22:35:37 -07:00
|
|
|
|
|
|
|
|
2022-01-06 10:10:40 -08:00
|
|
|
def track(f):
    """
    Decorator that records every call of a method on ``self.calls``.

    Each invocation appends ``mock.call.<method name>(*args, **kwargs)`` to
    the instance's ``calls`` list before delegating to the wrapped method.

    :param f: the method to wrap; its owner must have a ``calls`` list
    :returns: the wrapping function, carrying ``f``'s name and docstring
    """
    @functools.wraps(f)
    def wrapper(self, *a, **kw):
        self.calls.append(getattr(mocklib.call, f.__name__)(*a, **kw))
        return f(self, *a, **kw)
    return wrapper
|
|
|
|
|
|
|
|
|
2013-03-30 15:55:29 +03:00
|
|
|
class FakeMemcache(object):
    """
    In-memory stand-in for a memcache client.

    Values live in ``store``, the ``time`` passed to ``set`` in ``times``,
    and tracked method calls are appended to ``calls``.
    """

    def __init__(self, error_on_set=None, error_on_get=None):
        """
        :param error_on_set: list of flags consumed one per ``set`` call; a
                             true flag makes that call an error
        :param error_on_get: same, for ``get`` calls
        """
        self.store = {}
        self.times = {}
        self.calls = []
        self.error_on_incr = False
        self.error_on_get = error_on_get or []
        self.error_on_set = error_on_set or []
        self.init_incr_return_neg = False

    def clear_calls(self):
        """Forget recorded calls, keeping the same list object."""
        del self.calls[:]

    @track
    def get(self, key, raise_on_error=False):
        # a flag is consumed on every call, but it only turns into an
        # exception when the caller asked for errors to be raised
        inject_error = self.error_on_get and self.error_on_get.pop(0)
        if inject_error and raise_on_error:
            raise MemcacheConnectionError()
        return self.store.get(key)

    @property
    def keys(self):
        # hand back the bound method so ``fake.keys()`` lists stored keys
        return self.store.keys

    @track
    def set(self, key, value, serialize=True, time=0, raise_on_error=False):
        inject_error = self.error_on_set and self.error_on_set.pop(0)
        if inject_error and raise_on_error:
            raise MemcacheConnectionError()
        if not serialize:
            assert isinstance(value, (str, bytes))
        else:
            # round-trip through json, as serialization would
            value = json.loads(json.dumps(value))
        self.store[key] = value
        self.times[key] = time
        return True

    @track
    def incr(self, key, delta=1, time=0):
        if self.error_on_incr:
            raise MemcacheConnectionError('Memcache restarting')
        if self.init_incr_return_neg:
            # simulate initial hit, force reset of memcache
            self.init_incr_return_neg = False
            return -10000000
        current = int(self.store.setdefault(key, 0))
        # counters never go below zero
        self.store[key] = max(current + delta, 0)
        return self.store[key]

    # tracked via incr()
    def decr(self, key, delta=1, time=0):
        return self.incr(key, delta=-delta, time=time)

    @track
    def delete(self, key):
        # best effort: deleting a missing key is not an error
        try:
            del self.store[key]
            del self.times[key]
        except Exception:
            pass
        return True

    def delete_all(self):
        """Drop every stored value and its recorded time."""
        self.store.clear()
        self.times.clear()


# This decorator only makes sense in the context of FakeMemcache;
# may as well clean it up now
del track
|
|
|
|
|
|
|
|
|
2020-12-29 15:09:24 +00:00
|
|
|
class FakeIterable(object):
    """
    Iterator wrapper that counts how often it is advanced and closed.
    """

    def __init__(self, values):
        """
        :param values: any iterable; it is wrapped with ``iter()`` once
        """
        self.values = iter(values)
        self.next_call_count = 0
        self.close_call_count = 0

    def __iter__(self):
        return self

    def __next__(self):
        # count the attempt first, so the final StopIteration is counted too
        self.next_call_count += 1
        return next(self.values)

    next = __next__  # py2

    def close(self):
        """Record that close() was called; nothing is released."""
        self.close_call_count += 1
|
|
|
|
|
|
|
|
|
2010-07-12 17:03:45 -05:00
|
|
|
def readuntil2crlfs(fd):
    """
    Read from ``fd`` one byte at a time until two CRLFs in a row are seen.

    :param fd: file-like object whose ``read(1)`` returns bytes
    :returns: everything read, including the terminating CRLF CRLF
    :raises ValueError: if ``fd`` is exhausted before two CRLFs arrive
    """
    received = bytearray()
    previous = b''
    crlf_count = 0
    while crlf_count < 2:
        current = fd.read(1)
        if not current:
            raise ValueError(
                "didn't get two CRLFs; just got %r" % bytes(received))
        received += current
        if current == b'\n' and previous == b'\r':
            crlf_count += 1
        elif current == b'\r' and previous != b'\n':
            # a CR that does not directly follow a CRLF starts a new run
            crlf_count = 0
        previous = current
    return bytes(received)
|
|
|
|
|
|
|
|
|
2023-04-25 14:31:58 -07:00
|
|
|
def readlength(fd, size, timeout=1.0):
    """
    Read up to ``size`` bytes from ``fd``, at most 64 bytes per read call.

    :param fd: file-like object whose ``read(n)`` returns bytes
    :param size: total number of bytes wanted
    :param timeout: seconds allowed for the whole read
    :returns: the bytes read; shorter than ``size`` only when ``fd``
              reached end-of-file first
    :raises eventlet.Timeout: if the read does not finish within
                              ``timeout``
    """
    buf = b''
    with eventlet.Timeout(timeout):
        while len(buf) < size:
            chunk = fd.read(min(64, size - len(buf)))
            if not chunk:
                # EOF: an exhausted fd would otherwise spin here, and a
                # loop that never yields cannot be interrupted by Timeout
                break
            buf += chunk
    return buf
|
|
|
|
|
|
|
|
|
2010-07-12 17:03:45 -05:00
|
|
|
def connect_tcp(hostport):
    """
    Open a TCP connection and return the connected socket.

    :param hostport: the address passed straight to ``socket.connect``
    :returns: the connected socket; the caller is responsible for closing
    """
    rv = socket.socket()
    try:
        rv.connect(hostport)
    except BaseException:
        # don't leak the descriptor when the connection attempt fails
        rv.close()
        raise
    return rv
|
2010-07-29 13:30:16 -05:00
|
|
|
|
2010-11-11 16:41:07 -06:00
|
|
|
|
|
|
|
@contextmanager
def tmpfile(content):
    """
    Context manager yielding the path of a temp file holding ``content``.

    :param content: written to the file as ``str(content)``
    :returns: yields the file name; the file is removed on exit
    """
    handle = NamedTemporaryFile('w', delete=False)
    with handle:
        file_name = handle.name
        handle.write(str(content))
    try:
        yield file_name
    finally:
        os.unlink(file_name)
|
|
|
|
|
|
|
|
|
2011-02-11 13:18:19 -06:00
|
|
|
@contextmanager
def temptree(files, contents=''):
    """
    Context manager yielding a temp directory populated with ``files``.

    :param files: paths to create below the temp directory; absolute paths
                  are re-rooted under it
    :param contents: iterable of file contents matched to ``files`` by
                     position; missing entries become empty files
    :returns: yields the directory path; the tree is removed on exit
    """
    # generate enough contents to fill the files
    file_count = len(files)
    padded = list(contents)
    padded.extend([''] * file_count)
    del padded[file_count:]

    tempdir = mkdtemp()
    for path, content in zip(files, padded):
        # a leading '.' keeps an absolute path inside the temp directory
        relative = '.' + path if os.path.isabs(path) else path
        new_path = os.path.join(tempdir, relative)
        parent = os.path.dirname(new_path)
        if not os.path.exists(parent):
            os.makedirs(parent)
        with open(new_path, 'w') as f:
            f.write(str(content))
    try:
        yield tempdir
    finally:
        rmtree(tempdir)
|
|
|
|
|
|
|
|
|
2014-05-27 16:57:25 -07:00
|
|
|
def with_tempdir(f):
    """
    Decorator to give a single test a tempdir as argument to test method.

    The directory is created just before the call, passed as the last
    positional argument, and removed afterwards even if the call raised.
    """
    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        tempdir = mkdtemp()
        try:
            return f(*(args + (tempdir,)), **kwargs)
        finally:
            rmtree(tempdir)
    return wrapped
|
|
|
|
|
|
|
|
|
Adding StatsD logging to Swift.
Documentation, including a list of metrics reported and their semantics,
is in the Admin Guide in a new section, "Reporting Metrics to StatsD".
An optional "metric prefix" may be configured which will be prepended to
every metric name sent to StatsD.
Here is the rationale for doing a deep integration like this versus only
sending metrics to StatsD in middleware. It's the only way to report
some internal activities of Swift in a real-time manner. So to have one
way of reporting to StatsD and one place/style of configuration, even
some things (like, say, timing of PUT requests into the proxy-server)
which could be logged via middleware are consistently logged the same
way (deep integration via the logger delegate methods).
When log_statsd_host is configured, get_logger() injects a
swift.common.utils.StatsdClient object into the logger as
logger.statsd_client. Then a set of delegate methods on LogAdapter
either pass through to the StatsdClient object or become no-ops. This
allows StatsD logging to look like:
self.logger.increment('some.metric.here')
and do the right thing in all cases and with no messy conditional logic.
I wanted to use the pystatsd module for the StatsD client, but the
version on PyPI is lagging the git repo (and is missing both the prefix
functionality and timing_since() method). So I wrote my
swift.common.utils.StatsdClient. The interface is the same as
pystatsd.Client, but the code was written from scratch. It's pretty
simple, and the tests I added cover it. This also frees Swift from an
optional dependency on the pystatsd module, making this feature easier
to enable.
There's test coverage for the new code and all existing tests continue
to pass.
Refactored out _one_audit_pass() method in swift/account/auditor.py and
swift/container/auditor.py.
Fixed some misc. PEP8 violations.
Misc test cleanups and refactorings (particularly the way "fake logging"
is handled).
Change-Id: Ie968a9ae8771f59ee7591e2ae11999c44bfe33b2
2012-04-01 16:47:08 -07:00
|
|
|
class NullLoggingHandler(logging.Handler):
    """Logging handler that silently discards every record."""

    def emit(self, record):
        # intentionally a no-op
        return None
|
|
|
|
|
2013-09-01 15:10:39 -04:00
|
|
|
|
2013-08-30 21:37:07 -07:00
|
|
|
class UnmockTimeModule(object):
    """
    Stand-in for the ``time`` module that always exposes the real
    ``time.time``.

    Even if a test mocks time.time, you can restore the unmocked behavior
    in another module that imports time directly by monkey patching its
    imported reference to the module with an instance of this class.
    """

    # captured when this class is defined, before any test can patch it
    _orig_time = time.time

    def __getattribute__(self, name):
        if name != 'time':
            # everything else is delegated to the real module
            return getattr(time, name)
        return UnmockTimeModule._orig_time


# logging.LogRecord.__init__ calls time.time
logging.time = UnmockTimeModule()
|
|
|
|
|
Adding StatsD logging to Swift.
Documentation, including a list of metrics reported and their semantics,
is in the Admin Guide in a new section, "Reporting Metrics to StatsD".
An optional "metric prefix" may be configured which will be prepended to
every metric name sent to StatsD.
Here is the rationale for doing a deep integration like this versus only
sending metrics to StatsD in middleware. It's the only way to report
some internal activities of Swift in a real-time manner. So to have one
way of reporting to StatsD and one place/style of configuration, even
some things (like, say, timing of PUT requests into the proxy-server)
which could be logged via middleware are consistently logged the same
way (deep integration via the logger delegate methods).
When log_statsd_host is configured, get_logger() injects a
swift.common.utils.StatsdClient object into the logger as
logger.statsd_client. Then a set of delegate methods on LogAdapter
either pass through to the StatsdClient object or become no-ops. This
allows StatsD logging to look like:
self.logger.increment('some.metric.here')
and do the right thing in all cases and with no messy conditional logic.
I wanted to use the pystatsd module for the StatsD client, but the
version on PyPI is lagging the git repo (and is missing both the prefix
functionality and timing_since() method). So I wrote my
swift.common.utils.StatsdClient. The interface is the same as
pystatsd.Client, but the code was written from scratch. It's pretty
simple, and the tests I added cover it. This also frees Swift from an
optional dependency on the pystatsd module, making this feature easier
to enable.
There's test coverage for the new code and all existing tests continue
to pass.
Refactored out _one_audit_pass() method in swift/account/auditor.py and
swift/container/auditor.py.
Fixed some misc. PEP8 violations.
Misc test cleanups and refactorings (particularly the way "fake logging"
is handled).
Change-Id: Ie968a9ae8771f59ee7591e2ae11999c44bfe33b2
2012-04-01 16:47:08 -07:00
|
|
|
|
2012-04-30 16:38:15 -04:00
|
|
|
# keep a handle on the real handler class before it can be replaced
original_syslog_handler = logging.handlers.SysLogHandler


def fake_syslog_handler():
    """
    Replace ``logging.handlers.SysLogHandler`` with ``FakeLogger``.

    The ``LOG*`` constants and ``priority_map`` are copied onto FakeLogger
    first. They are always read from the saved original handler class, so
    the result does not depend on what is currently installed at
    ``logging.handlers.SysLogHandler``.
    """
    for attr in dir(original_syslog_handler):
        if attr.startswith('LOG'):
            setattr(FakeLogger, attr,
                    copy.copy(getattr(original_syslog_handler, attr)))
    FakeLogger.priority_map = \
        copy.deepcopy(original_syslog_handler.priority_map)

    logging.handlers.SysLogHandler = FakeLogger


# opt-in: only swap the handler when the unit_test config asks for it
if utils.config_true_value(
        get_config('unit_test').get('fake_syslog', 'False')):
    fake_syslog_handler()
|
|
|
|
|
2011-03-15 22:12:03 -07:00
|
|
|
|
2017-06-02 17:47:25 -07:00
|
|
|
@contextmanager
def quiet_eventlet_exceptions():
    """
    Context manager that turns eventlet hub exception reporting off for
    the duration of the block, then passes the previously saved
    ``greenpool.DEBUG`` value back to ``hub_exceptions``.
    """
    previous_state = greenpool.DEBUG
    eventlet_debug.hub_exceptions(False)
    try:
        yield
    finally:
        eventlet_debug.hub_exceptions(previous_state)
|
|
|
|
|
|
|
|
|
2017-04-19 15:09:40 +02:00
|
|
|
@contextmanager
def mock_check_drive(isdir=False, ismount=False):
    """
    All device/drive/mount checking should be done through the constraints
    module. If we keep the mocking consistently within that module, we can
    keep our tests robust to further rework on that interface.

    Replace the constraint modules underlying os calls with mocks.

    :param isdir: return value of constraints isdir calls, default False
    :param ismount: return value of constraints ismount calls, default False
    :returns: a dict of constraint module mocks
    """
    constraints = 'swift.common.constraints.'
    with mocklib.patch(constraints + 'isdir') as fake_isdir:
        with mocklib.patch(constraints + 'utils.ismount') as fake_ismount:
            fake_isdir.return_value = isdir
            fake_ismount.return_value = ismount
            yield {'isdir': fake_isdir, 'ismount': fake_ismount}
|
2012-08-21 12:51:59 -07:00
|
|
|
|
|
|
|
|
|
|
|
@contextmanager
def mock(update):
    """
    Context manager that temporarily sets attributes named by dotted path.

    Each key looks like ``'package.module.attr'`` (further attribute hops
    are allowed); the first component is imported and the rest are followed
    with ``getattr``. On exit every attribute is put back, or deleted if it
    did not exist before.

    Patching happens inside the ``try`` so that if a later key cannot be
    resolved, the keys already patched are still restored.

    :param update: dict mapping dotted attribute paths to replacement values
    :returns: yields True
    """
    missing = object()  # marks attributes that did not exist beforehand
    undo = []
    try:
        for key, value in update.items():
            imports = key.split('.')
            attr = imports.pop(-1)
            module = __import__(imports[0], fromlist=imports[1:])
            for modname in imports[1:]:
                module = getattr(module, modname)
            previous = getattr(module, attr, missing)
            setattr(module, attr, value)
            undo.append((module, attr, previous))
        yield True
    finally:
        # unwind newest-first so overlapping patches restore correctly
        for module, attr, previous in reversed(undo):
            if previous is missing:
                delattr(module, attr)
            else:
                setattr(module, attr, previous)
|
2013-03-20 19:26:45 -07:00
|
|
|
|
|
|
|
|
2015-05-06 16:29:06 -07:00
|
|
|
class FakeStatus(object):
    """
    This will work with our fake_http_connect, if you hand in one of these
    instead of a status int or status int tuple to the "codes" iter you can
    add some eventlet sleep to the expect and response stages of the
    connection.
    """

    def __init__(self, status, expect_sleep=None, response_sleep=None):
        """
        :param status: the response status int, or a tuple of
                       ([expect_status, ...], response_status)
        :param expect_sleep: float, time to eventlet sleep during expect, can
                             be a iter of floats
        :param response_sleep: float, time to eventlet sleep during response
        :raises: ``status`` itself when it is an exception instance or
                 class, simulating a failure to connect
        """
        # connect exception
        if inspect.isclass(status) and issubclass(status, Exception):
            raise status('FakeStatus Error')
        if isinstance(status, (Exception, eventlet.Timeout)):
            raise status
        if isinstance(status, tuple):
            self.expect_status = list(status[:-1])
            self.status = status[-1]
            self.explicit_expect_list = True
        else:
            self.expect_status, self.status = ([], status)
            self.explicit_expect_list = False
        if not self.expect_status:
            # when a swift backend service returns a status before reading
            # from the body (mostly an error response) eventlet.wsgi will
            # respond with that status line immediately instead of 100
            # Continue, even if the client sent the Expect 100 header.
            # BufferedHttp and the proxy both see these error statuses
            # when they call getexpect, so our FakeConn tries to act like
            # our backend services and return certain types of responses
            # as expect statuses just like a real backend server would do.
            if self.status in (507, 412, 409):
                # use the unpacked response status: ``status`` may be a
                # 1-tuple such as (507,), which must not end up in the list
                self.expect_status = [self.status]
            else:
                self.expect_status = [100, 100]

        # setup sleep attributes
        if not isinstance(expect_sleep, (list, tuple)):
            expect_sleep = [expect_sleep] * len(self.expect_status)
        self.expect_sleep_list = list(expect_sleep)
        # pad with "no sleep" so there is one entry per expect status
        shortfall = len(self.expect_status) - len(self.expect_sleep_list)
        self.expect_sleep_list.extend([None] * shortfall)
        self.response_sleep = response_sleep

    def __repr__(self):
        return '%s(%s, expect_status=%r, response_sleep=%s)' % (
            self.__class__.__name__, self.status,
            self.expect_status, self.response_sleep)

    def get_response_status(self):
        """
        Sleep if configured, then return (or raise) the response status.

        :raises Exception: if an explicitly supplied expect list still has
                           unconsumed entries
        """
        if self.response_sleep is not None:
            eventlet.sleep(self.response_sleep)
        if self.expect_status and self.explicit_expect_list:
            raise Exception('Test did not consume all fake '
                            'expect status: %r' % (self.expect_status,))
        if isinstance(self.status, (Exception, eventlet.Timeout)):
            raise self.status
        return self.status

    def get_expect_status(self):
        """
        Consume the next expect status and its sleep; sleep if configured,
        then return (or raise) that status.
        """
        expect_sleep = self.expect_sleep_list.pop(0)
        if expect_sleep is not None:
            eventlet.sleep(expect_sleep)
        expect_status = self.expect_status.pop(0)
        if isinstance(expect_status, (Exception, eventlet.Timeout)):
            raise expect_status
        return expect_status
|
|
|
|
|
|
|
|
|
2015-03-31 22:35:37 -07:00
|
|
|
class SlowBody(object):
    """
    This will work with our fake_http_connect, if you hand in these
    instead of strings it will make reads take longer by the given
    amount.  It should be a little bit easier to extend than the
    current slow kwarg - which inserts whitespace in the response.
    Also it should be easy to detect if you have one of these (or a
    subclass) for the body inside of FakeConn if we wanted to do
    something smarter than just duck-type the str/buffer api
    enough to get by.
    """

    def __init__(self, body, slowness):
        """
        :param body: the real body being wrapped
        :param slowness: seconds to sleep each time the body is consumed
        """
        self.slowness = slowness
        self.body = body

    def slowdown(self):
        """Sleep for ``slowness`` seconds."""
        eventlet.sleep(self.slowness)

    def __getitem__(self, s):
        # slices stay slow: wrap the piece with the same delay
        piece = self.body[s]
        return SlowBody(piece, self.slowness)

    def __len__(self):
        return len(self.body)

    def __radd__(self, other):
        # appending this body to something else is what costs the delay
        self.slowdown()
        return other + self.body
|
|
|
|
|
|
|
|
|
2013-03-20 19:26:45 -07:00
|
|
|
def fake_http_connect(*code_iter, **kwargs):
    """
    Build a fake ``connect`` callable that hands out canned connections.

    Every call to the returned function consumes the next status from
    ``code_iter`` and returns a ``FakeConn`` built from it.  The returned
    function also carries two attributes: ``unexpected_requests`` (a list
    of ``(args, kwargs)`` for calls made after the statuses ran out) and
    ``code_iter`` (the iterator over the statuses not yet consumed).

    :param code_iter: one status per expected connection; an int status
                      that is <= 0 makes ``connect`` raise HTTPException
    :param kwargs: options read by this helper:

        * ``timestamps``, ``etags``: per-connection values
        * ``headers``, ``expect_headers``: a list/tuple of per-connection
          values, or a single value reused for every connection
        * ``missing_container``: a list/tuple of per-response flags or a
          single flag; a ``False`` entry adds ``x-container-timestamp``
          to the response headers
        * ``body``: static body for every connection, or ``body_iter``:
          an iterable of per-connection bodies
        * ``slow``: a list of per-connection sleep values (popped as
          connections are created), a number, or any truthy value
        * ``slow_connect``: sleep 0.1 before handing out a connection
        * ``raise_exc``, ``raise_timeout_exc``: make ``getresponse`` raise
        * ``count``: value for ``x-account-container-count`` on 2xx
        * ``give_content_type``, ``give_connect``, ``give_send``,
          ``give_expect``: callbacks invoked by ``connect``/``FakeConn``
        * ``capture_connections``: a list each new FakeConn is appended to
    """

    class FakeConn(object):
        """Canned connection that doubles as its own response object."""

        # number of initial reads/sends that are slowed when "slow" is on
        SLOW_READS = 4
        SLOW_WRITES = 4

        def __init__(self, status, etag=None, body=b'', timestamp=-1,
                     headers=None, expect_headers=None, connection_id=None,
                     give_send=None, give_expect=None):
            if not isinstance(status, FakeStatus):
                status = FakeStatus(status)
            self._status = status
            self.reason = 'Fake'
            self.host = '1.2.3.4'
            self.port = '1234'
            self.sent = 0
            self.received = 0
            self.etag = etag
            self.body = body
            self.headers = headers or {}
            self.expect_headers = expect_headers or {}
            if timestamp == -1:
                # -1 is reserved to mean "magic default"
                self.timestamp = '1' if status.status != 404 else '0'
            else:
                # tests may specify int, string, Timestamp or None
                self.timestamp = timestamp
            self.connection_id = connection_id
            self.give_send = give_send
            self.give_expect = give_expect
            self.closed = False

            # with a list of sleeps, each new connection claims the next one
            slow_schedule = kwargs.get('slow')
            if isinstance(slow_schedule, list):
                try:
                    self._next_sleep = slow_schedule.pop(0)
                except IndexError:
                    self._next_sleep = None

            # if we're going to be slow, we need a body to send slowly
            if self.get_slow()[0]:
                shortfall = self.SLOW_READS - len(self.body)
                if shortfall > 0:
                    self.body += b" " * shortfall

            # be nice to trixy bits with node_iter's
            eventlet.sleep()

        def getresponse(self):
            """Raise any configured error, else set status; return self."""
            error = kwargs.get('raise_exc')
            if error:
                if not isinstance(error, (Exception, eventlet.Timeout)):
                    # any other truthy value means "raise a generic error"
                    error = Exception('test')
                raise error
            if kwargs.get('raise_timeout_exc'):
                raise eventlet.Timeout()
            self.status = self._status.get_response_status()
            return self

        def getexpect(self):
            """Return a new FakeConn carrying the expect status."""
            if self.give_expect:
                self.give_expect(self)
            expect_status = self._status.get_expect_status()
            expect_headers = dict(self.expect_headers)
            if expect_status == 409:
                expect_headers['X-Backend-Timestamp'] = self.timestamp
            expect_response = FakeConn(expect_status,
                                       timestamp=self.timestamp,
                                       headers=expect_headers)
            expect_response.status = expect_status
            return expect_response

        def getheaders(self):
            """Return the response headers as key/value items."""
            etag = self.etag
            if not etag:
                if isinstance(self.body, bytes):
                    digest = md5(
                        self.body, usedforsecurity=False).hexdigest()
                    etag = '"' + digest + '"'
                else:
                    etag = '"68b329da9893e34099c7d8ad5cb9c940"'

            am_slow, _junk = self.get_slow()
            headers = HeaderKeyDict({
                'content-length': len(self.body),
                'content-type': 'x-application/test',
                'x-timestamp': self.timestamp,
                'x-backend-timestamp': self.timestamp,
                'last-modified': self.timestamp,
                'x-object-meta-test': 'testing',
                'x-delete-at': '9876543210',
                'etag': etag,
                'x-works': 'yes',
            })
            if self.status // 100 == 2:
                container_count = kwargs.get('count', 12345)
                headers['x-account-container-count'] = container_count
            if not self.timestamp:
                # when timestamp is None, HeaderKeyDict raises KeyError
                headers.pop('x-timestamp', None)
            # an exhausted iterator is treated like "nothing to add"
            if next(container_ts_iter, None) is False:
                headers['x-container-timestamp'] = '1'
            headers.update(self.headers)
            return headers.items()

        def get_slow(self):
            """Return ``(am_slow, sleep_value)`` for this connection."""
            slow = kwargs.get('slow')
            if isinstance(slow, list):
                # list form: slowness depends on what this conn popped
                if self._next_sleep is None:
                    return False, 0.01
                return True, self._next_sleep
            if slow and isinstance(slow, Number):
                return True, slow
            return bool(slow), 0.1

        def read(self, amt=None):
            """Return body data; the first SLOW_READS slow reads are 1 long."""
            am_slow, delay = self.get_slow()
            if am_slow and self.sent < self.SLOW_READS:
                slowly_read_byte = self.body[self.sent:self.sent + 1]
                self.sent += 1
                eventlet.sleep(delay)
                return slowly_read_byte
            start = self.sent
            stop = None if amt is None else start + amt
            rv = self.body[start:stop]
            self.sent += len(rv)
            return rv

        def send(self, data=None):
            """Report data to give_send; sleep on the first slow sends."""
            if self.give_send:
                self.give_send(self, data)
            am_slow, delay = self.get_slow()
            if am_slow and self.received < self.SLOW_WRITES:
                self.received += 1
                eventlet.sleep(delay)

        def getheader(self, name, default=None):
            all_headers = HeaderKeyDict(self.getheaders())
            return all_headers.get(name, default)

        def nuke_from_orbit(self):
            # wrapped connections from buffered_http have this helper
            self.close()

        def close(self):
            self.closed = True

    num_codes = len(code_iter)

    def per_connection_iter(key):
        # a list/tuple is used as given; anything else is repeated once
        # per status
        if isinstance(kwargs.get(key), (list, tuple)):
            return iter(kwargs[key])
        return iter([kwargs.get(key, {})] * num_codes)

    # unless tests provide timestamps we use the "magic default"
    timestamps_iter = iter(kwargs.get('timestamps') or [-1] * num_codes)
    etag_iter = iter(kwargs.get('etags') or [None] * num_codes)
    headers_iter = per_connection_iter('headers')
    expect_headers_iter = per_connection_iter('expect_headers')

    missing_container = kwargs.get('missing_container', [False] * num_codes)
    if not isinstance(missing_container, (tuple, list)):
        missing_container = [missing_container] * num_codes
    container_ts_iter = iter(missing_container)

    status_iter = iter(code_iter)
    conn_id_and_code_iter = enumerate(status_iter)
    static_body = kwargs.get('body', None)
    body_iter = kwargs.get('body_iter', None)
    if body_iter:
        body_iter = iter(body_iter)
    unexpected_requests = []

    def connect(*args, **ckwargs):
        if kwargs.get('slow_connect', False):
            eventlet.sleep(0.1)
        if 'give_content_type' in kwargs:
            content_type = ''
            if len(args) >= 7 and 'Content-Type' in args[6]:
                content_type = args[6]['Content-Type']
            kwargs['give_content_type'](content_type)
        try:
            i, status = next(conn_id_and_code_iter)
        except StopIteration:
            # the code under test may swallow the StopIteration, so by logging
            # unexpected requests here we allow the test framework to check for
            # them after the connect function has been used.
            unexpected_requests.append((args, ckwargs))
            raise

        if 'give_connect' in kwargs:
            give_conn_fn = kwargs['give_connect']

            # only pass connection_id to callbacks that can accept it
            if six.PY2:
                argspec = inspect.getargspec(give_conn_fn)
                takes_any_kwarg = argspec.keywords
            else:
                argspec = inspect.getfullargspec(give_conn_fn)
                takes_any_kwarg = argspec.varkw
            if takes_any_kwarg or 'connection_id' in argspec.args:
                ckwargs['connection_id'] = i
            give_conn_fn(*args, **ckwargs)

        # always advance every per-connection iterator, even when the
        # status below turns into an HTTPException
        etag = next(etag_iter)
        headers = next(headers_iter)
        expect_headers = next(expect_headers_iter)
        timestamp = next(timestamps_iter)

        if isinstance(status, int) and status <= 0:
            raise HTTPException()
        if body_iter is None:
            body = static_body or b''
        else:
            body = next(body_iter)
        conn = FakeConn(status, etag, body=body, timestamp=timestamp,
                        headers=headers, expect_headers=expect_headers,
                        connection_id=i, give_send=kwargs.get('give_send'),
                        give_expect=kwargs.get('give_expect'))
        if 'capture_connections' in kwargs:
            kwargs['capture_connections'].append(conn)
        return conn

    connect.unexpected_requests = unexpected_requests
    connect.code_iter = status_iter

    return connect
|
2014-04-28 19:22:51 -07:00
|
|
|
|
|
|
|
|
|
|
|
@contextmanager
def mocked_http_conn(*args, **kwargs):
    """
    Patch ``swift.common.bufferedhttp.http_connect_raw`` with a fake.

    The fake is built by ``fake_http_connect(*args, **kwargs)`` and is
    yielded to the caller with two extra attributes: ``requests`` (a list
    of dicts describing each connection attempt, filled in unless the
    caller supplied their own ``give_connect``) and ``responses`` (the
    connections handed out).

    When the managed block finishes without raising, an AssertionError is
    raised if any statuses were left unused or if any requests were made
    after the statuses ran out.
    """
    captured_requests = []
    captured_responses = []

    def capture_requests(ip, port, method, path, headers, qs, ssl):
        if six.PY2 and not isinstance(ip, bytes):
            ip = ip.encode('ascii')
        captured_requests.append({
            'ip': ip,
            'port': port,
            'method': method,
            'path': path,
            'headers': headers,
            'qs': qs,
            'ssl': ssl,
        })

    # a caller-provided give_connect wins over the default recorder
    kwargs.setdefault('give_connect', capture_requests)
    kwargs['capture_connections'] = captured_responses
    fake_conn = fake_http_connect(*args, **kwargs)
    fake_conn.requests = captured_requests
    fake_conn.responses = captured_responses

    patcher = mocklib.patch('swift.common.bufferedhttp.http_connect_raw',
                            new=fake_conn)
    with patcher:
        yield fake_conn
        unused_statuses = list(fake_conn.code_iter)
        if unused_statuses:
            raise AssertionError('left over status %r' % unused_statuses)
        if fake_conn.unexpected_requests:
            described = ('%r' % (req,)
                         for req in fake_conn.unexpected_requests)
            raise AssertionError(
                'unexpected requests:\n%s' % '\n '.join(described))
|
2015-03-31 22:35:37 -07:00
|
|
|
|
|
|
|
|
Fix sporadic failure in test/unit/obj/test_server.py
In particular, in TestObjectController.test_object_delete_at_async_update
Rarely (<0.1% of the time?), it would fail with:
======================================================================
FAIL: test_object_delete_at_async_update
(test.unit.obj.test_server.TestObjectController)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/vagrant/swift/test/unit/obj/test_server.py", line 4826, in
test_object_delete_at_async_update
resp = req.get_response(self.object_controller)
File "/usr/lib/python2.7/contextlib.py", line 24, in __exit__
self.gen.next()
File "/vagrant/swift/test/unit/__init__.py", line 1075, in
mocked_http_conn
raise AssertionError('left over status %r' % left_over_status)
AssertionError: left over status [500, 500]
-------------------- >> begin captured stdout << ---------------------
test INFO: None - - [26/Apr/2017:22:32:13 +0000] "PUT /sda1/p/a/c/o" 400
19 "-" "-" "-" 0.0003 "-" 23801 0
--------------------- >> end captured stdout << ----------------------
>> raise AssertionError('left over status %r' % [500, 500])
----------------------------------------------------------------------
Related-Bug: 1514111
Change-Id: I1af4a291fb67cf4b1829f167998a08644117a800
2017-04-26 15:50:59 -07:00
|
|
|
def make_timestamp_iter(offset=0):
    """
    Return an endless iterator of Timestamp objects, one second apart.

    :param offset: added to the current whole-second time to pick the
                   first value
    """
    first = int(time.time()) + offset
    return (Timestamp(seconds) for seconds in itertools.count(first))
|
2016-06-07 10:35:18 +00:00
|
|
|
|
|
|
|
|
2018-05-01 15:12:05 +01:00
|
|
|
@contextmanager
def mock_timestamp_now(now=None, klass=Timestamp):
    """
    Pin ``swift.common.utils.Timestamp.now`` to one fixed value.

    :param now: the value ``Timestamp.now()`` should return while the
                context is active; when None, ``klass.now()`` is called
                once up front and that value is used
    :param klass: class whose ``now()`` supplies the default value
    :returns: context manager yielding the pinned value
    """
    pinned = klass.now() if now is None else now

    def _pinned_now(cls):
        return pinned

    patcher = mocklib.patch('swift.common.utils.Timestamp.now',
                            classmethod(_pinned_now))
    with patcher:
        yield pinned
|
|
|
|
|
|
|
|
|
2021-02-03 21:38:34 +00:00
|
|
|
@contextmanager
def mock_timestamp_now_with_iter(ts_iter):
    """
    Patch ``swift.common.utils.Timestamp.now`` with a mock whose
    ``side_effect`` is ``ts_iter`` for the duration of the context.

    :param ts_iter: passed straight through as the mock's side_effect
    """
    patcher = mocklib.patch('swift.common.utils.Timestamp.now',
                            side_effect=ts_iter)
    with patcher:
        yield
|
|
|
|
|
|
|
|
|
2016-06-07 10:35:18 +00:00
|
|
|
class Timeout(object):
    """
    Context manager that aborts the enclosed block using ``SIGALRM``.

    On entry a SIGALRM handler is installed and an alarm is scheduled for
    ``seconds``; if the alarm fires, the handler raises a locally defined
    ``TimeoutException`` (an ``Exception`` subclass).  On exit the alarm
    is cancelled and the SIGALRM handler that was in place before entry
    is put back, so the handler does not leak into code that runs after
    the block.

    :param seconds: value passed to ``signal.alarm``
    """

    def __init__(self, seconds):
        self.seconds = seconds
        # handler that was installed before __enter__, restored by __exit__
        self._previous_handler = None

    def __enter__(self):
        # signal.signal returns the handler it replaces; keep it for later
        self._previous_handler = signal.signal(signal.SIGALRM, self._exit)
        signal.alarm(self.seconds)

    def __exit__(self, type, value, traceback):
        signal.alarm(0)
        previous, self._previous_handler = self._previous_handler, None
        # None means the old handler was not installed from Python and
        # cannot be handed back to signal.signal
        if previous is not None:
            signal.signal(signal.SIGALRM, previous)

    def _exit(self, signum, frame):
        # signal handler: interrupt whatever the block is doing
        class TimeoutException(Exception):
            pass
        raise TimeoutException
|
2015-03-05 18:18:25 +05:30
|
|
|
|
|
|
|
|
2018-03-12 18:07:37 +01:00
|
|
|
def requires_o_tmpfile_support_in_tmp(func):
    """
    Decorator that skips the wrapped callable when
    ``utils.o_tmpfile_in_tmpdir_supported()`` reports no support.

    The check is made on every call, before ``func`` runs.

    :raises SkipTest: when the support check returns a false value
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if utils.o_tmpfile_in_tmpdir_supported():
            return func(*args, **kwargs)
        raise SkipTest('Requires O_TMPFILE support in TMPDIR')
    return wrapper
|
|
|
|
|
|
|
|
|
EC Fragment Duplication - Foundational Global EC Cluster Support
This patch enables efficient PUT/GET for global distributed cluster[1].
Problem:
Erasure coding has the capability to decrease the amount of actual stored
data to less than the replicated model. For example, ec_k=6, ec_m=3 parameter
can be 1.5x of the original data which is smaller than 3x replicated.
However, unlike replication, erasure coding requires availability of at
least some ec_k fragments of the total ec_k + ec_m fragments to service
read (e.g. 6 of 9 in the case above). As such, if we stored the
EC object into a swift cluster on 2 geographically distributed data
centers which have the same volume of disks, it is likely the fragments
will be stored evenly (about 4 and 5) so we still need to access a
faraway data center to decode the original object. In addition, if one
of the data centers was lost in a disaster, the stored objects will be
lost forever, and we have to cry a lot. To ensure highly durable
storage, you would think of making *more* parity fragments (e.g.
ec_k=6, ec_m=10), unfortunately this causes *significant* performance
degradation due to the cost of mathematical calculation for erasure
coding encode/decode.
How this resolves the problem:
EC Fragment Duplication extends on the initial solution to add *more*
fragments from which to rebuild an object similar to the solution
described above. The difference is making *copies* of encoded fragments.
With experimental results[1][2], employing small ec_k and ec_m shows
enough performance to store/retrieve objects.
On PUT:
- Encode incoming object with small ec_k and ec_m <- faster!
- Make duplicated copies of the encoded fragments. The # of copies
are determined by 'ec_duplication_factor' in swift.conf
- Store all fragments in Swift Global EC Cluster
The duplicated fragments increase pressure on existing requirements
when decoding objects in service to a read request. All fragments are
stored with their X-Object-Sysmeta-Ec-Frag-Index. In this change, the
X-Object-Sysmeta-Ec-Frag-Index represents the actual fragment index
encoded by PyECLib, there *will* be duplicates. Anytime we must decode
the original object data, we must only consider the ec_k fragments as
unique according to their X-Object-Sysmeta-Ec-Frag-Index. On decode no
duplicate X-Object-Sysmeta-Ec-Frag-Index may be used when decoding an
object, duplicate X-Object-Sysmeta-Ec-Frag-Index should be expected and
avoided if possible.
On GET:
This patch includes the following changes:
- Change GET Path to sort primary nodes grouping as subsets, so that
each subset will include unique fragments
- Change Reconstructor to be more aware of possibly duplicate fragments
For example, with this change, a policy could be configured such that
swift.conf:
ec_num_data_fragments = 2
ec_num_parity_fragments = 1
ec_duplication_factor = 2
(object ring must have 6 replicas)
At Object-Server:
node index (from object ring): 0 1 2 3 4 5 <- keep node index for
reconstruct decision
X-Object-Sysmeta-Ec-Frag-Index: 0 1 2 0 1 2 <- each object keeps actual
fragment index for
backend (PyEClib)
Additional improvements to Global EC Cluster Support will require
features such as Composite Rings, and more efficient fragment
rebalance/reconstruction.
1: http://goo.gl/IYiNPk (Swift Design Spec Repository)
2: http://goo.gl/frgj6w (Slide Share for OpenStack Summit Tokyo)
Doc-Impact
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Change-Id: Idd155401982a2c48110c30b480966a863f6bd305
2015-08-06 01:06:47 -07:00
|
|
|
class StubResponse(object):
    """
    Canned response object for tests: serves a fixed status, set of headers
    and body through ``getheader``/``getheaders``/``read``/``readline``.

    :param status: the status to report; also used to look up ``reason`` in
                   ``swob.RESPONSE_REASONS`` (unknown statuses get 'Fake')
    :param body: bytes served by ``read`` and ``readline``
    :param headers: initial headers, handed to ``HeaderKeyDict``
    :param frag_index: if not None, stored in the
                       X-Object-Sysmeta-Ec-Frag-Index header
    :param slowdown: a single delay, or an iterable of delays; one entry is
                     consumed per ``read``/``readline`` call and passed to
                     ``eventlet.sleep`` (``None`` entries do not sleep)
    :param slowdown_after: delays are only consumed once at least this many
                           bytes have been read
    """

    def __init__(self, status, body=b'', headers=None, frag_index=None,
                 slowdown=None, slowdown_after=0):
        self.status = status
        self.body = body
        self.readable = BytesIO(body)

        # Anything that is not iterable (a bare number, or None) is treated
        # as a schedule holding just that one entry.
        try:
            delays = iter(slowdown)
        except TypeError:
            delays = iter([slowdown])
        self._slowdown = delays
        self.slowdown_after = slowdown_after

        self.headers = HeaderKeyDict(headers)
        if frag_index is not None:
            self.headers['X-Object-Sysmeta-Ec-Frag-Index'] = frag_index

        reason_entry = swob.RESPONSE_REASONS.get(
            status, ('Fake', 'This response is a lie.'))
        self.reason = reason_entry[0]
        self.bytes_read = 0

    def slowdown(self):
        """
        Sleep for the next scheduled delay, if any.

        Does nothing (and consumes nothing from the schedule) until
        ``bytes_read`` has reached ``slowdown_after``.
        """
        if self.bytes_read >= self.slowdown_after:
            # an exhausted schedule behaves like a None entry: no sleep
            delay = next(self._slowdown, None)
            if delay is not None:
                eventlet.sleep(delay)

    def nuke_from_orbit(self):
        """Close the ``swift_conn`` attribute if one has been attached."""
        if not hasattr(self, 'swift_conn'):
            return
        self.swift_conn.close()

    def getheader(self, header_name, default=None):
        """Return the value of ``header_name``, or ``default`` if unset."""
        return self.headers.get(header_name, default)

    def getheaders(self):
        """
        Return the header items, first filling in Content-Length from the
        length of the body when no such header has been set.
        """
        missing_length = 'Content-Length' not in self.headers
        if missing_length:
            self.headers['Content-Length'] = len(self.body)
        return self.headers.items()

    def read(self, amt=0):
        """
        Read from the body, after applying any scheduled slowdown.

        ``amt`` is passed straight through to ``BytesIO.read``; note that
        with the default of 0 no bytes are returned.
        """
        self.slowdown()
        data = self.readable.read(amt)
        self.bytes_read += len(data)
        return data

    def readline(self, size=-1):
        """
        Read one line from the body, after applying any scheduled slowdown.

        ``size`` is passed straight through to ``BytesIO.readline``.
        """
        self.slowdown()
        line = self.readable.readline(size)
        self.bytes_read += len(line)
        return line

    def __repr__(self):
        parts = ['Status: %s' % self.status]
        # headers and body are only shown when they are non-empty
        if self.headers:
            parts.append('Headers: %r' % dict(self.headers))
        if self.body:
            parts.append('Body: %r' % self.body)
        return '<StubResponse %s>' % ', '.join(parts)
|
|
|
|
|
EC Fragment Duplication - Foundational Global EC Cluster Support
This patch enables efficient PUT/GET for global distributed cluster[1].
Problem:
Erasure coding has the capability to decrease the amount of actual stored
data to less than the replicated model. For example, ec_k=6, ec_m=3 parameters
can be 1.5x of the original data which is smaller than 3x replicated.
However, unlike replication, erasure coding requires availability of at
least some ec_k fragments of the total ec_k + ec_m fragments to service
read (e.g. 6 of 9 in the case above). As such, if we stored the
EC object into a swift cluster on 2 geographically distributed data
centers which have the same volume of disks, it is likely the fragments
will be stored evenly (about 4 and 5) so we still need to access a
faraway data center to decode the original object. In addition, if one
of the data centers was lost in a disaster, the stored objects will be
lost forever, and we have to cry a lot. To ensure highly durable
storage, you would think of making *more* parity fragments (e.g.
ec_k=6, ec_m=10), unfortunately this causes *significant* performance
degradation due to the cost of mathematical calculation for erasure
coding encode/decode.
How this resolves the problem:
EC Fragment Duplication extends on the initial solution to add *more*
fragments from which to rebuild an object similar to the solution
described above. The difference is making *copies* of encoded fragments.
With experimental results[1][2], employing small ec_k and ec_m shows
enough performance to store/retrieve objects.
On PUT:
- Encode incoming object with small ec_k and ec_m <- faster!
- Make duplicated copies of the encoded fragments. The # of copies
are determined by 'ec_duplication_factor' in swift.conf
- Store all fragments in Swift Global EC Cluster
The duplicated fragments increase pressure on existing requirements
when decoding objects in service to a read request. All fragments are
stored with their X-Object-Sysmeta-Ec-Frag-Index. In this change, the
X-Object-Sysmeta-Ec-Frag-Index represents the actual fragment index
encoded by PyECLib, there *will* be duplicates. Anytime we must decode
the original object data, we must only consider the ec_k fragments as
unique according to their X-Object-Sysmeta-Ec-Frag-Index. On decode no
duplicate X-Object-Sysmeta-Ec-Frag-Index may be used when decoding an
object, duplicate X-Object-Sysmeta-Ec-Frag-Index should be expected and
avoided if possible.
On GET:
This patch includes the following changes:
- Change GET Path to sort primary nodes grouping as subsets, so that
each subset will include unique fragments
- Change Reconstructor to be more aware of possibly duplicate fragments
For example, with this change, a policy could be configured such that
swift.conf:
ec_num_data_fragments = 2
ec_num_parity_fragments = 1
ec_duplication_factor = 2
(object ring must have 6 replicas)
At Object-Server:
node index (from object ring): 0 1 2 3 4 5 <- keep node index for
reconstruct decision
X-Object-Sysmeta-Ec-Frag-Index: 0 1 2 0 1 2 <- each object keeps actual
fragment index for
backend (PyEClib)
Additional improvements to Global EC Cluster Support will require
features such as Composite Rings, and more efficient fragment
rebalance/reconstruction.
1: http://goo.gl/IYiNPk (Swift Design Spec Repository)
2: http://goo.gl/frgj6w (Slide Share for OpenStack Summit Tokyo)
Doc-Impact
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Change-Id: Idd155401982a2c48110c30b480966a863f6bd305
2015-08-06 01:06:47 -07:00
|
|
|
|
2016-10-17 20:38:52 +01:00
|
|
|
def encode_frag_archive_bodies(policy, body):
    """
    Encode a stub body into the complete frag archive bodies that would be
    stored for it, in frag_index order.

    The body is cut into ``policy.ec_segment_size`` sized segments; each
    segment is encoded with ``policy.pyeclib_driver`` and the resulting list
    of fragments repeated ``policy.ec_duplication_factor`` times. The n-th
    frag archive is the concatenation of the n-th fragment of every segment.

    :param policy: a StoragePolicy instance, with policy_type EC_POLICY
    :param body: the bytes to encode into frag archives

    :returns: list of bytes, the complete frag_archive bodies for the given
              plaintext
    """
    seg_len = policy.ec_segment_size
    per_segment_frags = []
    for offset in range(0, len(body), seg_len):
        segment = body[offset:offset + seg_len]
        frags = (policy.pyeclib_driver.encode(segment)
                 * policy.ec_duplication_factor)
        if not frags:
            # nothing came back from the encoder; stop encoding here
            break
        per_segment_frags.append(frags)

    # transpose: one entry per node, each the join of that node's fragment
    # from every segment
    return [b''.join(node_frags) for node_frags in zip(*per_segment_frags)]
|
EC Fragment Duplication - Foundational Global EC Cluster Support
This patch enables efficient PUT/GET for global distributed cluster[1].
Problem:
Erasure coding has the capability to decrease the amount of actual stored
data to less than the replicated model. For example, ec_k=6, ec_m=3 parameters
can be 1.5x of the original data which is smaller than 3x replicated.
However, unlike replication, erasure coding requires availability of at
least some ec_k fragments of the total ec_k + ec_m fragments to service
read (e.g. 6 of 9 in the case above). As such, if we stored the
EC object into a swift cluster on 2 geographically distributed data
centers which have the same volume of disks, it is likely the fragments
will be stored evenly (about 4 and 5) so we still need to access a
faraway data center to decode the original object. In addition, if one
of the data centers was lost in a disaster, the stored objects will be
lost forever, and we have to cry a lot. To ensure highly durable
storage, you would think of making *more* parity fragments (e.g.
ec_k=6, ec_m=10), unfortunately this causes *significant* performance
degradation due to the cost of mathematical calculation for erasure
coding encode/decode.
How this resolves the problem:
EC Fragment Duplication extends on the initial solution to add *more*
fragments from which to rebuild an object similar to the solution
described above. The difference is making *copies* of encoded fragments.
With experimental results[1][2], employing small ec_k and ec_m shows
enough performance to store/retrieve objects.
On PUT:
- Encode incoming object with small ec_k and ec_m <- faster!
- Make duplicated copies of the encoded fragments. The # of copies
are determined by 'ec_duplication_factor' in swift.conf
- Store all fragments in Swift Global EC Cluster
The duplicated fragments increase pressure on existing requirements
when decoding objects in service to a read request. All fragments are
stored with their X-Object-Sysmeta-Ec-Frag-Index. In this change, the
X-Object-Sysmeta-Ec-Frag-Index represents the actual fragment index
encoded by PyECLib, there *will* be duplicates. Anytime we must decode
the original object data, we must only consider the ec_k fragments as
unique according to their X-Object-Sysmeta-Ec-Frag-Index. On decode no
duplicate X-Object-Sysmeta-Ec-Frag-Index may be used when decoding an
object, duplicate X-Object-Sysmeta-Ec-Frag-Index should be expected and
avoided if possible.
On GET:
This patch includes the following changes:
- Change GET Path to sort primary nodes grouping as subsets, so that
each subset will include unique fragments
- Change Reconstructor to be more aware of possibly duplicate fragments
For example, with this change, a policy could be configured such that
swift.conf:
ec_num_data_fragments = 2
ec_num_parity_fragments = 1
ec_duplication_factor = 2
(object ring must have 6 replicas)
At Object-Server:
node index (from object ring): 0 1 2 3 4 5 <- keep node index for
reconstruct decision
X-Object-Sysmeta-Ec-Frag-Index: 0 1 2 0 1 2 <- each object keeps actual
fragment index for
backend (PyEClib)
Additional improvements to Global EC Cluster Support will require
features such as Composite Rings, and more efficient fragment
rebalance/reconstruction.
1: http://goo.gl/IYiNPk (Swift Design Spec Repository)
2: http://goo.gl/frgj6w (Slide Share for OpenStack Summit Tokyo)
Doc-Impact
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Change-Id: Idd155401982a2c48110c30b480966a863f6bd305
2015-08-06 01:06:47 -07:00
|
|
|
|
|
|
|
|
|
|
|
def make_ec_object_stub(test_body, policy, timestamp):
    """
    Build a dict describing an EC object and all of its frag archives.

    :param test_body: the object body as bytes; if falsy, a body is
                      generated from ``b'test'`` repeated
                      ``policy.ec_segment_size`` times with a random 1 to
                      1000 bytes trimmed off the end
    :param policy: the EC storage policy used to encode the body
    :param timestamp: the object timestamp; if falsy,
                      ``utils.Timestamp.now()`` is used

    :returns: a dict with keys 'body', 'etag' (hex md5 of the body), 'frags'
              (the result of ``encode_frag_archive_bodies``) and 'timestamp'
    """
    seg_len = policy.ec_segment_size
    if not test_body:
        test_body = (b'test' * seg_len)[:-random.randint(1, 1000)]
    if not timestamp:
        timestamp = utils.Timestamp.now()

    stub = {
        'body': test_body,
        'etag': md5(test_body, usedforsecurity=False).hexdigest(),
        'frags': encode_frag_archive_bodies(policy, test_body),
        'timestamp': timestamp,
    }
    return stub
|
|
|
|
|
|
|
|
|
|
|
|
def fake_ec_node_response(node_frags, policy):
    """
    Given a list of entries for each node in ring order, where the entries
    are a dict (or list of dicts) which describes the fragment (or
    fragments) that are on the node; create a function suitable for use
    with capture_http_requests that will accept a req object and return a
    response that will suitably fake the behavior of an object server who
    had the given fragments on disk at the time.

    Note that ``node_frags`` is normalized in place: any item given as a
    single dict is replaced by a one-element list.

    :param node_frags: a list. Each item in the list describes the
        fragments that are on a node; each item is a dict or list of dicts,
        each dict describing a single fragment; where the item is a list,
        repeated calls to get_response will return fragments in the order
        of the list; each dict has keys:
            - obj: an object stub, as generated by _make_ec_object_stub,
                that defines all of the fragments that compose an object
                at a specific timestamp.
            - frag: the index of a fragment to be selected from the object
                stub
            - durable (optional): True if the selected fragment is durable
    :param policy: storage policy to return
    :returns: a ``get_response(req)`` callable; it raises AssertionError
        when the request's X-Backend-Storage-Policy-Index does not match
        ``policy``, and Exception when the requesting node or the selected
        fragment cannot be resolved from the stub data.
    """
    node_map = {}  # maps node ip and port to node index
    all_nodes = []
    call_count = {}  # maps node index to get_response call count for node

    def _build_node_map(req, policy):
        # primaries first, then handoffs, so indexes follow ring order
        part = utils.split_path(req['path'], 5, 5, True)[1]
        all_nodes.extend(policy.object_ring.get_part_nodes(part))
        all_nodes.extend(policy.object_ring.get_more_nodes(part))
        for i, node in enumerate(all_nodes):
            node_map[(node['ip'], node['port'])] = i
            call_count[i] = 0

    # normalize node_frags to a list of fragments for each node even
    # if there's only one fragment in the dataset provided.
    for i, frags in enumerate(node_frags):
        if isinstance(frags, dict):
            node_frags[i] = [frags]

    def get_response(req):
        requested_policy = int(
            req['headers']['X-Backend-Storage-Policy-Index'])
        if int(policy) != requested_policy:
            # the exception used to be constructed but never raised, which
            # silently disabled this sanity check
            raise AssertionError(
                "Requested polciy doesn't fit the fake response policy")
        if not node_map:
            # built lazily: the partition is only known from a request path
            _build_node_map(req, policy)

        try:
            node_index = node_map[(req['ip'], req['port'])]
        except KeyError:
            raise Exception("Couldn't find node %s:%s in %r" % (
                req['ip'], req['port'], all_nodes))
        try:
            frags = node_frags[node_index]
        except IndexError:
            raise Exception('Found node %r:%r at index %s - '
                            'but only got %s stub response nodes' % (
                                req['ip'], req['port'], node_index,
                                len(node_frags)))

        if not frags:
            return StubResponse(404)

        # determine response fragment (if any) for this call
        resp_frag = frags[call_count[node_index]]
        call_count[node_index] += 1
        frag_prefs = req['headers'].get('X-Backend-Fragment-Preferences')
        if not (frag_prefs or resp_frag.get('durable', True)):
            # a non-durable fragment is only served when the request
            # carries fragment preferences
            return StubResponse(404)

        # prepare durable timestamp and backend frags header for this node
        obj_stub = resp_frag['obj']
        ts2frags = defaultdict(list)
        durable_timestamp = None
        for frag in frags:
            ts_frag = frag['obj']['timestamp']
            if frag.get('durable', True):
                durable_timestamp = ts_frag.internal
            ts2frags[ts_frag].append(frag['frag'])

        try:
            body = obj_stub['frags'][resp_frag['frag']]
        except IndexError as err:
            raise Exception(
                'Frag index %s not defined: node index %s, frags %r\n%s' %
                (resp_frag['frag'], node_index, [f['frag'] for f in frags],
                 err))
        headers = {
            'X-Object-Sysmeta-Ec-Content-Length': len(obj_stub['body']),
            'X-Object-Sysmeta-Ec-Etag': obj_stub['etag'],
            'X-Object-Sysmeta-Ec-Frag-Index':
                policy.get_backend_index(resp_frag['frag']),
            'X-Backend-Timestamp': obj_stub['timestamp'].internal,
            'X-Timestamp': obj_stub['timestamp'].normal,
            'X-Backend-Data-Timestamp': obj_stub['timestamp'].internal,
            'X-Backend-Fragments':
                server._make_backend_fragments_header(ts2frags)
        }
        if durable_timestamp:
            headers['X-Backend-Durable-Timestamp'] = durable_timestamp

        return StubResponse(200, body, headers)

    return get_response
|
Add checksum to object extended attributes
Currently, our integrity checking for objects is pretty weak when it
comes to object metadata. If the extended attributes on a .data or
.meta file get corrupted in such a way that we can still unpickle it,
we don't have anything that detects that.
This could be especially bad with encrypted etags; if the encrypted
etag (X-Object-Sysmeta-Crypto-Etag or whatever it is) gets some bits
flipped, then we'll cheerfully decrypt the cipherjunk into plainjunk,
then send it to the client. Net effect is that the client sees a GET
response with an ETag that doesn't match the MD5 of the object *and*
Swift has no way of detecting and quarantining this object.
Note that, with an unencrypted object, if the ETag metadatum gets
mangled, then the object will be quarantined by the object server or
auditor, whichever notices first.
As part of this commit, I also ripped out some mocking of
getxattr/setxattr in tests. It appears to be there to allow unit tests
to run on systems where /tmp doesn't support xattrs. However, since
the mock is keyed off of inode number and inode numbers get re-used,
there's lots of leakage between different test runs. On a real FS,
unlinking a file and then creating a new one of the same name will
also reset the xattrs; this isn't the case with the mock.
The mock was pretty old; Ubuntu 12.04 and up all support xattrs in
/tmp, and recent Red Hat / CentOS releases do too. The xattr mock was
added in 2011; maybe it was to support Ubuntu Lucid Lynx?
Bonus: now you can pause a test with the debugger, inspect its files
in /tmp, and actually see the xattrs along with the data.
Since this patch now uses a real filesystem for testing filesystem
operations, tests are skipped if the underlying filesystem does not
support setting xattrs (eg tmpfs or more than 4k of xattrs on ext4).
References to "/tmp" have been replaced with calls to
tempfile.gettempdir(). This will allow setting the TMPDIR envvar in
test setup and getting an XFS filesystem instead of ext4 or tmpfs.
THIS PATCH SIGNIFICANTLY CHANGES TESTING ENVIRONMENTS
With this patch, every test environment will require TMPDIR to be
using a filesystem that supports at least 4k of extended attributes.
Neither ext4 nor tmpfs supports this. XFS is recommended.
So why all the SkipTests? Why not simply raise an error? We still need
the tests to run on the base image for OpenStack's CI system. Since
we were previously mocking out xattr, there wasn't a problem, but we
also weren't actually testing anything. This patch adds functionality
to validate xattr data, so we need to drop the mock.
`test.unit.skip_if_no_xattrs()` is also imported into `test.functional`
so that functional tests can import it from the functional test
namespace.
The related OpenStack CI infrastructure changes are made in
https://review.openstack.org/#/c/394600/.
Co-Authored-By: John Dickinson <me@not.mn>
Change-Id: I98a37c0d451f4960b7a12f648e4405c6c6716808
2016-06-30 16:52:58 -07:00
|
|
|
|
|
|
|
|
|
|
|
# Memoized result of xattr_supported_check(); None means "not probed yet".
supports_xattr_cached_val = None
|
|
|
|
|
|
|
|
|
|
|
|
def xattr_supported_check():
    """
    This check simply sets more than 4k of metadata on a tempfile and
    returns True if it worked and False if not.

    We want to use *more* than 4k of metadata in this check because
    some filesystems (eg ext4) only allow one blocksize worth of
    metadata. The XFS filesystem doesn't have this limit, and so this
    check returns True when TMPDIR is XFS. This check will return
    False under ext4 (which supports xattrs <= 4k) and tmpfs (which
    doesn't support xattrs at all).

    The result is memoized in the module-level
    ``supports_xattr_cached_val`` so the filesystem is only probed once.

    :returns: True if the large xattr could be set, False otherwise
    :raises IOError: if setting the xattr fails with an errno other than
        ENOSPC, ENOTSUP, EOPNOTSUPP or ERANGE
    """
    global supports_xattr_cached_val

    if supports_xattr_cached_val is not None:
        return supports_xattr_cached_val

    big_val = b'x' * (4096 + 1)  # more than 4k of metadata

    # Create the tempfile *before* the try block (and before caching a
    # pessimistic answer): if mkstemp() itself fails, its error should
    # propagate as-is rather than being masked by the cleanup code touching
    # an fd that was never assigned.
    fd, tmppath = mkstemp()

    # assume the worst -- xattrs aren't supported
    supports_xattr_cached_val = False

    try:
        xattr.setxattr(fd, 'user.swift.testing_key', big_val)
    except IOError as e:
        if errno.errorcode.get(e.errno) in ('ENOSPC', 'ENOTSUP', 'EOPNOTSUPP',
                                            'ERANGE'):
            # filesystem does not support xattr of this size
            return False
        raise
    else:
        supports_xattr_cached_val = True
        return True
    finally:
        # clean up the tmpfile
        os.close(fd)
        os.unlink(tmppath)
|
|
|
|
|
|
|
|
|
|
|
|
def skip_if_no_xattrs():
    """
    Skip the calling test unless xattr_supported_check() reports support.

    :raises SkipTest: when the check returns a false value
    """
    if xattr_supported_check():
        return
    raise SkipTest('Large xattrs not supported in `%s`. Skipping test' %
                   gettempdir())
|
2018-05-01 15:12:05 +01:00
|
|
|
|
|
|
|
|
|
|
|
def unlink_files(paths):
    """
    Remove every file named in ``paths``, tolerating files that are
    already gone.

    :param paths: an iterable of file paths
    :raises OSError: if an unlink fails for any reason other than ENOENT
    """
    for target in paths:
        try:
            os.unlink(target)
        except OSError as exc:
            if exc.errno == errno.ENOENT:
                # nothing to remove; move on to the next path
                continue
            raise
|
2018-05-02 10:47:51 +01:00
|
|
|
|
|
|
|
|
|
|
|
class FakeHTTPResponse(object):
    """
    Thin adapter that exposes a wrapped response object through ``status``
    and ``data`` properties.

    :param resp: the response to wrap; its ``status_int`` and ``body``
        attributes are read by the properties below
    """

    def __init__(self, resp):
        self.resp = resp

    @property
    def data(self):
        """The wrapped response's ``body``."""
        return self.resp.body

    @property
    def status(self):
        """The wrapped response's ``status_int``."""
        return self.resp.status_int
|
|
|
|
|
|
|
|
|
|
|
|
def attach_fake_replication_rpc(rpc, replicate_hook=None, errors=None):
    """
    Build a fake replication connection class whose ``replicate`` calls are
    dispatched directly to ``rpc`` instead of going over the network.

    :param rpc: object providing ``dispatch(replicate_args, args)`` and a
        ``mount_check`` attribute
    :param replicate_hook: optional callable invoked as
        ``replicate_hook(op, *sync_args)`` after every replicate call
    :param errors: optional dict mapping an op name to a list of canned
        responses; while the list for an op is non-empty, its first item is
        popped and used instead of dispatching to ``rpc``
    :returns: the FakeReplConnection class
    """
    class FakeReplConnection(object):

        def __init__(self, node, partition, hash_, logger):
            self.logger = logger
            self.node = node
            self.partition = partition
            self.path = '/%s/%s/%s' % (node['device'], partition, hash_)
            self.host = node['replication_ip']

        def replicate(self, op, *sync_args):
            print('REPLICATE: %s, %s, %r' % (self.path, op, sync_args))
            # prefer a canned response for this op when one is queued
            queued = errors[op] if errors and op in errors else None
            resp = queued.pop(0) if queued else None
            if not resp:
                path_parts = self.path.lstrip('/').split('/')
                # deep copy so the rpc cannot mutate the caller's arguments
                call_args = [op] + copy.deepcopy(list(sync_args))
                with mock_check_drive(isdir=not rpc.mount_check,
                                      ismount=rpc.mount_check):
                    swob_response = rpc.dispatch(path_parts, call_args)
                resp = FakeHTTPResponse(swob_response)
            if replicate_hook:
                replicate_hook(op, *sync_args)
            return resp

    return FakeReplConnection
|
2019-05-30 11:55:58 -07:00
|
|
|
|
|
|
|
|
|
|
|
def group_by_byte(contents):
    """
    Run-length encode ``contents``.

    :param contents: a sliceable sequence, e.g. a byte string
    :returns: a list of ``(item, run_length)`` tuples, one per run of
        consecutive equal length-1 slices
    """
    # Slice rather than iterate: on py3 iterating a byte string yields
    # ints, whereas slicing yields single-byte byte strings as py2
    # iteration did.
    singles = (contents[pos:pos + 1] for pos in range(len(contents)))
    runs = []
    for value, members in itertools.groupby(singles):
        runs.append((value, len(list(members))))
    return runs
|
2022-07-07 17:27:49 +10:00
|
|
|
|
|
|
|
|
|
|
|
def generate_db_path(tempdir, server_type):
    """
    Build a uniquely named db file path under ``tempdir``.

    :param tempdir: root directory for the path
    :param server_type: used (pluralized with an ``s``) as the first
        sub-directory and as the prefix of the db file name
    :returns: ``<tempdir>/<server_type>s/part/suffix/hash/<server_type>-<uuid4>.db``
    """
    type_dir = '%ss' % server_type
    db_name = '%s-%s.db' % (server_type, uuid4())
    return os.path.join(tempdir, type_dir, 'part', 'suffix', 'hash', db_name)
|
2023-04-04 15:42:36 -05:00
|
|
|
|
|
|
|
|
|
|
|
class ConfigAssertMixin(object):
    """
    Mix into a TestCase to assert on duplicated config options in a way
    that works on both py2 and py3.
    """
    def assertDuplicateOption(self, app_config, option_name, option_value):
        """
        On PY3 expect a DuplicateOptionError; otherwise expect the app to
        load with ``option_value``.
        """
        if not six.PY3:
            self.assertDuplicateOptionOK(app_config, option_name, option_value)
            return
        self.assertDuplicateOptionError(app_config, option_name)

    def assertDuplicateOptionError(self, app_config, option_name):
        """
        Assert that calling ``app_config`` raises DuplicateOptionError with
        a message naming ``option_name``.
        """
        with self.assertRaises(
                configparser.DuplicateOptionError) as caught:
            app_config()
        text = str(caught.exception)
        for needle in (option_name, 'already exists'):
            self.assertIn(needle, text)

    def assertDuplicateOptionOK(self, app_config, option_name, option_value):
        """
        Assert that the app returned by ``app_config`` ended up with
        ``option_value`` for ``option_name``.
        """
        app = app_config()
        if hasattr(app, 'conf'):
            found_value = app.conf[option_name]
        else:
            # special case for proxy app!
            target = getattr(app, '_pipeline_final_app', app)
            found_value = getattr(target, option_name)
        self.assertEqual(found_value, option_value)
|
2023-06-23 13:21:02 +01:00
|
|
|
|
|
|
|
|
|
|
|
class FakeSource(object):
    """
    A fake backend response source that serves its body from a list of
    chunks.

    :param chunks: iterable of chunks returned by successive ``read`` calls;
        a ``None`` chunk makes the corresponding ``read`` raise
        ChunkReadTimeout
    :param headers: optional dict of headers; ``getheader`` looks names up
        lower-cased, so keys are expected to be lower case
    :param body: value returned by ``read`` once all chunks are consumed
    """
    def __init__(self, chunks, headers=None, body=b''):
        self.chunks = list(chunks)
        self.headers = headers or {}
        self.status = 200
        self.swift_conn = None
        self.body = body

    def read(self, _read_size):
        """
        Return the next chunk, or ``body`` when no chunks remain.

        :param _read_size: ignored
        :raises ChunkReadTimeout: if the next chunk is None
        """
        if self.chunks:
            chunk = self.chunks.pop(0)
            if chunk is None:
                raise exceptions.ChunkReadTimeout()
            else:
                return chunk
        else:
            return self.body

    def getheader(self, header):
        """
        Return the value of ``header`` (case-insensitive), or None.

        ``content-length`` is always returned as a string, like every other
        header value.
        """
        # content-length for the whole object is generated dynamically
        # by summing non-None chunks
        if header.lower() == "content-length":
            if self.chunks:
                return str(sum(len(c) for c in self.chunks
                               if c is not None))
            # with no chunks left read() just hands back the body; this
            # branch used to return an int while the branch above returned
            # a str
            return str(len(self.read(-1)))
        return self.headers.get(header.lower())

    def getheaders(self):
        """
        Return a list of ``(name, value)`` tuples, content-length first.
        """
        return [('content-length', self.getheader('content-length'))] + \
            list(self.headers.items())
|
2023-10-06 14:53:29 +01:00
|
|
|
|
|
|
|
|
2024-01-04 05:06:32 +00:00
|
|
|
class CaptureIterator(object):
    """
    Iterator wrapper that reports each of its own method calls to a
    callback while delegating the real work to the wrapped iterable.

    Calls are reported through a callback, rather than recorded on the
    instance, so that a test can observe garbage collection of an instance
    without having to hold a reference to it (which would keep it alive).

    :param wrapped: an iterable to wrap.
    :param call_capture_callback: a function that will be called to capture
        calls to this iterator.
    """
    def __init__(self, wrapped, call_capture_callback):
        self.wrapped_iter = wrapped
        self.call_capture_callback = call_capture_callback

    def _capture_call(self):
        # report the name of the method that called us; it is element 3 of
        # the caller's stack record
        caller_record = inspect.stack()[1]
        self.call_capture_callback(caller_record[3])

    def __iter__(self):
        return self

    def next(self):
        self._capture_call()
        return next(self.wrapped_iter)

    # alias, so the call is still captured under the name 'next'
    __next__ = next

    def __del__(self):
        self._capture_call()

    def close(self):
        self._capture_call()
        close_if_possible(self.wrapped_iter)
|
|
|
|
|
|
|
|
|
|
|
|
class CaptureIteratorFactory(object):
    """
    Callable that wraps the result of ``wrapped`` in a ``CaptureIterator``
    and collects the calls those iterators report.

    Captured calls are stored in ``captured_calls``, keyed by the 1-based
    number of the instance that made them.

    :param wrapped: a callable whose result is wrapped each time the
        factory is called.
    """
    def __init__(self, wrapped):
        self.wrapped = wrapped
        self.captured_calls = defaultdict(list)
        self.instance_count = 0

    def log_call(self, instance_number, call):
        self.captured_calls[instance_number].append(call)

    def __call__(self, *args, **kwargs):
        self.instance_count += 1
        inner = self.wrapped(*args, **kwargs)
        recorder = functools.partial(self.log_call, self.instance_count)
        # note: the new CaptureIterator is deliberately not stored here,
        # as holding a reference would stop it being garbage collected
        return CaptureIterator(inner, recorder)
|
|
|
|
|
|
|
|
|
2023-10-06 14:53:29 +01:00
|
|
|
def get_node_error_stats(proxy_app, ring_node):
    """
    Look up the error-limiter stats recorded for ``ring_node``.

    :returns: the stats stored under the node's key, or an empty dict when
        nothing (or a false value) is stored
    """
    limiter = proxy_app.error_limiter
    found = limiter.stats.get(limiter.node_key(ring_node))
    return found if found else {}
|
|
|
|
|
|
|
|
|
|
|
|
def node_error_count(proxy_app, ring_node):
    """
    Reach into the proxy's internals to get the error count for a
    particular node; 0 when no count is recorded.
    """
    node_stats = get_node_error_stats(proxy_app, ring_node)
    return node_stats.get('errors', 0)
|
|
|
|
|
|
|
|
|
|
|
|
def node_error_counts(proxy_app, ring_nodes):
    """
    Reach into the proxy's internals to get the error counts for a list of
    nodes.

    :returns: the per-node counts (0 where none is recorded), sorted from
        highest to lowest
    """
    counts = []
    for ring_node in ring_nodes:
        node_stats = get_node_error_stats(proxy_app, ring_node)
        counts.append(node_stats.get('errors', 0))
    counts.sort(reverse=True)
    return counts
|
|
|
|
|
|
|
|
|
|
|
|
def node_last_error(proxy_app, ring_node):
    """
    Reach into the proxy's internals to get the last error for a particular
    node; None when no last error is recorded.
    """
    node_stats = get_node_error_stats(proxy_app, ring_node)
    return node_stats.get('last_error')
|
|
|
|
|
|
|
|
|
|
|
|
def set_node_errors(proxy_app, ring_node, value, last_error):
    """
    Overwrite the error-limiter stats stored for ``ring_node``.

    :param value: the node's new error count
    :param last_error: the value to record as the node's last error
    """
    limiter = proxy_app.error_limiter
    node_key = limiter.node_key(ring_node)
    limiter.stats[node_key] = dict(errors=value, last_error=last_error)
|