#!/usr/bin/python -u
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
import time

from os import listdir, unlink
from os.path import join as path_join
from unittest import main
from uuid import uuid4

from swiftclient import client

from swift.common import direct_client
from swift.common.exceptions import ClientException
from swift.common.utils import hash_path, readconf
from swift.obj.diskfile import write_metadata, read_metadata, get_data_dir

from test.probe.common import ReplProbeTest, ECProbeTest


RETRIES = 5


def get_data_file_path(obj_dir):
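    # Return the path of the first entry in obj_dir when sorted newest-first
    # (diskfile names start with the timestamp, so this is the latest file).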
    files = []
    # We might need to try a few times if a request hasn't yet settled. For
    # instance, a PUT can return success when just 2 of 3 nodes have
    # completed.
    for attempt in range(RETRIES + 1):
        try:
            files = sorted(listdir(obj_dir), reverse=True)
            break
        except Exception:
            if attempt < RETRIES:
                time.sleep(1)
            else:
                raise
    for filename in files:
        return path_join(obj_dir, filename)


class TestObjectFailures(ReplProbeTest):
    def _setup_data_file(self, container, obj, data):
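        # Create a container and object through the proxy, sanity-check what
        # a client reads back, then locate the object's on-disk data file on
        # its first primary node so the tests can tamper with it directly.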
        client.put_container(self.url, self.token, container,
                             headers={'X-Storage-Policy':
                                      self.policy.name})
        client.put_object(self.url, self.token, container, obj, data)
        odata = client.get_object(self.url, self.token, container, obj)[-1]
        self.assertEqual(odata, data)
        opart, onodes = self.object_ring.get_nodes(
            self.account, container, obj)
        onode = onodes[0]
        # map the object-server port (e.g. 6010, 6020, ...) back to the
        # number of that node's config file; use integer division so the
        # result can index self.configs cleanly
        node_id = (onode['port'] - 6000) // 10
        device = onode['device']
        hash_str = hash_path(self.account, container, obj)
        obj_server_conf = readconf(self.configs['object-server'][node_id])
        devices = obj_server_conf['app:object-server']['devices']
        obj_dir = '%s/%s/%s/%s/%s/%s/' % (devices, device,
                                          get_data_dir(self.policy),
                                          opart, hash_str[-3:], hash_str)
        data_file = get_data_file_path(obj_dir)
        return onode, opart, data_file

    def run_quarantine(self):
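        # Corrupt the stored ETag in the data file's metadata. The first
        # direct GET still returns the (unchanged) data; once the object
        # server notices the checksum mismatch it should quarantine the
        # file, so a second direct GET must 404.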
        container = 'container-%s' % uuid4()
        obj = 'object-%s' % uuid4()
        onode, opart, data_file = self._setup_data_file(container, obj,
                                                        b'VERIFY')
        # Stash the on disk data for future comparison - this may not equal
        # 'VERIFY' if for example the proxy has crypto enabled
        backend_data = direct_client.direct_get_object(
            onode, opart, self.account, container, obj, headers={
                'X-Backend-Storage-Policy-Index': self.policy.idx})[-1]

        metadata = read_metadata(data_file)
        metadata['ETag'] = 'badetag'
        write_metadata(data_file, metadata)

        odata = direct_client.direct_get_object(
            onode, opart, self.account, container, obj, headers={
                'X-Backend-Storage-Policy-Index': self.policy.idx})[-1]
        self.assertEqual(odata, backend_data)
        try:
            direct_client.direct_get_object(
                onode, opart, self.account, container, obj, headers={
                    'X-Backend-Storage-Policy-Index': self.policy.idx})
            raise Exception("Did not quarantine object")
        except ClientException as err:
            self.assertEqual(err.http_status, 404)

    def run_quarantine_range_etag(self):
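        # Same ETag corruption as run_quarantine, but read the object back
        # through ranged direct GETs first; the object should end up
        # quarantined, so the final full direct GET must 404.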
        container = 'container-range-%s' % uuid4()
        obj = 'object-range-%s' % uuid4()
        onode, opart, data_file = self._setup_data_file(container, obj,
                                                        b'RANGE')
        # Stash the on disk data for future comparison - this may not equal
        # 'RANGE' if for example the proxy has crypto enabled
        backend_data = direct_client.direct_get_object(
            onode, opart, self.account, container, obj, headers={
                'X-Backend-Storage-Policy-Index': self.policy.idx})[-1]

        metadata = read_metadata(data_file)
        metadata['ETag'] = 'badetag'
        write_metadata(data_file, metadata)
        base_headers = {'X-Backend-Storage-Policy-Index': self.policy.idx}
        for header, result in [({'Range': 'bytes=0-2'}, backend_data[0:3]),
                               ({'Range': 'bytes=1-11'}, backend_data[1:]),
                               ({'Range': 'bytes=0-11'}, backend_data)]:
            req_headers = base_headers.copy()
            req_headers.update(header)
            odata = direct_client.direct_get_object(
                onode, opart, self.account, container, obj,
                headers=req_headers)[-1]
            self.assertEqual(odata, result)

        try:
            direct_client.direct_get_object(
                onode, opart, self.account, container, obj, headers={
                    'X-Backend-Storage-Policy-Index': self.policy.idx})
            raise Exception("Did not quarantine object")
        except ClientException as err:
            self.assertEqual(err.http_status, 404)

    def run_quarantine_zero_byte_get(self):
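        # Replace the data file with an empty file that still carries the
        # original xattr metadata, so the size no longer matches what the
        # metadata claims; a direct GET should quarantine it and 404.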
        container = 'container-zbyte-%s' % uuid4()
        obj = 'object-zbyte-%s' % uuid4()
        onode, opart, data_file = self._setup_data_file(
            container, obj, b'DATA')
        metadata = read_metadata(data_file)
        unlink(data_file)

        with open(data_file, 'w') as fpointer:
            write_metadata(fpointer, metadata)
        try:
            direct_client.direct_get_object(
                onode, opart, self.account, container, obj, conn_timeout=1,
                response_timeout=1, headers={'X-Backend-Storage-Policy-Index':
                                             self.policy.idx})
            raise Exception("Did not quarantine object")
        except ClientException as err:
            self.assertEqual(err.http_status, 404)

    def run_quarantine_zero_byte_head(self):
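        # As run_quarantine_zero_byte_get, but the truncated file should be
        # detected while serving a direct HEAD rather than a GET.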
        container = 'container-zbyte-%s' % uuid4()
        obj = 'object-zbyte-%s' % uuid4()
        onode, opart, data_file = self._setup_data_file(
            container, obj, b'DATA')
        metadata = read_metadata(data_file)
        unlink(data_file)

        with open(data_file, 'w') as fpointer:
            write_metadata(fpointer, metadata)
        try:
            direct_client.direct_head_object(
                onode, opart, self.account, container, obj, conn_timeout=1,
                response_timeout=1, headers={'X-Backend-Storage-Policy-Index':
                                             self.policy.idx})
            raise Exception("Did not quarantine object")
        except ClientException as err:
            self.assertEqual(err.http_status, 404)

    def run_quarantine_zero_byte_post(self):
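        # As run_quarantine_zero_byte_get, but the truncated file should be
        # detected while handling a direct POST of new object metadata.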
        container = 'container-zbyte-%s' % uuid4()
        obj = 'object-zbyte-%s' % uuid4()
        onode, opart, data_file = self._setup_data_file(
            container, obj, b'DATA')
        metadata = read_metadata(data_file)
        unlink(data_file)

        with open(data_file, 'w') as fpointer:
            write_metadata(fpointer, metadata)
        try:
            headers = {'X-Object-Meta-1': 'One', 'X-Object-Meta-Two': 'Two',
                       'X-Backend-Storage-Policy-Index': self.policy.idx}
            direct_client.direct_post_object(
                onode, opart, self.account,
                container, obj,
                headers=headers,
                conn_timeout=1,
                response_timeout=1)
            raise Exception("Did not quarantine object")
        except ClientException as err:
            self.assertEqual(err.http_status, 404)

    def test_runner(self):
        self.run_quarantine()
        self.run_quarantine_range_etag()
        self.run_quarantine_zero_byte_get()
        self.run_quarantine_zero_byte_head()
        self.run_quarantine_zero_byte_post()


class TestECObjectFailures(ECProbeTest):
    def test_ec_missing_all_durable_fragments(self):
        # This test asserts that when the proxy has enough fragments to
        # reconstruct the object but none of them are marked durable, the
        # proxy should return a 404.

        container_name = 'container-%s' % uuid4()
        object_name = 'object-%s' % uuid4()

        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url, self.token, container_name,
                             headers=headers)

        # PUT object, should go to primary nodes
        client.put_object(self.url, self.token, container_name,
                          object_name, contents=b'object contents')

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(
            self.account, container_name, object_name)

        # sanity test
        odata = client.get_object(self.url, self.token, container_name,
                                  object_name)[-1]
        self.assertEqual(b'object contents', odata)

        # make all fragments non-durable
        for node in onodes:
            part_dir = self.storage_dir('object', node, part=opart)
            for dirs, subdirs, files in os.walk(part_dir):
                for fname in files:
                    if fname.endswith('.data'):
                        # strip the '#d' durable marker from the file name to
                        # leave the fragment on disk but non-durable
                        non_durable_fname = fname.replace('#d', '')
                        os.rename(os.path.join(dirs, fname),
                                  os.path.join(dirs, non_durable_fname))
                        break
            headers = direct_client.direct_head_object(
                node, opart, self.account, container_name, object_name,
                headers={
                    'X-Backend-Storage-Policy-Index': self.policy.idx,
                    'X-Backend-Fragment-Preferences': json.dumps([])})
            self.assertNotIn('X-Backend-Durable-Timestamp', headers)

        # Now a new GET should return *404* because all fragments
        # are non-durable, even if they are reconstructable
        try:
            client.get_object(self.url, self.token, container_name,
                              object_name)
        except client.ClientException as err:
            self.assertEqual(err.http_status, 404)
        else:
            self.fail("Expected ClientException but didn't get it")


if __name__ == '__main__':
    main()