Merge "Dark Data Watcher: switch to agreement across the whole ring"

This commit is contained in:
Zuul 2021-07-22 03:06:31 +00:00 committed by Gerrit Code Review
commit 34b9d88f0c
2 changed files with 132 additions and 16 deletions

View File

@ -34,9 +34,13 @@ not have any particular bugs that trigger creation of dark data. So,
this is an excercise in writing watchers, with a plausible function.
When enabled, Dark Data watcher definitely drags down the cluster's overall
performance. Of course, the load increase can be
mitigated as usual, but at the expense of the total time taken by
the pass of auditor.
performance. Of course, the load increase can be mitigated as usual,
but at the expense of the total time taken by the pass of auditor.
Because the watcher only deems an object dark when all container
servers agree, it will silently fail to detect anything if even one
of container servers in the ring is down or unreacheable. This is
done in the interest of operators who run with action=delete.
Finally, keep in mind that Dark Data watcher needs the container
ring to operate, but runs on an object node. This can come up if
@ -152,7 +156,7 @@ def get_info_1(container_ring, obj_path, logger):
# to a crawl (if this plugin is enabled).
random.shuffle(container_nodes)
dark_flag = 0
err_flag = 0
for node in container_nodes:
try:
headers, objs = direct_get_container(
@ -160,17 +164,12 @@ def get_info_1(container_ring, obj_path, logger):
prefix=obj_name, limit=1)
except (ClientException, Timeout):
# Something is wrong with that server, treat as an error.
err_flag += 1
continue
if not objs or objs[0]['name'] != obj_name:
dark_flag += 1
continue
return objs[0]
if objs and objs[0]['name'] == obj_name:
return objs[0]
# We do not ask for a quorum of container servers to know the object.
# Even if 1 server knows the object, we return with the info above.
# So, we only end here when all servers either have no record of the
# object or error out. In such case, even one non-error server means
# that the object is dark.
if dark_flag:
return None
raise ContainerError()
# We only report the object as dark if all known servers agree that it is.
if err_flag:
raise ContainerError()
return None

View File

@ -38,6 +38,7 @@ from swift.obj.diskfile import (
DiskFile, write_metadata, invalidate_hash, get_data_dir,
DiskFileManager, ECDiskFileManager, AuditLocation, clear_auditor_status,
get_auditor_status, HASH_FILE, HASH_INVALIDATIONS_FILE)
from swift.common.exceptions import ClientException
from swift.common.utils import (
mkdirs, normalize_timestamp, Timestamp, readconf, md5, PrefixLoggerAdapter)
from swift.common.storage_policy import (
@ -90,6 +91,26 @@ class FakeRing1(object):
return (1, [node1])
class FakeRing2(object):
def __init__(self, swift_dir, ring_name=None):
return
def get_nodes(self, *args, **kwargs):
nodes = []
for x in [1, 2]:
nodes.append({'ip': '10.0.0.%s' % x,
'replication_ip': '10.0.0.%s' % x,
'port': 6200 + x,
'replication_port': 6200 + x,
'device': 'sda',
'zone': x % 3,
'region': x % 2,
'id': x,
'handoff_index': 1})
return (1, nodes)
class TestAuditorBase(unittest.TestCase):
def setUp(self):
@ -1776,6 +1797,7 @@ class TestAuditWatchers(TestAuditorBase):
log_lines)
self.logger.clear()
# with grace_age=0 the DARK object will be older than
# grace_age so will be logged
ret_config = {'test_watcher1': {'action': 'log', 'grace_age': '0'}}
@ -1817,6 +1839,101 @@ class TestAuditWatchers(TestAuditorBase):
self.assertEqual(0, watcher.grace_age)
self.assertEqual('log', watcher.dark_data_policy)
def test_dark_data_agreement(self):
# The dark data watcher only sees an object as dark if all container
# servers in the ring reply without an error and return an empty
# listing. So, we have the following permutations for an object:
#
# Container Servers Result
# CS1 CS2
# Listed Listed Good - the baseline result
# Listed Error Good
# Listed Not listed Good
# Error Error Unknown - the baseline failure
# Not listed Error Unknown
# Not listed Not listed Dark - the only such result!
#
scenario = [
{'cr': ['L', 'L'], 'res': 'G'},
{'cr': ['L', 'E'], 'res': 'G'},
{'cr': ['L', 'N'], 'res': 'G'},
{'cr': ['E', 'E'], 'res': 'U'},
{'cr': ['N', 'E'], 'res': 'U'},
{'cr': ['N', 'N'], 'res': 'D'}]
conf = self.conf.copy()
conf['watchers'] = 'test_watcher1'
conf['__file__'] = '/etc/swift/swift.conf'
ret_config = {'test_watcher1': {'action': 'log', 'grace_age': '0'}}
with mock.patch('swift.obj.auditor.parse_prefixed_conf',
return_value=ret_config), \
mock.patch('swift.obj.auditor.load_pkg_resource',
side_effect=[DarkDataWatcher]):
my_auditor = auditor.ObjectAuditor(conf, logger=self.logger)
for cur in scenario:
def fake_direct_get_container(node, part, account, container,
prefix=None, limit=None):
self.assertEqual(part, 1)
self.assertEqual(limit, 1)
reply_type = cur['cr'][int(node['id']) - 1]
if reply_type == 'E':
raise ClientException("Emulated container server error")
if reply_type == 'N':
return {}, []
entry = {'bytes': 30968411,
'hash': '60303f4122966fe5925f045eb52d1129',
'name': '%s' % prefix,
'content_type': 'video/mp4',
'last_modified': '2017-08-15T03:30:57.693210'}
return {}, [entry]
self.logger.clear()
namespace = 'swift.obj.watchers.dark_data.'
with mock.patch(namespace + 'Ring', FakeRing2), \
mock.patch(namespace + 'direct_get_container',
fake_direct_get_container):
my_auditor.run_audit(mode='once')
# We inherit a common setUp with 3 objects, so 3 everywhere.
if cur['res'] == 'U':
unk_exp, ok_exp, dark_exp = 3, 0, 0
elif cur['res'] == 'G':
unk_exp, ok_exp, dark_exp = 0, 3, 0
else:
unk_exp, ok_exp, dark_exp = 0, 0, 3
log_lines = self.logger.get_lines_for_level('info')
for line in log_lines:
if not line.startswith('[audit-watcher test_watcher1] total'):
continue
words = line.split()
if not (words[3] == 'unknown' and
words[5] == 'ok' and
words[7] == 'dark'):
unittest.TestCase.fail('Syntax error in %r' % (line,))
try:
unk_cnt = int(words[4])
ok_cnt = int(words[6])
dark_cnt = int(words[8])
except ValueError:
unittest.TestCase.fail('Bad value in %r' % (line,))
if unk_cnt != unk_exp or ok_cnt != ok_exp or dark_cnt != dark_exp:
fmt = 'Expected unknown %d ok %d dark %d, got %r, for nodes %r'
msg = fmt % (unk_exp, ok_exp, dark_exp,
' '.join(words[3:]), cur['cr'])
self.fail(msg=msg)
if __name__ == '__main__':
unittest.main()