Merge "Dark Data Watcher: switch to agreement across the whole ring"
This commit is contained in:
commit
34b9d88f0c
|
@ -34,9 +34,13 @@ not have any particular bugs that trigger creation of dark data. So,
|
|||
this is an excercise in writing watchers, with a plausible function.
|
||||
|
||||
When enabled, Dark Data watcher definitely drags down the cluster's overall
|
||||
performance. Of course, the load increase can be
|
||||
mitigated as usual, but at the expense of the total time taken by
|
||||
the pass of auditor.
|
||||
performance. Of course, the load increase can be mitigated as usual,
|
||||
but at the expense of the total time taken by the pass of auditor.
|
||||
|
||||
Because the watcher only deems an object dark when all container
|
||||
servers agree, it will silently fail to detect anything if even one
|
||||
of container servers in the ring is down or unreacheable. This is
|
||||
done in the interest of operators who run with action=delete.
|
||||
|
||||
Finally, keep in mind that Dark Data watcher needs the container
|
||||
ring to operate, but runs on an object node. This can come up if
|
||||
|
@ -152,7 +156,7 @@ def get_info_1(container_ring, obj_path, logger):
|
|||
# to a crawl (if this plugin is enabled).
|
||||
random.shuffle(container_nodes)
|
||||
|
||||
dark_flag = 0
|
||||
err_flag = 0
|
||||
for node in container_nodes:
|
||||
try:
|
||||
headers, objs = direct_get_container(
|
||||
|
@ -160,17 +164,12 @@ def get_info_1(container_ring, obj_path, logger):
|
|||
prefix=obj_name, limit=1)
|
||||
except (ClientException, Timeout):
|
||||
# Something is wrong with that server, treat as an error.
|
||||
err_flag += 1
|
||||
continue
|
||||
if not objs or objs[0]['name'] != obj_name:
|
||||
dark_flag += 1
|
||||
continue
|
||||
return objs[0]
|
||||
if objs and objs[0]['name'] == obj_name:
|
||||
return objs[0]
|
||||
|
||||
# We do not ask for a quorum of container servers to know the object.
|
||||
# Even if 1 server knows the object, we return with the info above.
|
||||
# So, we only end here when all servers either have no record of the
|
||||
# object or error out. In such case, even one non-error server means
|
||||
# that the object is dark.
|
||||
if dark_flag:
|
||||
return None
|
||||
raise ContainerError()
|
||||
# We only report the object as dark if all known servers agree that it is.
|
||||
if err_flag:
|
||||
raise ContainerError()
|
||||
return None
|
||||
|
|
|
@ -38,6 +38,7 @@ from swift.obj.diskfile import (
|
|||
DiskFile, write_metadata, invalidate_hash, get_data_dir,
|
||||
DiskFileManager, ECDiskFileManager, AuditLocation, clear_auditor_status,
|
||||
get_auditor_status, HASH_FILE, HASH_INVALIDATIONS_FILE)
|
||||
from swift.common.exceptions import ClientException
|
||||
from swift.common.utils import (
|
||||
mkdirs, normalize_timestamp, Timestamp, readconf, md5, PrefixLoggerAdapter)
|
||||
from swift.common.storage_policy import (
|
||||
|
@ -90,6 +91,26 @@ class FakeRing1(object):
|
|||
return (1, [node1])
|
||||
|
||||
|
||||
class FakeRing2(object):
|
||||
|
||||
def __init__(self, swift_dir, ring_name=None):
|
||||
return
|
||||
|
||||
def get_nodes(self, *args, **kwargs):
|
||||
nodes = []
|
||||
for x in [1, 2]:
|
||||
nodes.append({'ip': '10.0.0.%s' % x,
|
||||
'replication_ip': '10.0.0.%s' % x,
|
||||
'port': 6200 + x,
|
||||
'replication_port': 6200 + x,
|
||||
'device': 'sda',
|
||||
'zone': x % 3,
|
||||
'region': x % 2,
|
||||
'id': x,
|
||||
'handoff_index': 1})
|
||||
return (1, nodes)
|
||||
|
||||
|
||||
class TestAuditorBase(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
|
@ -1776,6 +1797,7 @@ class TestAuditWatchers(TestAuditorBase):
|
|||
log_lines)
|
||||
|
||||
self.logger.clear()
|
||||
|
||||
# with grace_age=0 the DARK object will be older than
|
||||
# grace_age so will be logged
|
||||
ret_config = {'test_watcher1': {'action': 'log', 'grace_age': '0'}}
|
||||
|
@ -1817,6 +1839,101 @@ class TestAuditWatchers(TestAuditorBase):
|
|||
self.assertEqual(0, watcher.grace_age)
|
||||
self.assertEqual('log', watcher.dark_data_policy)
|
||||
|
||||
def test_dark_data_agreement(self):
|
||||
|
||||
# The dark data watcher only sees an object as dark if all container
|
||||
# servers in the ring reply without an error and return an empty
|
||||
# listing. So, we have the following permutations for an object:
|
||||
#
|
||||
# Container Servers Result
|
||||
# CS1 CS2
|
||||
# Listed Listed Good - the baseline result
|
||||
# Listed Error Good
|
||||
# Listed Not listed Good
|
||||
# Error Error Unknown - the baseline failure
|
||||
# Not listed Error Unknown
|
||||
# Not listed Not listed Dark - the only such result!
|
||||
#
|
||||
scenario = [
|
||||
{'cr': ['L', 'L'], 'res': 'G'},
|
||||
{'cr': ['L', 'E'], 'res': 'G'},
|
||||
{'cr': ['L', 'N'], 'res': 'G'},
|
||||
{'cr': ['E', 'E'], 'res': 'U'},
|
||||
{'cr': ['N', 'E'], 'res': 'U'},
|
||||
{'cr': ['N', 'N'], 'res': 'D'}]
|
||||
|
||||
conf = self.conf.copy()
|
||||
conf['watchers'] = 'test_watcher1'
|
||||
conf['__file__'] = '/etc/swift/swift.conf'
|
||||
ret_config = {'test_watcher1': {'action': 'log', 'grace_age': '0'}}
|
||||
with mock.patch('swift.obj.auditor.parse_prefixed_conf',
|
||||
return_value=ret_config), \
|
||||
mock.patch('swift.obj.auditor.load_pkg_resource',
|
||||
side_effect=[DarkDataWatcher]):
|
||||
my_auditor = auditor.ObjectAuditor(conf, logger=self.logger)
|
||||
|
||||
for cur in scenario:
|
||||
|
||||
def fake_direct_get_container(node, part, account, container,
|
||||
prefix=None, limit=None):
|
||||
self.assertEqual(part, 1)
|
||||
self.assertEqual(limit, 1)
|
||||
|
||||
reply_type = cur['cr'][int(node['id']) - 1]
|
||||
|
||||
if reply_type == 'E':
|
||||
raise ClientException("Emulated container server error")
|
||||
|
||||
if reply_type == 'N':
|
||||
return {}, []
|
||||
|
||||
entry = {'bytes': 30968411,
|
||||
'hash': '60303f4122966fe5925f045eb52d1129',
|
||||
'name': '%s' % prefix,
|
||||
'content_type': 'video/mp4',
|
||||
'last_modified': '2017-08-15T03:30:57.693210'}
|
||||
return {}, [entry]
|
||||
|
||||
self.logger.clear()
|
||||
|
||||
namespace = 'swift.obj.watchers.dark_data.'
|
||||
with mock.patch(namespace + 'Ring', FakeRing2), \
|
||||
mock.patch(namespace + 'direct_get_container',
|
||||
fake_direct_get_container):
|
||||
my_auditor.run_audit(mode='once')
|
||||
|
||||
# We inherit a common setUp with 3 objects, so 3 everywhere.
|
||||
if cur['res'] == 'U':
|
||||
unk_exp, ok_exp, dark_exp = 3, 0, 0
|
||||
elif cur['res'] == 'G':
|
||||
unk_exp, ok_exp, dark_exp = 0, 3, 0
|
||||
else:
|
||||
unk_exp, ok_exp, dark_exp = 0, 0, 3
|
||||
|
||||
log_lines = self.logger.get_lines_for_level('info')
|
||||
for line in log_lines:
|
||||
|
||||
if not line.startswith('[audit-watcher test_watcher1] total'):
|
||||
continue
|
||||
words = line.split()
|
||||
if not (words[3] == 'unknown' and
|
||||
words[5] == 'ok' and
|
||||
words[7] == 'dark'):
|
||||
unittest.TestCase.fail('Syntax error in %r' % (line,))
|
||||
|
||||
try:
|
||||
unk_cnt = int(words[4])
|
||||
ok_cnt = int(words[6])
|
||||
dark_cnt = int(words[8])
|
||||
except ValueError:
|
||||
unittest.TestCase.fail('Bad value in %r' % (line,))
|
||||
|
||||
if unk_cnt != unk_exp or ok_cnt != ok_exp or dark_cnt != dark_exp:
|
||||
fmt = 'Expected unknown %d ok %d dark %d, got %r, for nodes %r'
|
||||
msg = fmt % (unk_exp, ok_exp, dark_exp,
|
||||
' '.join(words[3:]), cur['cr'])
|
||||
self.fail(msg=msg)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
Loading…
Reference in New Issue