Merge "Limit number of revert tombstone SSYNC requests"
@@ -872,14 +872,18 @@ class ObjectReconstructor(Daemon):
             # push partitions off this node, but none of the suffixes
             # have any data fragments to hint at which node would be a
             # good candidate to receive the tombstones.
+            #
+            # we'll check a sample of other primaries before we delete our
+            # local tombstones, the exact number doesn't matter as long as
+            # it's enough to ensure the tombstones are not lost and less
+            # than *all the replicas*
+            nsample = (policy.ec_n_unique_fragments *
+                       policy.ec_duplication_factor) - policy.ec_ndata + 1
             jobs.append(build_job(
                 job_type=REVERT,
                 frag_index=None,
                 suffixes=non_data_fragment_suffixes,
-                # this is super safe
-                sync_to=part_nodes,
-                # something like this would be probably be better
-                # sync_to=random.sample(part_nodes, 3),
+                sync_to=random.sample(part_nodes, nsample)
             ))
         # return a list of jobs for this part
         return jobs
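For reference, with no fragment duplication the formula reduces to the parity count plus one: enough primaries to be confident the tombstone survives elsewhere, but far fewer than all replicas. A standalone sketch of the arithmetic, assuming a hypothetical 10+4 policy (illustrative values, not taken from this change):

# Illustrative sketch only: the nsample arithmetic in plain Python, assuming
# a hypothetical EC policy with 10 data + 4 parity fragments and
# ec_duplication_factor == 1 (names mirror the policy attributes above).
import random

ec_ndata = 10
ec_nparity = 4
ec_duplication_factor = 1
ec_n_unique_fragments = ec_ndata + ec_nparity       # 14 primaries in the ring

nsample = (ec_n_unique_fragments *
           ec_duplication_factor) - ec_ndata + 1    # 14 - 10 + 1 == 5

# the revert job then syncs to a random subset of primaries, not all of them
part_nodes = ['node%d' % i for i in range(ec_n_unique_fragments)]
sync_to = random.sample(part_nodes, nsample)        # 5 distinct primaries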
@@ -173,7 +173,7 @@ class TestReconstructorRevert(ECProbeTest):
         client.put_object(self.url, self.token, self.container_name,
                           self.object_name, contents=contents)

-        # now lets shut down a couple primaries
+        # now lets shut down a couple of primaries
         failed_nodes = random.sample(onodes, 2)
         for node in failed_nodes:
             self.kill_drive(self.device_dir('object', node))
@@ -197,61 +197,26 @@ class TestReconstructorRevert(ECProbeTest):
                 self.fail('Node data on %r was not fully destroyed!' %
                           (node,))

-        # repair the first primary
-        self.revive_drive(self.device_dir('object', failed_nodes[0]))
-
-        # run the reconstructor on the handoffs nodes, if there are no data
-        # frags to hint at the node index - each hnode syncs to all primaries
-        for hnode in hnodes:
-            self.reconstructor.once(number=self.config_number(hnode))
-
-        # because not all primaries are online, the tombstones remain
-        for hnode in hnodes:
+        # run the reconstructor on the handoff node multiple times until
+        # tombstone is pushed out - each handoff node syncs to a few
+        # primaries each time
+        iterations = 0
+        while iterations < 52:
+            self.reconstructor.once(number=self.config_number(hnodes[0]))
+            iterations += 1
+            # see if the tombstone is reverted
             try:
-                self.direct_get(hnode, opart)
+                self.direct_get(hnodes[0], opart)
             except direct_client.DirectClientException as err:
                 self.assertEqual(err.http_status, 404)
-                self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                                 delete_timestamp)
-            else:
-                self.fail('Found obj data on %r' % hnode)
-
-        # ... but it's on the first failed (now repaired) primary
-        try:
-            self.direct_get(failed_nodes[0], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                             delete_timestamp)
-        else:
-            self.fail('Found obj data on %r' % failed_nodes[0])
-
-        # repair the second primary
-        self.revive_drive(self.device_dir('object', failed_nodes[1]))
-
-        # run the reconstructor on the *first* handoff node
-        self.reconstructor.once(number=self.config_number(hnodes[0]))
-
-        # make sure it's tombstone was pushed out
-        try:
-            self.direct_get(hnodes[0], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertNotIn('X-Backend-Timestamp', err.http_headers)
-        else:
-            self.fail('Found obj data on %r' % hnodes[0])
-
-        # ... and now it's on the second failed primary too!
-        try:
-            self.direct_get(failed_nodes[1], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                             delete_timestamp)
-        else:
-            self.fail('Found obj data on %r' % failed_nodes[1])
-
-        # ... but still on the second handoff node
+                if 'X-Backend-Timestamp' not in err.http_headers:
+                    # this means the tombstone is *gone* so it's reverted
+                    break
+        else:
+            self.fail('Still found tombstone on %r after %s iterations' % (
+                hnodes[0], iterations))
+
+        # tombstone is still on the *second* handoff
         try:
             self.direct_get(hnodes[1], opart)
         except direct_client.DirectClientException as err:
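The reworked probe test above relies on Python's while/else semantics: the else suite runs only when the loop condition goes false without hitting break, which is what turns "the tombstone never disappeared within 52 passes" into a test failure. A minimal sketch of that control flow with hypothetical stand-ins:

# Minimal while/else sketch (hypothetical stand-ins, not the probe test API).
import random

def tombstone_gone():
    # stand-in for a direct GET returning 404 with no X-Backend-Timestamp
    return random.random() < 0.2

iterations = 0
while iterations < 52:
    iterations += 1
    if tombstone_gone():
        break        # success: the else suite below is skipped
else:
    # runs only if the loop exhausted all 52 passes without break-ing
    raise AssertionError('still found tombstone after %s iterations' % iterations)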
@@ -261,10 +226,14 @@ class TestReconstructorRevert(ECProbeTest):
         else:
             self.fail('Found obj data on %r' % hnodes[1])

-        # ... until it's next sync
+        # repair the primaries
+        self.revive_drive(self.device_dir('object', failed_nodes[0]))
+        self.revive_drive(self.device_dir('object', failed_nodes[1]))
+
+        # run reconstructor on second handoff
         self.reconstructor.once(number=self.config_number(hnodes[1]))

-        # ... then it's tombstone is pushed off too!
+        # verify tombstone is reverted on the first pass
         try:
             self.direct_get(hnodes[1], opart)
         except direct_client.DirectClientException as err:
@@ -2086,7 +2086,12 @@ class TestObjectReconstructor(BaseTestObjectReconstructor):
             'local_dev': self.local_dev,
             'device': self.local_dev['device'],
         }
-        self.assertEqual(ring.replica_count, len(job['sync_to']))
+        self.assertEqual(ring.replica_count, len(part_nodes))
+        expected_samples = (
+            (self.policy.ec_n_unique_fragments *
+             self.policy.ec_duplication_factor) -
+            self.policy.ec_ndata + 1)
+        self.assertEqual(len(job['sync_to']), expected_samples)
         for k, v in expected.items():
             msg = 'expected %s != %s for %s' % (
                 v, job[k], k)
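The updated unit test asserts on the length of job['sync_to'] rather than its exact membership, since random.sample returns exactly that many distinct nodes while the particular primaries chosen vary from run to run. A quick illustration with hypothetical values:

# random.sample(seq, k) returns k distinct elements, so the length is
# deterministic even though the membership is random (hypothetical values).
import random

part_nodes = list(range(14))        # e.g. a 10+4 ring: 14 primaries
expected_samples = 14 - 10 + 1      # 5, per the nsample formula

chosen = random.sample(part_nodes, expected_samples)
assert len(chosen) == expected_samples
assert len(set(chosen)) == expected_samples     # no duplicates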