From bcd0eb70afacae1483e9e53d5a4082536770aed8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Kvasni=C4=8Dka?= Date: Wed, 22 Mar 2017 09:59:53 +0100 Subject: [PATCH] Container drive error results double space usage on rest drives When drive with container or account database is unmounted replicator pushes database to handoff location. But this handoff location finds replica with unmounted drive and pushes database to the *next* handoff until all handoffs has a replica - all container/account servers has replicas of all unmounted drives. This patch solves: - Consumption of iterator on handoff location that results in replication to the next and next handoff. - StopIteration exception stopped not finished loop over available handoffs if no more nodes exists for db replication candidency. Regression was introduced in 2.4.0 with rsync compression. Co-Author: Kota Tsuyuzaki Change-Id: I344f9daaa038c6946be11e1cf8c4ef104a09e68b Closes-Bug: 1675500 --- swift/common/db_replicator.py | 10 ++++-- test/unit/common/test_db_replicator.py | 44 ++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py index f9ca249cef..30c6a35c12 100644 --- a/swift/common/db_replicator.py +++ b/swift/common/db_replicator.py @@ -543,7 +543,7 @@ class Replicator(Daemon): more_nodes = self.ring.get_more_nodes(int(partition)) if not local_dev: # Check further if local device is a handoff node - for node in more_nodes: + for node in self.ring.get_more_nodes(int(partition)): if node['id'] == node_id: local_dev = node break @@ -559,7 +559,13 @@ class Replicator(Daemon): success = self._repl_to_node(node, broker, partition, info, different_region) except DriveNotMounted: - repl_nodes.append(next(more_nodes)) + try: + repl_nodes.append(next(more_nodes)) + except StopIteration: + self.logger.error( + _('ERROR There are not enough handoff nodes to reach ' + 'replica count for partition %s'), + partition) self.logger.error(_('ERROR Remote drive not mounted %s'), node) except (Exception, Timeout): self.logger.exception(_('ERROR syncing %(file)s with node' diff --git a/test/unit/common/test_db_replicator.py b/test/unit/common/test_db_replicator.py index 50d031cb01..50f83c2d70 100644 --- a/test/unit/common/test_db_replicator.py +++ b/test/unit/common/test_db_replicator.py @@ -643,6 +643,50 @@ class TestDBReplicator(unittest.TestCase): replicator._replicate_object('0', '/path/to/file', 'node_id') self.assertEqual(['/path/to/file'], self.delete_db_calls) + def test_replicate_object_with_exception(self): + replicator = TestReplicator({}) + replicator.ring = FakeRingWithNodes().Ring('path') + replicator.brokerclass = FakeAccountBroker + replicator.delete_db = self.stub_delete_db + replicator._repl_to_node = mock.Mock(side_effect=Exception()) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[0]['id']) + self.assertEqual(2, replicator._repl_to_node.call_count) + # with one DriveNotMounted exception called on +1 more replica + replicator._repl_to_node = mock.Mock(side_effect=[DriveNotMounted()]) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[0]['id']) + self.assertEqual(3, replicator._repl_to_node.call_count) + # called on +1 more replica and self when *first* handoff + replicator._repl_to_node = mock.Mock(side_effect=[DriveNotMounted()]) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[3]['id']) + self.assertEqual(4, replicator._repl_to_node.call_count) + # even if it's the last handoff it works to keep 3 replicas + # 2 primaries + 1 handoff + replicator._repl_to_node = mock.Mock(side_effect=[DriveNotMounted()]) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[-1]['id']) + self.assertEqual(4, replicator._repl_to_node.call_count) + # with two DriveNotMounted exceptions called on +2 more replica keeping + # durability + replicator._repl_to_node = mock.Mock( + side_effect=[DriveNotMounted()] * 2) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[0]['id']) + self.assertEqual(4, replicator._repl_to_node.call_count) + + def test_replicate_object_with_exception_run_out_of_nodes(self): + replicator = TestReplicator({}) + replicator.ring = FakeRingWithNodes().Ring('path') + replicator.brokerclass = FakeAccountBroker + replicator.delete_db = self.stub_delete_db + # all other devices are not mounted + replicator._repl_to_node = mock.Mock(side_effect=DriveNotMounted()) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[0]['id']) + self.assertEqual(5, replicator._repl_to_node.call_count) + def test_replicate_account_out_of_place(self): replicator = TestReplicator({}, logger=unit.FakeLogger()) replicator.ring = FakeRingWithNodes().Ring('path')