Coordinator handles ToozError when joining group

While joining a partitioning group, we check for
MemberAlreadyExist and GroupNotCreated, but fail
to account for connection failure scenarios. This
leads to an ugly stack trace in the notification agent
logs, and the agent bails out before configuring the
notification and pipeline listeners.

This fix handles the ToozError exception and logs
an error message before retrying the join, assuming
that the error was a temporary problem.

Change-Id: I2aed2241ded798464089b3eec5e1394422a45844
Closes-Bug: 1496982
This commit is contained in:
Rohit Jaiswal 2015-09-17 23:03:42 +00:00
parent bf3e38085d
commit 09d9325ddf
2 changed files with 35 additions and 1 deletions

View File

@ -130,6 +130,9 @@ class PartitionCoordinator(object):
create_grp_req.get() create_grp_req.get()
except tooz.coordination.GroupAlreadyExist: except tooz.coordination.GroupAlreadyExist:
pass pass
except tooz.coordination.ToozError:
LOG.exception(_LE('Error joining partitioning group %s,'
' re-trying'), group_id)
self._groups.add(group_id) self._groups.add(group_id)
def leave_group(self, group_id): def leave_group(self, group_id):

View File

@ -83,6 +83,21 @@ class MockToozCoordExceptionRaiser(MockToozCoordinator):
raise tooz.coordination.ToozError('error') raise tooz.coordination.ToozError('error')
class MockToozCoordExceptionOnJoinRaiser(MockToozCoordinator):
    """Mock coordinator whose ``join_group`` fails a fixed number of times.

    Each call to ``join_group`` raises ``tooz.coordination.ToozError`` until
    the number of failed calls equals ``retry_count``; after that, calls
    return a ``MockAsyncResult`` wrapping ``None``.

    Note that with the default ``retry_count=None`` the internal counter
    (an int) never equals the threshold, so every call raises.
    """

    def __init__(self, member_id, shared_storage, retry_count=None):
        """Initialise the parent mock and reset the failure counter.

        :param member_id: forwarded to ``MockToozCoordinator``.
        :param shared_storage: forwarded to ``MockToozCoordinator``.
        :param retry_count: number of ``join_group`` calls that should
                            raise before one succeeds.
        """
        super(MockToozCoordExceptionOnJoinRaiser,
              self).__init__(member_id, shared_storage)
        self.tooz_error_count = retry_count
        self.count = 0

    def join_group(self, group_id, capabilities=b''):
        """Raise ``ToozError`` until the failure budget is used up."""
        if self.count != self.tooz_error_count:
            # Still within the failure budget: record the attempt and fail.
            self.count += 1
            raise tooz.coordination.ToozError('error')
        return MockAsyncResult(None)
class MockAsyncResult(tooz.coordination.CoordAsyncResult): class MockAsyncResult(tooz.coordination.CoordAsyncResult):
def __init__(self, result): def __init__(self, result):
self.result = result self.result = result
@ -135,12 +150,14 @@ class TestPartitioning(base.BaseTestCase):
self.shared_storage = {} self.shared_storage = {}
def _get_new_started_coordinator(self, shared_storage, agent_id=None, def _get_new_started_coordinator(self, shared_storage, agent_id=None,
coordinator_cls=None): coordinator_cls=None, retry_count=None):
coordinator_cls = coordinator_cls or MockToozCoordinator coordinator_cls = coordinator_cls or MockToozCoordinator
self.CONF.set_override('backend_url', 'xxx://yyy', self.CONF.set_override('backend_url', 'xxx://yyy',
group='coordination') group='coordination')
with mock.patch('tooz.coordination.get_coordinator', with mock.patch('tooz.coordination.get_coordinator',
lambda _, member_id: lambda _, member_id:
coordinator_cls(member_id, shared_storage,
retry_count) if retry_count else
coordinator_cls(member_id, shared_storage)): coordinator_cls(member_id, shared_storage)):
pc = coordination.PartitionCoordinator(agent_id) pc = coordination.PartitionCoordinator(agent_id)
pc.start() pc.start()
@ -210,6 +227,20 @@ class TestPartitioning(base.BaseTestCase):
for e in expected_errors: for e in expected_errors:
self.assertIn(e, self.str_handler.messages['error']) self.assertIn(e, self.str_handler.messages['error'])
def test_coordination_backend_connection_fail_on_join(self):
    """Two ToozError failures on join must log two retry errors."""
    # The mock coordinator raises ToozError on its first two join_group
    # calls (retry_count=2) and succeeds on the third.
    coordinator = self._get_new_started_coordinator(
        {'group'}, 'agent1', MockToozCoordExceptionOnJoinRaiser,
        retry_count=2)
    patcher = mock.patch('tooz.coordination.get_coordinator',
                         return_value=MockToozCoordExceptionOnJoinRaiser)
    with patcher:
        coordinator.join_group(group_id='group')
    # One logged error is expected per failed join attempt.
    retry_message = ('Error joining partitioning group group,'
                     ' re-trying')
    expected_errors = [retry_message] * 2
    self.assertEqual(expected_errors, self.str_handler.messages['error'])
def test_reconnect(self): def test_reconnect(self):
coord = self._get_new_started_coordinator({}, 'a', coord = self._get_new_started_coordinator({}, 'a',
MockToozCoordExceptionRaiser) MockToozCoordExceptionRaiser)