Stop endless cycles on openstack upgrade

If an HA cinder deployment is upgraded twice and the leadership has
changed, there is a potential race that can be triggered where because
the CINDER_DB_INIT_RKEY is different on the units that have been the
leader.  This race is that when the unit tries to decide whether it has
reflected the 'new' RKEY it sees a different one from each unit.  By
clearing the rkey, it ensures that only the relation changed event from
the leader actually causes a restart.

Change-Id: I712502a666f23421cc58925f10b7643e3482d861
Closes-Bug: #1928383
This commit is contained in:
Alex Kavanagh 2022-05-11 12:48:41 +01:00
parent 1f45d8c5ae
commit a50281db83
2 changed files with 35 additions and 1 deletions

View File

@ -762,7 +762,11 @@ def check_local_db_actions_complete():
service_restart(svc)
# Echo notification
relation_set(**{CINDER_DB_INIT_ECHO_RKEY: init_id})
data = {CINDER_DB_INIT_ECHO_RKEY: init_id}
# BUG: #1928383 - clear CINDER_DB_INIT_RKEY if not the leader
if not(is_elected_leader(CLUSTER_RES)):
data[CINDER_DB_INIT_RKEY] = None
relation_set(**data)
# NOTE(jamespage): Retry deals with sync issues during one-shot HA deploys.

View File

@ -907,6 +907,8 @@ class TestCinderUtils(CharmTestCase):
else:
return r_settings
# no clear of cinder_utils.CINDER_DB_INIT_RKEY
self.is_elected_leader.return_value = True
self.relation_get.side_effect = mock_relation_get
cinder_utils.check_local_db_actions_complete()
self.assertFalse(self.relation_set.called)
@ -916,6 +918,34 @@ class TestCinderUtils(CharmTestCase):
self.relation_set.assert_has_calls(calls)
self.service_restart.assert_called_with('svc1')
@patch.object(cinder_utils, 'is_db_initialised')
@patch.object(cinder_utils, 'enabled_services')
@patch.object(cinder_utils, 'local_unit', lambda *args: 'unit/0')
def test_check_local_db_actions_complete_not_leader(
self, enabled_services, mock_is_db_initialised):
mock_is_db_initialised.return_value = True
enabled_services.return_value = ['svc1']
r_settings = {}
def mock_relation_get(unit=None, rid=None, attribute=None):
if attribute:
return r_settings.get(attribute)
else:
return r_settings
# ensure clear of cinder_utils.CINDER_DB_INIT_RKEY
self.is_elected_leader.return_value = False
self.relation_get.side_effect = mock_relation_get
cinder_utils.check_local_db_actions_complete()
self.assertFalse(self.relation_set.called)
r_settings = {'cinder-db-initialised': 'unit/1-1234'}
cinder_utils.check_local_db_actions_complete()
calls = [call(
**{'cinder-db-initialised-echo': 'unit/1-1234',
cinder_utils.CINDER_DB_INIT_RKEY: None})]
self.relation_set.assert_has_calls(calls)
self.service_restart.assert_called_with('svc1')
@patch('subprocess.check_output')
def test_log_lvm_info(self, _check):
output = b"some output"