From 9953530f2633717f62469a573d1e3783fdc9f471 Mon Sep 17 00:00:00 2001 From: Edward Hope-Morley Date: Wed, 4 Oct 2017 21:39:14 +0100 Subject: [PATCH] Do relation consistency sweep on leader change The current charm design is to perform a sweep of all units related on the identity-service interface to ensure that they have all the correct setting values applied. If the leader unit is deleted and a new one is elected this will not happen until some event e.g. config-changed occurs. This can result in remote units malfunctioning since they think they are not configured. We resolve this by always doing a sweep when the leader-elected hook fires. Also fixes infinite loop edge case when ssl-cert-master switches as a result of leader switch. Also includes cherry-pick of commit: - ID: a59de539edbfc615de0edcef775e2a3fdf7608d9 - Title: Fix issue with haproxy not restarted Change-Id: Icd68cc70d81d7d518c918e831056f686dbc7db1e Closes-Bug: 1721269 (cherry picked from commit 68a0c87235b84f382b1168e7694007816c42ced2) --- hooks/keystone_hooks.py | 36 +++++++++++++------ hooks/keystone_utils.py | 60 ++++++++++++++++++++++--------- unit_tests/test_keystone_hooks.py | 14 +++++--- 3 files changed, 78 insertions(+), 32 deletions(-) diff --git a/hooks/keystone_hooks.py b/hooks/keystone_hooks.py index 5d89d3fc..ef7751a1 100755 --- a/hooks/keystone_hooks.py +++ b/hooks/keystone_hooks.py @@ -140,6 +140,7 @@ from charmhelpers.payload.execd import execd_preinstall from charmhelpers.contrib.peerstorage import ( peer_retrieve_by_prefix, peer_echo, + relation_get as relation_get_and_migrate, ) from charmhelpers.contrib.openstack.ip import ( ADMIN, @@ -551,7 +552,9 @@ def send_ssl_sync_request(): prev = bin(_prev) if rid and prev ^ ssl_config: - clear_ssl_synced_units() + if is_leader(): + clear_ssl_synced_units() + log("Setting %s=%s" % (key, bin(ssl_config)), level=DEBUG) relation_set(relation_id=rid, relation_settings=settings) @@ -589,8 +592,12 @@ def cluster_changed(): peer_interface='cluster', ensure_local_user=True) # NOTE(jamespage) re-echo passwords for peer storage - echo_whitelist = ['_passwd', 'identity-service:', 'ssl-cert-master', + echo_whitelist = ['_passwd', 'identity-service:', 'db-initialised', 'ssl-cert-available-updates'] + # Don't echo if leader since a re-election may be in progress. + if not is_leader(): + echo_whitelist.append('ssl-cert-master') + log("Peer echo whitelist: %s" % (echo_whitelist), level=DEBUG) peer_echo(includes=echo_whitelist, force=True) @@ -598,14 +605,17 @@ def cluster_changed(): initialise_pki() - # Figure out if we need to mandate a sync - units = get_ssl_sync_request_units() - synced_units = relation_get(attribute='ssl-synced-units', - unit=local_unit()) - diff = None - if synced_units: - synced_units = json.loads(synced_units) - diff = set(units).symmetric_difference(set(synced_units)) + if is_leader(): + # Figure out if we need to mandate a sync + units = get_ssl_sync_request_units() + synced_units = relation_get_and_migrate(attribute='ssl-synced-units', + unit=local_unit()) + diff = None + if synced_units: + synced_units = json.loads(synced_units) + diff = set(units).symmetric_difference(set(synced_units)) + else: + units = None if units and (not synced_units or diff): log("New peers joined and need syncing - %s" % @@ -614,7 +624,7 @@ def cluster_changed(): else: update_all_identity_relation_units() - if not is_elected_leader(CLUSTER_RES) and is_ssl_cert_master(): + if not is_leader() and is_ssl_cert_master(): # Force and sync and trigger a sync master re-election since we are not # leader anymore. force_ssl_sync() @@ -623,14 +633,18 @@ def cluster_changed(): @hooks.hook('leader-elected') +@restart_on_change(restart_map(), stopstart=True) def leader_elected(): log('Unit has been elected leader.', level=DEBUG) # When the local unit has been elected the leader, update the cron jobs # to ensure that the cron jobs are active on this unit. CONFIGS.write(TOKEN_FLUSH_CRON_FILE) + update_all_identity_relation_units() + @hooks.hook('leader-settings-changed') +@restart_on_change(restart_map(), stopstart=True) def leader_settings_changed(): # Since minions are notified of a regime change via the # leader-settings-changed hook, rewrite the token flush cron job to make diff --git a/hooks/keystone_utils.py b/hooks/keystone_utils.py index e3530534..2ee0ab7d 100644 --- a/hooks/keystone_utils.py +++ b/hooks/keystone_utils.py @@ -135,6 +135,7 @@ from charmhelpers.contrib.peerstorage import ( peer_store_and_set, peer_store, peer_retrieve, + relation_set as relation_set_and_migrate_to_leader, ) from charmhelpers.core.templating import render @@ -1383,14 +1384,20 @@ def is_ssl_cert_master(votes=None): return True # Early in the process and juju leader - if not votes: + if not set_votes: log("This unit is the juju leader and there are no votes yet, " "becoming the ssl-cert-master", level=DEBUG) return True + elif (len(set_votes) == 1 and set_votes != set([local_unit()]) and + is_leader()): + log("This unit is the juju leader but not yet ssl-cert-master " + "(current votes = {})".format(set_votes), level=DEBUG) + return False # Should never reach here - log("Could not determine the ssl-cert-master. Missing edge case.", + log("Could not determine the ssl-cert-master. Missing edge case. " + "(current votes = {})".format(set_votes), level=ERROR) return False @@ -1574,8 +1581,12 @@ def synchronize_ca(fatal=False): if not os.path.isdir(SYNC_FLAGS_DIR): mkdir(SYNC_FLAGS_DIR, SSH_USER, 'keystone', 0o775) + restart_trigger = None for action, services in peer_service_actions.iteritems(): - create_peer_service_actions(action, set(services)) + services = set(services) + if services: + restart_trigger = str(uuid.uuid4()) + create_peer_service_actions(action, services) create_peer_actions(peer_actions) @@ -1593,13 +1604,23 @@ def synchronize_ca(fatal=False): if synced_units: # Format here needs to match that used when peers request sync synced_units = [u.replace('/', '-') for u in synced_units] - cluster_rel_settings['ssl-synced-units'] = \ + ssl_synced_units = \ json.dumps(synced_units) + # NOTE(hopem): we pull this onto the leader settings to avoid + # unnecessary cluster relation noise. This is possible because the + # setting is only needed by the cert master. + if 'ssl-synced-units' not in leader_get(): + rid = relation_ids('cluster')[0] + relation_set_and_migrate_to_leader(relation_id=rid, + **{'ssl-synced-units': + ssl_synced_units}) + else: + leader_set({'ssl-synced-units': ssl_synced_units}) - trigger = str(uuid.uuid4()) - log("Sending restart-services-trigger=%s to all peers" % (trigger), - level=DEBUG) - cluster_rel_settings['restart-services-trigger'] = trigger + if restart_trigger: + log("Sending restart-services-trigger=%s to all peers" % + (restart_trigger), level=DEBUG) + cluster_rel_settings['restart-services-trigger'] = restart_trigger log("Sync complete", level=DEBUG) return cluster_rel_settings @@ -1613,8 +1634,11 @@ def clear_ssl_synced_units(): """ log("Clearing ssl sync units", level=DEBUG) for rid in relation_ids('cluster'): - relation_set(relation_id=rid, - relation_settings={'ssl-synced-units': None}) + if 'ssl-synced-units' not in leader_get(): + relation_set_and_migrate_to_leader(relation_id=rid, + **{'ssl-synced-units': None}) + else: + leader_set({'ssl-synced-units': None}) def update_hash_from_path(hash, path, recurse_depth=10): @@ -1680,13 +1704,17 @@ def synchronize_ca_if_changed(force=False, fatal=False): # If we are the sync master but not leader, ensure we have # relinquished master status. - if not is_elected_leader(CLUSTER_RES): - log("Re-electing ssl cert master.", level=INFO) - peer_settings['ssl-cert-master'] = 'unknown' + cluster_rids = relation_ids('cluster') + if cluster_rids: + master = relation_get('ssl-cert-master', + rid=cluster_rids[0], + unit=local_unit()) + if not is_leader() and master == local_unit(): + log("Re-electing ssl cert master.", level=INFO) + peer_settings['ssl-cert-master'] = 'unknown' - if peer_settings: - for rid in relation_ids('cluster'): - relation_set(relation_id=rid, + if peer_settings: + relation_set(relation_id=cluster_rids[0], relation_settings=peer_settings) return ret diff --git a/unit_tests/test_keystone_hooks.py b/unit_tests/test_keystone_hooks.py index 094699c5..e7d23f37 100644 --- a/unit_tests/test_keystone_hooks.py +++ b/unit_tests/test_keystone_hooks.py @@ -721,6 +721,7 @@ class KeystoneRelationTests(CharmTestCase): hooks.cluster_joined(rid='foo:1', ssl_sync_request=False) self.assertFalse(mock_send_ssl_sync_request.called) + @patch.object(hooks, 'relation_get_and_migrate') @patch.object(hooks, 'initialise_pki') @patch.object(hooks, 'update_all_identity_relation_units') @patch.object(hooks, 'get_ssl_sync_request_units') @@ -742,16 +743,18 @@ class KeystoneRelationTests(CharmTestCase): mock_is_ssl_cert_master, mock_get_ssl_sync_request_units, mock_update_all_identity_relation_units, - mock_initialise_pki): + mock_initialise_pki, + mock_relation_get_and_migrate): relation_settings = {'foo_passwd': '123', 'identity-service:16_foo': 'bar'} + mock_relation_get_and_migrate.return_value = None mock_is_ssl_cert_master.return_value = False mock_peer_units.return_value = ['unit/0'] mock_ensure_ssl_cert_master.return_value = False mock_relation_ids.return_value = [] - self.is_elected_leader.return_value = False + self.is_leader.return_value = False def fake_rel_get(attribute=None, *args, **kwargs): if not attribute: @@ -764,8 +767,8 @@ class KeystoneRelationTests(CharmTestCase): mock_config.return_value = None hooks.cluster_changed() - whitelist = ['_passwd', 'identity-service:', 'ssl-cert-master', - 'db-initialised', 'ssl-cert-available-updates'] + whitelist = ['_passwd', 'identity-service:', 'db-initialised', + 'ssl-cert-available-updates', 'ssl-cert-master'] self.peer_echo.assert_called_with(force=True, includes=whitelist) ssh_authorized_peers.assert_called_with( user=self.ssh_user, group='juju_keystone', @@ -773,8 +776,9 @@ class KeystoneRelationTests(CharmTestCase): self.assertFalse(mock_synchronize_ca.called) self.assertTrue(configs.write_all.called) + @patch.object(hooks, 'update_all_identity_relation_units') @patch.object(hooks.CONFIGS, 'write') - def test_leader_elected(self, mock_write): + def test_leader_elected(self, mock_write, mock_update): hooks.leader_elected() mock_write.assert_has_calls([call(utils.TOKEN_FLUSH_CRON_FILE)])