Do relation consistency sweep on leader change

The current charm design is to perform a sweep of all units
related on the identity-service interface to ensure that
they have all the correct setting values applied. If the
leader unit is deleted and a new one is elected this will
not happen until some event e.g. config-changed occurs. This
can result in remote units malfunctioning since they think they
are not configured. We resolve this by always doing a sweep when
the leader-elected hook fires.

Also fixes infinite loop edge case when ssl-cert-master switches
as a result of leader switch.

Also includes cherry-pick of commit:
- ID: a59de539ed
- Title: Fix issue with haproxy not restarted

Change-Id: Icd68cc70d81d7d518c918e831056f686dbc7db1e
Closes-Bug: 1721269
(cherry picked from commit 68a0c87235)
This commit is contained in:
Edward Hope-Morley 2017-10-04 21:39:14 +01:00
parent 00379aadc1
commit 9953530f26
3 changed files with 78 additions and 32 deletions

View File

@ -140,6 +140,7 @@ from charmhelpers.payload.execd import execd_preinstall
from charmhelpers.contrib.peerstorage import ( from charmhelpers.contrib.peerstorage import (
peer_retrieve_by_prefix, peer_retrieve_by_prefix,
peer_echo, peer_echo,
relation_get as relation_get_and_migrate,
) )
from charmhelpers.contrib.openstack.ip import ( from charmhelpers.contrib.openstack.ip import (
ADMIN, ADMIN,
@ -551,7 +552,9 @@ def send_ssl_sync_request():
prev = bin(_prev) prev = bin(_prev)
if rid and prev ^ ssl_config: if rid and prev ^ ssl_config:
clear_ssl_synced_units() if is_leader():
clear_ssl_synced_units()
log("Setting %s=%s" % (key, bin(ssl_config)), level=DEBUG) log("Setting %s=%s" % (key, bin(ssl_config)), level=DEBUG)
relation_set(relation_id=rid, relation_settings=settings) relation_set(relation_id=rid, relation_settings=settings)
@ -589,8 +592,12 @@ def cluster_changed():
peer_interface='cluster', peer_interface='cluster',
ensure_local_user=True) ensure_local_user=True)
# NOTE(jamespage) re-echo passwords for peer storage # NOTE(jamespage) re-echo passwords for peer storage
echo_whitelist = ['_passwd', 'identity-service:', 'ssl-cert-master', echo_whitelist = ['_passwd', 'identity-service:',
'db-initialised', 'ssl-cert-available-updates'] 'db-initialised', 'ssl-cert-available-updates']
# Don't echo if leader since a re-election may be in progress.
if not is_leader():
echo_whitelist.append('ssl-cert-master')
log("Peer echo whitelist: %s" % (echo_whitelist), level=DEBUG) log("Peer echo whitelist: %s" % (echo_whitelist), level=DEBUG)
peer_echo(includes=echo_whitelist, force=True) peer_echo(includes=echo_whitelist, force=True)
@ -598,14 +605,17 @@ def cluster_changed():
initialise_pki() initialise_pki()
# Figure out if we need to mandate a sync if is_leader():
units = get_ssl_sync_request_units() # Figure out if we need to mandate a sync
synced_units = relation_get(attribute='ssl-synced-units', units = get_ssl_sync_request_units()
unit=local_unit()) synced_units = relation_get_and_migrate(attribute='ssl-synced-units',
diff = None unit=local_unit())
if synced_units: diff = None
synced_units = json.loads(synced_units) if synced_units:
diff = set(units).symmetric_difference(set(synced_units)) synced_units = json.loads(synced_units)
diff = set(units).symmetric_difference(set(synced_units))
else:
units = None
if units and (not synced_units or diff): if units and (not synced_units or diff):
log("New peers joined and need syncing - %s" % log("New peers joined and need syncing - %s" %
@ -614,7 +624,7 @@ def cluster_changed():
else: else:
update_all_identity_relation_units() update_all_identity_relation_units()
if not is_elected_leader(CLUSTER_RES) and is_ssl_cert_master(): if not is_leader() and is_ssl_cert_master():
# Force and sync and trigger a sync master re-election since we are not # Force and sync and trigger a sync master re-election since we are not
# leader anymore. # leader anymore.
force_ssl_sync() force_ssl_sync()
@ -623,14 +633,18 @@ def cluster_changed():
@hooks.hook('leader-elected') @hooks.hook('leader-elected')
@restart_on_change(restart_map(), stopstart=True)
def leader_elected(): def leader_elected():
log('Unit has been elected leader.', level=DEBUG) log('Unit has been elected leader.', level=DEBUG)
# When the local unit has been elected the leader, update the cron jobs # When the local unit has been elected the leader, update the cron jobs
# to ensure that the cron jobs are active on this unit. # to ensure that the cron jobs are active on this unit.
CONFIGS.write(TOKEN_FLUSH_CRON_FILE) CONFIGS.write(TOKEN_FLUSH_CRON_FILE)
update_all_identity_relation_units()
@hooks.hook('leader-settings-changed') @hooks.hook('leader-settings-changed')
@restart_on_change(restart_map(), stopstart=True)
def leader_settings_changed(): def leader_settings_changed():
# Since minions are notified of a regime change via the # Since minions are notified of a regime change via the
# leader-settings-changed hook, rewrite the token flush cron job to make # leader-settings-changed hook, rewrite the token flush cron job to make

View File

@ -135,6 +135,7 @@ from charmhelpers.contrib.peerstorage import (
peer_store_and_set, peer_store_and_set,
peer_store, peer_store,
peer_retrieve, peer_retrieve,
relation_set as relation_set_and_migrate_to_leader,
) )
from charmhelpers.core.templating import render from charmhelpers.core.templating import render
@ -1383,14 +1384,20 @@ def is_ssl_cert_master(votes=None):
return True return True
# Early in the process and juju leader # Early in the process and juju leader
if not votes: if not set_votes:
log("This unit is the juju leader and there are no votes yet, " log("This unit is the juju leader and there are no votes yet, "
"becoming the ssl-cert-master", "becoming the ssl-cert-master",
level=DEBUG) level=DEBUG)
return True return True
elif (len(set_votes) == 1 and set_votes != set([local_unit()]) and
is_leader()):
log("This unit is the juju leader but not yet ssl-cert-master "
"(current votes = {})".format(set_votes), level=DEBUG)
return False
# Should never reach here # Should never reach here
log("Could not determine the ssl-cert-master. Missing edge case.", log("Could not determine the ssl-cert-master. Missing edge case. "
"(current votes = {})".format(set_votes),
level=ERROR) level=ERROR)
return False return False
@ -1574,8 +1581,12 @@ def synchronize_ca(fatal=False):
if not os.path.isdir(SYNC_FLAGS_DIR): if not os.path.isdir(SYNC_FLAGS_DIR):
mkdir(SYNC_FLAGS_DIR, SSH_USER, 'keystone', 0o775) mkdir(SYNC_FLAGS_DIR, SSH_USER, 'keystone', 0o775)
restart_trigger = None
for action, services in peer_service_actions.iteritems(): for action, services in peer_service_actions.iteritems():
create_peer_service_actions(action, set(services)) services = set(services)
if services:
restart_trigger = str(uuid.uuid4())
create_peer_service_actions(action, services)
create_peer_actions(peer_actions) create_peer_actions(peer_actions)
@ -1593,13 +1604,23 @@ def synchronize_ca(fatal=False):
if synced_units: if synced_units:
# Format here needs to match that used when peers request sync # Format here needs to match that used when peers request sync
synced_units = [u.replace('/', '-') for u in synced_units] synced_units = [u.replace('/', '-') for u in synced_units]
cluster_rel_settings['ssl-synced-units'] = \ ssl_synced_units = \
json.dumps(synced_units) json.dumps(synced_units)
# NOTE(hopem): we pull this onto the leader settings to avoid
# unnecessary cluster relation noise. This is possible because the
# setting is only needed by the cert master.
if 'ssl-synced-units' not in leader_get():
rid = relation_ids('cluster')[0]
relation_set_and_migrate_to_leader(relation_id=rid,
**{'ssl-synced-units':
ssl_synced_units})
else:
leader_set({'ssl-synced-units': ssl_synced_units})
trigger = str(uuid.uuid4()) if restart_trigger:
log("Sending restart-services-trigger=%s to all peers" % (trigger), log("Sending restart-services-trigger=%s to all peers" %
level=DEBUG) (restart_trigger), level=DEBUG)
cluster_rel_settings['restart-services-trigger'] = trigger cluster_rel_settings['restart-services-trigger'] = restart_trigger
log("Sync complete", level=DEBUG) log("Sync complete", level=DEBUG)
return cluster_rel_settings return cluster_rel_settings
@ -1613,8 +1634,11 @@ def clear_ssl_synced_units():
""" """
log("Clearing ssl sync units", level=DEBUG) log("Clearing ssl sync units", level=DEBUG)
for rid in relation_ids('cluster'): for rid in relation_ids('cluster'):
relation_set(relation_id=rid, if 'ssl-synced-units' not in leader_get():
relation_settings={'ssl-synced-units': None}) relation_set_and_migrate_to_leader(relation_id=rid,
**{'ssl-synced-units': None})
else:
leader_set({'ssl-synced-units': None})
def update_hash_from_path(hash, path, recurse_depth=10): def update_hash_from_path(hash, path, recurse_depth=10):
@ -1680,13 +1704,17 @@ def synchronize_ca_if_changed(force=False, fatal=False):
# If we are the sync master but not leader, ensure we have # If we are the sync master but not leader, ensure we have
# relinquished master status. # relinquished master status.
if not is_elected_leader(CLUSTER_RES): cluster_rids = relation_ids('cluster')
log("Re-electing ssl cert master.", level=INFO) if cluster_rids:
peer_settings['ssl-cert-master'] = 'unknown' master = relation_get('ssl-cert-master',
rid=cluster_rids[0],
unit=local_unit())
if not is_leader() and master == local_unit():
log("Re-electing ssl cert master.", level=INFO)
peer_settings['ssl-cert-master'] = 'unknown'
if peer_settings: if peer_settings:
for rid in relation_ids('cluster'): relation_set(relation_id=cluster_rids[0],
relation_set(relation_id=rid,
relation_settings=peer_settings) relation_settings=peer_settings)
return ret return ret

View File

@ -721,6 +721,7 @@ class KeystoneRelationTests(CharmTestCase):
hooks.cluster_joined(rid='foo:1', ssl_sync_request=False) hooks.cluster_joined(rid='foo:1', ssl_sync_request=False)
self.assertFalse(mock_send_ssl_sync_request.called) self.assertFalse(mock_send_ssl_sync_request.called)
@patch.object(hooks, 'relation_get_and_migrate')
@patch.object(hooks, 'initialise_pki') @patch.object(hooks, 'initialise_pki')
@patch.object(hooks, 'update_all_identity_relation_units') @patch.object(hooks, 'update_all_identity_relation_units')
@patch.object(hooks, 'get_ssl_sync_request_units') @patch.object(hooks, 'get_ssl_sync_request_units')
@ -742,16 +743,18 @@ class KeystoneRelationTests(CharmTestCase):
mock_is_ssl_cert_master, mock_is_ssl_cert_master,
mock_get_ssl_sync_request_units, mock_get_ssl_sync_request_units,
mock_update_all_identity_relation_units, mock_update_all_identity_relation_units,
mock_initialise_pki): mock_initialise_pki,
mock_relation_get_and_migrate):
relation_settings = {'foo_passwd': '123', relation_settings = {'foo_passwd': '123',
'identity-service:16_foo': 'bar'} 'identity-service:16_foo': 'bar'}
mock_relation_get_and_migrate.return_value = None
mock_is_ssl_cert_master.return_value = False mock_is_ssl_cert_master.return_value = False
mock_peer_units.return_value = ['unit/0'] mock_peer_units.return_value = ['unit/0']
mock_ensure_ssl_cert_master.return_value = False mock_ensure_ssl_cert_master.return_value = False
mock_relation_ids.return_value = [] mock_relation_ids.return_value = []
self.is_elected_leader.return_value = False self.is_leader.return_value = False
def fake_rel_get(attribute=None, *args, **kwargs): def fake_rel_get(attribute=None, *args, **kwargs):
if not attribute: if not attribute:
@ -764,8 +767,8 @@ class KeystoneRelationTests(CharmTestCase):
mock_config.return_value = None mock_config.return_value = None
hooks.cluster_changed() hooks.cluster_changed()
whitelist = ['_passwd', 'identity-service:', 'ssl-cert-master', whitelist = ['_passwd', 'identity-service:', 'db-initialised',
'db-initialised', 'ssl-cert-available-updates'] 'ssl-cert-available-updates', 'ssl-cert-master']
self.peer_echo.assert_called_with(force=True, includes=whitelist) self.peer_echo.assert_called_with(force=True, includes=whitelist)
ssh_authorized_peers.assert_called_with( ssh_authorized_peers.assert_called_with(
user=self.ssh_user, group='juju_keystone', user=self.ssh_user, group='juju_keystone',
@ -773,8 +776,9 @@ class KeystoneRelationTests(CharmTestCase):
self.assertFalse(mock_synchronize_ca.called) self.assertFalse(mock_synchronize_ca.called)
self.assertTrue(configs.write_all.called) self.assertTrue(configs.write_all.called)
@patch.object(hooks, 'update_all_identity_relation_units')
@patch.object(hooks.CONFIGS, 'write') @patch.object(hooks.CONFIGS, 'write')
def test_leader_elected(self, mock_write): def test_leader_elected(self, mock_write, mock_update):
hooks.leader_elected() hooks.leader_elected()
mock_write.assert_has_calls([call(utils.TOKEN_FLUSH_CRON_FILE)]) mock_write.assert_has_calls([call(utils.TOKEN_FLUSH_CRON_FILE)])