Do relation consistency sweep on leader change
The current charm design is to perform a sweep of all units related on the identity-service interface to ensure that they have all the correct setting values applied. If the leader unit is deleted and a new one is elected this will not happen until some event e.g. config-changed occurs. This can result in remote units malfunctioning since they think they are not configured. We resolve this by always doing a sweep when the leader-elected hook fires. Also fixes infinite loop edge case when ssl-cert-master switches as a result of leader switch. Change-Id: Icd68cc70d81d7d518c918e831056f686dbc7db1e Closes-Bug: 1721269
This commit is contained in:
parent
4dc04f9e2f
commit
68a0c87235
@ -145,6 +145,7 @@ from charmhelpers.payload.execd import execd_preinstall
|
||||
from charmhelpers.contrib.peerstorage import (
|
||||
peer_retrieve_by_prefix,
|
||||
peer_echo,
|
||||
relation_get as relation_get_and_migrate,
|
||||
)
|
||||
from charmhelpers.contrib.openstack.ip import (
|
||||
ADMIN,
|
||||
@ -585,7 +586,9 @@ def send_ssl_sync_request():
|
||||
prev = bin(_prev)
|
||||
|
||||
if rid and prev ^ ssl_config:
|
||||
if is_leader():
|
||||
clear_ssl_synced_units()
|
||||
|
||||
log("Setting %s=%s" % (key, bin(ssl_config)), level=DEBUG)
|
||||
relation_set(relation_id=rid, relation_settings=settings)
|
||||
|
||||
@ -623,8 +626,12 @@ def cluster_changed():
|
||||
peer_interface='cluster',
|
||||
ensure_local_user=True)
|
||||
# NOTE(jamespage) re-echo passwords for peer storage
|
||||
echo_whitelist = ['_passwd', 'identity-service:', 'ssl-cert-master',
|
||||
echo_whitelist = ['_passwd', 'identity-service:',
|
||||
'db-initialised', 'ssl-cert-available-updates']
|
||||
# Don't echo if leader since a re-election may be in progress.
|
||||
if not is_leader():
|
||||
echo_whitelist.append('ssl-cert-master')
|
||||
|
||||
log("Peer echo whitelist: %s" % (echo_whitelist), level=DEBUG)
|
||||
peer_echo(includes=echo_whitelist, force=True)
|
||||
|
||||
@ -632,14 +639,17 @@ def cluster_changed():
|
||||
|
||||
initialise_pki()
|
||||
|
||||
if is_leader():
|
||||
# Figure out if we need to mandate a sync
|
||||
units = get_ssl_sync_request_units()
|
||||
synced_units = relation_get(attribute='ssl-synced-units',
|
||||
synced_units = relation_get_and_migrate(attribute='ssl-synced-units',
|
||||
unit=local_unit())
|
||||
diff = None
|
||||
if synced_units:
|
||||
synced_units = json.loads(synced_units)
|
||||
diff = set(units).symmetric_difference(set(synced_units))
|
||||
else:
|
||||
units = None
|
||||
|
||||
if units and (not synced_units or diff):
|
||||
log("New peers joined and need syncing - %s" %
|
||||
@ -648,7 +658,7 @@ def cluster_changed():
|
||||
else:
|
||||
update_all_identity_relation_units()
|
||||
|
||||
if not is_elected_leader(CLUSTER_RES) and is_ssl_cert_master():
|
||||
if not is_leader() and is_ssl_cert_master():
|
||||
# Force and sync and trigger a sync master re-election since we are not
|
||||
# leader anymore.
|
||||
force_ssl_sync()
|
||||
@ -663,6 +673,8 @@ def leader_elected():
|
||||
# to ensure that the cron jobs are active on this unit.
|
||||
CONFIGS.write(TOKEN_FLUSH_CRON_FILE)
|
||||
|
||||
update_all_identity_relation_units()
|
||||
|
||||
|
||||
@hooks.hook('leader-settings-changed')
|
||||
def leader_settings_changed():
|
||||
|
@ -139,6 +139,7 @@ from charmhelpers.contrib.peerstorage import (
|
||||
peer_store_and_set,
|
||||
peer_store,
|
||||
peer_retrieve,
|
||||
relation_set as relation_set_and_migrate_to_leader,
|
||||
)
|
||||
|
||||
from charmhelpers.core.templating import render
|
||||
@ -1524,14 +1525,20 @@ def is_ssl_cert_master(votes=None):
|
||||
return True
|
||||
|
||||
# Early in the process and juju leader
|
||||
if not votes:
|
||||
if not set_votes:
|
||||
log("This unit is the juju leader and there are no votes yet, "
|
||||
"becoming the ssl-cert-master",
|
||||
level=DEBUG)
|
||||
return True
|
||||
elif (len(set_votes) == 1 and set_votes != set([local_unit()]) and
|
||||
is_leader()):
|
||||
log("This unit is the juju leader but not yet ssl-cert-master "
|
||||
"(current votes = {})".format(set_votes), level=DEBUG)
|
||||
return False
|
||||
|
||||
# Should never reach here
|
||||
log("Could not determine the ssl-cert-master. Missing edge case.",
|
||||
log("Could not determine the ssl-cert-master. Missing edge case. "
|
||||
"(current votes = {})".format(set_votes),
|
||||
level=ERROR)
|
||||
return False
|
||||
|
||||
@ -1717,8 +1724,12 @@ def synchronize_ca(fatal=False):
|
||||
if not os.path.isdir(SYNC_FLAGS_DIR):
|
||||
mkdir(SYNC_FLAGS_DIR, SSH_USER, KEYSTONE_USER, 0o775)
|
||||
|
||||
restart_trigger = None
|
||||
for action, services in peer_service_actions.iteritems():
|
||||
create_peer_service_actions(action, set(services))
|
||||
services = set(services)
|
||||
if services:
|
||||
restart_trigger = str(uuid.uuid4())
|
||||
create_peer_service_actions(action, services)
|
||||
|
||||
create_peer_actions(peer_actions)
|
||||
|
||||
@ -1736,13 +1747,23 @@ def synchronize_ca(fatal=False):
|
||||
if synced_units:
|
||||
# Format here needs to match that used when peers request sync
|
||||
synced_units = [u.replace('/', '-') for u in synced_units]
|
||||
cluster_rel_settings['ssl-synced-units'] = \
|
||||
ssl_synced_units = \
|
||||
json.dumps(synced_units)
|
||||
# NOTE(hopem): we pull this onto the leader settings to avoid
|
||||
# unnecessary cluster relation noise. This is possible because the
|
||||
# setting is only needed by the cert master.
|
||||
if 'ssl-synced-units' not in leader_get():
|
||||
rid = relation_ids('cluster')[0]
|
||||
relation_set_and_migrate_to_leader(relation_id=rid,
|
||||
**{'ssl-synced-units':
|
||||
ssl_synced_units})
|
||||
else:
|
||||
leader_set({'ssl-synced-units': ssl_synced_units})
|
||||
|
||||
trigger = str(uuid.uuid4())
|
||||
log("Sending restart-services-trigger=%s to all peers" % (trigger),
|
||||
level=DEBUG)
|
||||
cluster_rel_settings['restart-services-trigger'] = trigger
|
||||
if restart_trigger:
|
||||
log("Sending restart-services-trigger=%s to all peers" %
|
||||
(restart_trigger), level=DEBUG)
|
||||
cluster_rel_settings['restart-services-trigger'] = restart_trigger
|
||||
|
||||
log("Sync complete", level=DEBUG)
|
||||
return cluster_rel_settings
|
||||
@ -1756,8 +1777,11 @@ def clear_ssl_synced_units():
|
||||
"""
|
||||
log("Clearing ssl sync units", level=DEBUG)
|
||||
for rid in relation_ids('cluster'):
|
||||
relation_set(relation_id=rid,
|
||||
relation_settings={'ssl-synced-units': None})
|
||||
if 'ssl-synced-units' not in leader_get():
|
||||
relation_set_and_migrate_to_leader(relation_id=rid,
|
||||
**{'ssl-synced-units': None})
|
||||
else:
|
||||
leader_set({'ssl-synced-units': None})
|
||||
|
||||
|
||||
def update_hash_from_path(hash, path, recurse_depth=10):
|
||||
@ -1823,13 +1847,17 @@ def synchronize_ca_if_changed(force=False, fatal=False):
|
||||
|
||||
# If we are the sync master but not leader, ensure we have
|
||||
# relinquished master status.
|
||||
if not is_elected_leader(CLUSTER_RES):
|
||||
cluster_rids = relation_ids('cluster')
|
||||
if cluster_rids:
|
||||
master = relation_get('ssl-cert-master',
|
||||
rid=cluster_rids[0],
|
||||
unit=local_unit())
|
||||
if not is_leader() and master == local_unit():
|
||||
log("Re-electing ssl cert master.", level=INFO)
|
||||
peer_settings['ssl-cert-master'] = 'unknown'
|
||||
|
||||
if peer_settings:
|
||||
for rid in relation_ids('cluster'):
|
||||
relation_set(relation_id=rid,
|
||||
relation_set(relation_id=cluster_rids[0],
|
||||
relation_settings=peer_settings)
|
||||
|
||||
return ret
|
||||
|
@ -732,6 +732,7 @@ class KeystoneRelationTests(CharmTestCase):
|
||||
hooks.cluster_joined(rid='foo:1', ssl_sync_request=False)
|
||||
self.assertFalse(mock_send_ssl_sync_request.called)
|
||||
|
||||
@patch.object(hooks, 'relation_get_and_migrate')
|
||||
@patch.object(hooks, 'initialise_pki')
|
||||
@patch.object(hooks, 'update_all_identity_relation_units')
|
||||
@patch.object(hooks, 'get_ssl_sync_request_units')
|
||||
@ -753,16 +754,18 @@ class KeystoneRelationTests(CharmTestCase):
|
||||
mock_is_ssl_cert_master,
|
||||
mock_get_ssl_sync_request_units,
|
||||
mock_update_all_identity_relation_units,
|
||||
mock_initialise_pki):
|
||||
mock_initialise_pki,
|
||||
mock_relation_get_and_migrate):
|
||||
|
||||
relation_settings = {'foo_passwd': '123',
|
||||
'identity-service:16_foo': 'bar'}
|
||||
|
||||
mock_relation_get_and_migrate.return_value = None
|
||||
mock_is_ssl_cert_master.return_value = False
|
||||
mock_peer_units.return_value = ['unit/0']
|
||||
mock_ensure_ssl_cert_master.return_value = False
|
||||
mock_relation_ids.return_value = []
|
||||
self.is_elected_leader.return_value = False
|
||||
self.is_leader.return_value = False
|
||||
|
||||
def fake_rel_get(attribute=None, *args, **kwargs):
|
||||
if not attribute:
|
||||
@ -775,8 +778,8 @@ class KeystoneRelationTests(CharmTestCase):
|
||||
mock_config.return_value = None
|
||||
|
||||
hooks.cluster_changed()
|
||||
whitelist = ['_passwd', 'identity-service:', 'ssl-cert-master',
|
||||
'db-initialised', 'ssl-cert-available-updates']
|
||||
whitelist = ['_passwd', 'identity-service:', 'db-initialised',
|
||||
'ssl-cert-available-updates', 'ssl-cert-master']
|
||||
self.peer_echo.assert_called_with(force=True, includes=whitelist)
|
||||
ssh_authorized_peers.assert_called_with(
|
||||
user=self.ssh_user, group='juju_keystone',
|
||||
@ -784,8 +787,9 @@ class KeystoneRelationTests(CharmTestCase):
|
||||
self.assertFalse(mock_synchronize_ca.called)
|
||||
self.assertTrue(configs.write_all.called)
|
||||
|
||||
@patch.object(hooks, 'update_all_identity_relation_units')
|
||||
@patch.object(hooks.CONFIGS, 'write')
|
||||
def test_leader_elected(self, mock_write):
|
||||
def test_leader_elected(self, mock_write, mock_update):
|
||||
hooks.leader_elected()
|
||||
mock_write.assert_has_calls([call(utils.TOKEN_FLUSH_CRON_FILE)])
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user