Report when Vault service needs to be restarted for HA

Vault can act as the CA for etcd to allow it to operate in HA mode by
the leader first being unsealed in non-HA mode and providing the root CA
certificate, which allows it to provide a certificate to etcd. However,
at that point, the Vault service needs to be restarted and unsealed
again in order to pick up the HA configuration. Currently, the status
just reports Vault as ready, potentially with multiple "active" units.
This change detects when the Vault service should be restarted to pick
up the HA configuration and reports it via status.

Change-Id: I40e813b1df4ab3b3301881385a5d713524698821
This commit is contained in:
Cory Johns 2021-04-02 14:54:02 -04:00
parent 0d32b7b320
commit 426e68f873
2 changed files with 38 additions and 1 deletions

View File

@ -838,6 +838,15 @@ def _assess_status():
'to restart the service.')
return
if is_flag_set('etcd.tls.available'):
client = vault.get_local_client()
if not client.ha_status['ha_enabled']:
status_set(
'active',
'Vault running as non-HA, manual intervention required '
'to restart the service.')
return
status_set(
'active',
'Unit is ready '
@ -856,10 +865,11 @@ def client_approle_authorized():
vault.get_local_client()
return True
except (vault.hvac.exceptions.InternalServerError,
vault.hvac.exceptions.InvalidRequest,
vault.VaultNotReady,
vault.hvac.exceptions.VaultDown,
vault.requests.exceptions.ReadTimeout):
log("InternalServerError: Unable to athorize approle. "
log("InternalServerError: Unable to authorize approle. "
"This may indicate failure to communicate with the database ",
"WARNING")
log(traceback.format_exc(), level=ERROR)

View File

@ -1154,3 +1154,30 @@ class TestHandlers(unit_tests.test_utils.CharmTestCase):
self.status_set.assert_called_with(
'blocked', 'Load balancer failed: just because'
)
@patch.object(handlers.vault, 'get_local_client')
@patch.object(handlers, 'leader_get')
@patch.object(handlers, 'client_approle_authorized')
@patch.object(handlers, '_assess_interface_groups')
@patch.object(handlers.vault, 'get_vault_health')
def test_assess_status_non_ha(self,
                              get_vault_health,
                              _assess_interface_groups,
                              _client_approle_authorized,
                              _leader_get,
                              get_local_client):
    """Check the status message against the etcd TLS flag and HA state.

    Three consecutive ``_assess_status`` runs are checked:

    1. no flags set, ``ha_enabled`` False -> message has 'Unit is ready'
    2. only 'etcd.tls.available' set, ``ha_enabled`` False -> message
       has 'Vault running as non-HA'
    3. same flag, ``ha_enabled`` True -> message has 'Unit is ready'
    """
    def last_status_message():
        # Second positional argument of the most recent status_set call.
        return self.status_set.call_args[0][1]

    def no_flags_set(flag):
        return False

    def only_etcd_tls_set(flag):
        return flag == 'etcd.tls.available'

    local_client = get_local_client.return_value

    # Common fixture shared by all three scenarios.
    get_vault_health.return_value = self._health_response
    self.snap.get_installed_version.return_value = '0.9.0'
    self.endpoint_from_name().is_available = True
    self.endpoint_from_name().has_response = False

    # Scenario 1: no flags set, client reports HA disabled.
    self.is_flag_set.side_effect = no_flags_set
    local_client.ha_status = {'ha_enabled': False}
    handlers._assess_status()
    self.assertIn('Unit is ready', last_status_message())

    # Scenario 2: etcd TLS flag set while HA is still disabled.
    self.is_flag_set.side_effect = only_etcd_tls_set
    handlers._assess_status()
    self.assertIn('Vault running as non-HA', last_status_message())

    # Scenario 3: etcd TLS flag set and the client now reports HA enabled.
    local_client.ha_status = {'ha_enabled': True}
    handlers._assess_status()
    self.assertIn('Unit is ready', last_status_message())