DC: ensure subcloud is online in watch event
As a subcloud is being bootstrapped, kubernetes secrets are created before the subcloud is online. As a result, the DCIntermediateCertRenew watch fires well before the subcloud is available. Cert-mon attempts to audit the subcloud as a result of the watch event, and fails, causing the subcloud to go into a reattempt loop. There is also some sort of issue with the keystone token, causing 401 unauthorized errors when the subcloud eventually does come online. This behaviour is completely avoided if we simply check that the subcloud is online before processing the watch event. If the subcloud is offline, the event is ignored; the subcloud will be properly audited when cert-mon receives the subcloud online event from dcmanager. Story: 2008960 Task: 42969 Change-Id: I75751e24ae233c53bd00734a7d73d517868c87e8 Signed-off-by: Kyle MacLeod <kyle.macleod@windriver.com>
This commit is contained in:
parent
1df668d5c6
commit
2f1b1bcfb0
|
@ -121,6 +121,11 @@ class CertificateMonManager(periodic_task.PeriodicTasks):
|
|||
len(self.subclouds_to_audit) - num_pause_tasks,
|
||||
num_pause_tasks))
|
||||
|
||||
if not utils.is_subcloud_online(subcloud_name):
|
||||
LOG.info("Subcloud is not online, aborting audit: %s" % subcloud_name)
|
||||
self.subclouds_to_audit.pop(0)
|
||||
return
|
||||
|
||||
try:
|
||||
subcloud_sysinv_url = utils.dc_get_subcloud_sysinv_url(subcloud_name)
|
||||
sc_ssl_cert = utils.get_endpoint_certificate(subcloud_sysinv_url)
|
||||
|
|
|
@ -275,6 +275,17 @@ def get_subclouds_from_dcmanager(token):
|
|||
return load_subclouds(resp)
|
||||
|
||||
|
||||
def is_subcloud_online(subcloud_name, token=None):
    """Report whether the named subcloud is currently online.

    :param subcloud_name: name of the subcloud to look up.
    :param token: token handed to get_subcloud(); when it is falsy a
        new one is obtained through get_token().
    :returns: True when the subcloud's 'availability-status' equals
        AVAILABILITY_ONLINE; False when it differs or when the lookup
        returns nothing for this subcloud.
    """
    auth_token = token or get_token()
    details = get_subcloud(auth_token, subcloud_name)
    if details:
        return details['availability-status'] == AVAILABILITY_ONLINE

    # No record came back for this name: log it and report offline.
    LOG.error('Cannot find subcloud %s' % subcloud_name)
    return False
|
||||
|
||||
|
||||
def update_subcloud_status(token, subcloud_name, status):
|
||||
service_name = 'dcmanager'
|
||||
api_url = dc_get_service_endpoint_url(constants.SYSTEM_CONTROLLER_REGION,
|
||||
|
|
|
@ -413,7 +413,7 @@ class AdminEndpointRenew(CertificateRenew):
|
|||
|
||||
role = self.context.dc_role
|
||||
utils.update_admin_ep_cert(token, event_data.ca_crt, event_data.tls_crt,
|
||||
event_data.tls_key)
|
||||
event_data.tls_key)
|
||||
|
||||
# In subclouds, it was observed that sometimes old ICA was used
|
||||
# to sign adminep-cert. Here we run a verification to confirm that
|
||||
|
@ -431,6 +431,19 @@ class DCIntermediateCertRenew(CertificateRenew):
|
|||
def check_filter(self, event_data):
    """Decide whether a secret watch event should be processed.

    The event is accepted only when all of the following hold:
      * self.secret_pattern matches event_data.secret_name at an
        offset greater than zero,
      * the subcloud derived from the event is reported online, and
      * self.certificate_is_ready(event_data) is truthy.

    :param event_data: watch event; its secret_name attribute is read
        here and the object is passed on to the helper methods.
    :returns: False when the event is filtered out, otherwise the
        result of certificate_is_ready(event_data).
    """
    found = self.secret_pattern.search(event_data.secret_name)
    if not found or found.start() <= 0:
        return False

    # Watch events can fire for secrets before the subcloud first
    # comes online, so confirm availability before going further.
    subcloud_name = self._get_subcloud_name(event_data)
    try:
        online = utils.is_subcloud_online(subcloud_name,
                                          token=self.context.get_token())
    except Exception:
        # A failed availability lookup is treated as "not online".
        LOG.exception('Failed to check subcloud availability: %s'
                      % subcloud_name)
        return False

    if not online:
        LOG.info('%s check_filter[%s]: subcloud is not online' %
                 (self.__class__.__name__, subcloud_name))
        return False

    return self.certificate_is_ready(event_data)
|
||||
|
@ -441,7 +454,7 @@ class DCIntermediateCertRenew(CertificateRenew):
|
|||
|
||||
def update_certificate(self, event_data):
|
||||
subcloud_name = self._get_subcloud_name(event_data)
|
||||
LOG.info('subcloud %s %s' % (subcloud_name, event_data))
|
||||
LOG.info('update_certificate: subcloud %s %s' % (subcloud_name, event_data))
|
||||
|
||||
token = self.context.get_dc_token(subcloud_name)
|
||||
subcloud_sysinv_url = utils.dc_get_subcloud_sysinv_url(subcloud_name)
|
||||
|
@ -586,7 +599,7 @@ class PlatformCertRenew(CertificateRenew):
|
|||
def check_filter(self, event_data):
|
||||
LOG.debug('%s: Received event_data %s' % (self.secret_name, event_data))
|
||||
if self.secret_name == event_data.secret_name:
|
||||
LOG.info('%s check_filter[%s]: proceed on event_data: %s'
|
||||
LOG.info('%s check_filter[%s], proceed on event_data: %s'
|
||||
% (self.__class__.__name__, self.secret_name, event_data))
|
||||
return self.certificate_is_ready(event_data)
|
||||
else:
|
||||
|
|
Loading…
Reference in New Issue