Merge "Filter cert-mon for geo-redundancy in audit and DC_CertWatcher"
This commit is contained in:
commit
8ea80c4b27
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2020-2022 Wind River Systems, Inc.
|
||||
# Copyright (c) 2020-2024 Wind River Systems, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -36,12 +36,13 @@ TASK_NAME_PAUSE_AUDIT = 'pause'
|
||||
# Deploy states for which a subcloud is excluded from the certificate audit.
# Kept alphabetically sorted, one unique entry per state.
INVALID_SUBCLOUD_AUDIT_DEPLOY_STATES = [
    # Secondary subclouds should not be audited as they are expected
    # to be managed by a peer system controller (geo-redundancy feat.)
    'create-complete',
    'pre-rehome',
    'rehome-failed',
    'rehome-pending',
    'rehoming',
    'secondary',
    'secondary-failed',
]
|
||||
|
||||
cert_mon_opts = [
|
||||
@ -118,12 +119,19 @@ class CertificateMonManager(periodic_task.PeriodicTasks):
|
||||
# Do nothing if it is not systemcontroller
|
||||
return
|
||||
|
||||
all_subclouds = utils.get_subclouds()[:]
|
||||
all_subclouds = utils.get_subclouds_from_dcmanager(
|
||||
self.token_cache.get_token(), INVALID_SUBCLOUD_AUDIT_DEPLOY_STATES
|
||||
)
|
||||
LOG.info("Periodic: begin subcloud certificate audit: %d subclouds"
|
||||
% len(all_subclouds))
|
||||
for subcloud_name in all_subclouds:
|
||||
self.sc_audit_queue.enqueue(
|
||||
subcloud_audit_queue.SubcloudAuditData(subcloud_name))
|
||||
for sc in all_subclouds:
|
||||
try:
|
||||
self.sc_audit_queue.enqueue(
|
||||
subcloud_audit_queue.SubcloudAuditData(sc['name']))
|
||||
except subcloud_audit_queue.SubcloudAuditException as exc:
|
||||
# Log as warn because we can see this if the watch has fired
|
||||
# near the same time as we are auditing the subcloud
|
||||
LOG.warn("Failed to enqueue subcloud audit: %s", str(exc))
|
||||
|
||||
def on_start_audit(self):
|
||||
"""
|
||||
@ -136,14 +144,18 @@ class CertificateMonManager(periodic_task.PeriodicTasks):
|
||||
return
|
||||
|
||||
if CONF.certmon.startup_audit_all:
|
||||
LOG.info("Service start: audit all subclouds")
|
||||
LOG.info("Service start startup_audit_all: audit all subclouds")
|
||||
self.audit_sc_cert_start(None)
|
||||
return
|
||||
|
||||
LOG.info("Service start: begin subcloud certificate audit [batch: %s]"
|
||||
% CONF.certmon.audit_batch_size)
|
||||
all_subclouds = utils.get_subclouds_from_dcmanager(
|
||||
self.token_cache.get_token())
|
||||
self.token_cache.get_token(), INVALID_SUBCLOUD_AUDIT_DEPLOY_STATES
|
||||
)
|
||||
LOG.info(
|
||||
"Service start: begin subcloud certificate audit [#sc: %d, batch: %s]"
|
||||
% (len(all_subclouds), CONF.certmon.audit_batch_size)
|
||||
)
|
||||
|
||||
for subcloud in all_subclouds:
|
||||
if subcloud[utils.ENDPOINT_TYPE_DC_CERT] != utils.SYNC_STATUS_IN_SYNC:
|
||||
subcloud_name = subcloud['name']
|
||||
@ -352,7 +364,8 @@ class CertificateMonManager(periodic_task.PeriodicTasks):
|
||||
self.dc_monitor = watcher.DC_CertWatcher()
|
||||
self.dc_monitor.initialize(
|
||||
audit_subcloud=lambda subcloud_name:
|
||||
self.audit_subcloud(subcloud_name, allow_requeue=True))
|
||||
self.audit_subcloud(subcloud_name, allow_requeue=True),
|
||||
invalid_deploy_states=INVALID_SUBCLOUD_AUDIT_DEPLOY_STATES)
|
||||
|
||||
def init_restapicert_monitor(self):
|
||||
self.restapicert_monitor = watcher.RestApiCert_CertWatcher()
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2020-2023 Wind River Systems, Inc.
|
||||
# Copyright (c) 2020-2024 Wind River Systems, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -204,28 +204,31 @@ def get_subcloud(token, subcloud_name):
|
||||
return resp
|
||||
|
||||
|
||||
def load_subclouds(resp, invalid_deploy_states=None):
    """Build a trimmed list of subcloud records from a subcloud list response.

    :param resp: decoded response body; must contain a "subclouds" list
    :param invalid_deploy_states: optional collection of deploy-status
        values. Subclouds whose "deploy-status" is in this collection are
        left out of the result. When None or empty, no filtering is done
        and "deploy-status" is not read at all.
    :returns: list of dicts, one per retained subcloud, holding "name",
        "region-name", "management-state", "availability-status" and
        "sync_status", plus one key per endpoint type mapped to that
        endpoint's sync status.
    :raises KeyError: if an expected field is missing from the response
    """
    sc_list = []
    for obj in resp["subclouds"]:
        if invalid_deploy_states and obj["deploy-status"] in invalid_deploy_states:
            continue
        sc = {
            "name": obj["name"],
            "region-name": obj["region-name"],
            "management-state": obj["management-state"],
            "availability-status": obj["availability-status"],
            "sync_status": obj["sync_status"],
        }
        # Flatten the per-endpoint sync status list into top-level keys
        for ss in obj["endpoint_sync_status"]:
            sc[ss["endpoint_type"]] = ss["sync_status"]
        sc_list.append(sc)

    return sc_list
|
||||
|
||||
|
||||
def get_subclouds_from_dcmanager(token, invalid_deploy_states=None):
    """Fetch the subcloud list over REST and return trimmed records.

    Issues a GET on "<endpoint>/subclouds", where the endpoint URL comes
    from dc_get_service_endpoint_url(token), and hands the response to
    load_subclouds().

    :param token: token used both to resolve the endpoint and for the request
    :param invalid_deploy_states: optional collection of deploy-status
        values, passed through to load_subclouds() for filtering
    :returns: whatever load_subclouds() returns for the response
    """
    api_url = dc_get_service_endpoint_url(token)
    api_cmd = api_url + '/subclouds'
    # Lazy %-args: the message is only formatted when debug is enabled
    LOG.debug('api_cmd %s', api_cmd)
    resp = rest_api_request(token, "GET", api_cmd)

    return load_subclouds(resp, invalid_deploy_states)
|
||||
|
||||
|
||||
def is_subcloud_online(subcloud_name, token=None):
|
||||
@ -239,6 +242,33 @@ def is_subcloud_online(subcloud_name, token=None):
|
||||
return subcloud_info['availability-status'] == AVAILABILITY_ONLINE
|
||||
|
||||
|
||||
def query_subcloud_online_with_deploy_state(
    subcloud_name, invalid_deploy_states=None, token=None
):
    """Check if subcloud is online and not in an invalid deploy state

    :param subcloud_name: name of the subcloud to look up
    :param invalid_deploy_states: optional collection of deploy-status
        values that disqualify the subcloud; None or empty disables the
        deploy-status check
    :param token: token passed to get_subcloud(); obtained from
        get_token() when not supplied
    :returns: tuple (subcloud_valid_state, availability_status,
        deploy_status). subcloud_valid_state is True only when the
        deploy status is not in invalid_deploy_states and the subcloud
        is online. Returns (False, None, None) when the subcloud cannot
        be found.
    """
    if not token:
        token = get_token()
    subcloud_info = get_subcloud(token, subcloud_name)
    if not subcloud_info:
        LOG.error("Cannot find subcloud %s", subcloud_name)
        return False, None, None

    availability_status = subcloud_info["availability-status"]
    deploy_status = subcloud_info["deploy-status"]

    in_invalid_state = (
        bool(invalid_deploy_states) and deploy_status in invalid_deploy_states
    )
    subcloud_valid_state = (
        not in_invalid_state and availability_status == AVAILABILITY_ONLINE
    )
    return subcloud_valid_state, availability_status, deploy_status
|
||||
|
||||
|
||||
def update_subcloud_status(token, subcloud_name, status):
|
||||
api_url = dc_get_service_endpoint_url(token)
|
||||
api_cmd = api_url + '/subclouds/%s/update_status' % subcloud_name
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2020-2022 Wind River Systems, Inc.
|
||||
# Copyright (c) 2020-2024 Wind River Systems, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -375,7 +375,7 @@ class DC_CertWatcher(CertWatcher):
|
||||
def __init__(self):
|
||||
super(DC_CertWatcher, self).__init__()
|
||||
|
||||
def initialize(self, audit_subcloud):
|
||||
def initialize(self, audit_subcloud, invalid_deploy_states):
|
||||
self.context.initialize()
|
||||
dc_role = self.context.dc_role
|
||||
LOG.info('DC role: %s' % dc_role)
|
||||
@ -390,7 +390,11 @@ class DC_CertWatcher(CertWatcher):
|
||||
self.context.kubernete_namespace = ns
|
||||
self.register_listener(AdminEndpointRenew(self.context))
|
||||
if dc_role == constants.DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER:
|
||||
self.register_listener(DCIntermediateCertRenew(self.context, audit_subcloud))
|
||||
self.register_listener(
|
||||
DCIntermediateCertRenew(
|
||||
self.context, audit_subcloud, invalid_deploy_states
|
||||
)
|
||||
)
|
||||
self.register_listener(RootCARenew(self.context))
|
||||
|
||||
|
||||
@ -520,26 +524,42 @@ class AdminEndpointRenew(CertificateRenew):
|
||||
|
||||
|
||||
class DCIntermediateCertRenew(CertificateRenew):
|
||||
def __init__(self, context, audit_subcloud):
|
||||
def __init__(self, context, audit_subcloud, invalid_deploy_states):
|
||||
super(DCIntermediateCertRenew, self).__init__(context)
|
||||
self.invalid_deploy_states = invalid_deploy_states
|
||||
self.secret_pattern = re.compile('-adminep-ca-certificate$')
|
||||
self.audit_subcloud = audit_subcloud
|
||||
|
||||
def check_filter(self, event_data):
|
||||
m = self.secret_pattern.search(event_data.secret_name)
|
||||
if m and m.start() > 0:
|
||||
# Ensure subcloud is online (watch events can fire
|
||||
# for secrets before the subcloud first comes online)
|
||||
search_result = self.secret_pattern.search(event_data.secret_name)
|
||||
if search_result and search_result.start() > 0:
|
||||
# Ensure subcloud is in a valid deploy-status and online (watch
|
||||
# events can fire for secrets before the subcloud first comes online)
|
||||
subcloud_name = self._get_subcloud_name(event_data)
|
||||
try:
|
||||
if not utils.is_subcloud_online(subcloud_name,
|
||||
token=self.context.get_token()):
|
||||
LOG.info('%s check_filter[%s]: subcloud is not online' %
|
||||
(self.__class__.__name__, subcloud_name))
|
||||
(
|
||||
subcloud_valid_state,
|
||||
availability_status,
|
||||
deploy_status,
|
||||
) = utils.query_subcloud_online_with_deploy_state(
|
||||
subcloud_name,
|
||||
invalid_deploy_states=self.invalid_deploy_states,
|
||||
token=self.context.get_token(),
|
||||
)
|
||||
if not subcloud_valid_state:
|
||||
LOG.info(
|
||||
"%s check_filter: subcloud %s is ignored, "
|
||||
"availability=%s, deploy_status: %s",
|
||||
self.__class__.__name__,
|
||||
subcloud_name,
|
||||
availability_status,
|
||||
deploy_status,
|
||||
)
|
||||
return False
|
||||
except Exception:
|
||||
LOG.exception('Failed to check subcloud availability: %s'
|
||||
% subcloud_name)
|
||||
LOG.exception(
|
||||
"Failed to check subcloud availability: %s" % subcloud_name
|
||||
)
|
||||
return False
|
||||
return self.certificate_is_ready(event_data)
|
||||
else:
|
||||
|
Loading…
Reference in New Issue
Block a user