Trigger subcloud patch and load audits by updating identity
During the first round of subcloud audits triggered by the subcloud manage call, there is a race condition between dcdbsync and the subcloud audit. The subcloud patch audit fails to authenticate because the keystone database is still synchronizing, so the original keystone patching user and its role cannot be found. As a result, the patching and load sync statuses remain "unknown" until the next periodic audit. This commit removes the trigger of the patch and load audits from the manage action and instead triggers them when the identity resource is updated from unknown to any other status. After this change, the patching audit is expected to run, and its status to be updated, right after the identity status moves out of unknown. Test plan: 1. Deployed several subclouds with this change. 2. Managed all the subclouds at the same time; no RC 401 error is returned from the patching audit, and all the subclouds go in-sync in a short time. 3. Unmanaged all the subclouds and managed them again; all the subcloud endpoints except "dc-cert_sync_status" go in-sync -> unknown -> in-sync without any error. The subcloud patch and load audits are triggered after the identity endpoints turn back to in-sync. 4. Left the subclouds for 1 hr; the update calls from dcorch that set the identity status to in-sync do not trigger an extra round of audits. 5. Added a user to move the identity status from in-sync -> out-of-sync -> in-sync; extra subcloud patch and load audits are not triggered. 6. Applied a patch to the SystemController; the patch audit should detect it and raise the appropriate subcloud alarms. Closes-bug: 1949477 Signed-off-by: Yuxing Jiang <yuxing.jiang@windriver.com> Change-Id: I6b88fa6e5d9fd86b47f9662112be137ce80ed9cd
This commit is contained in:
parent
7398c99877
commit
f55c356ab2
@ -75,8 +75,13 @@ class ManagerAuditClient(object):
|
||||
def trigger_load_audit(self, ctxt):
|
||||
return self.cast(ctxt, self.make_msg('trigger_load_audit'))
|
||||
|
||||
def trigger_subcloud_audits(self, ctxt, subcloud_id):
|
||||
def trigger_subcloud_audits(self, ctxt, subcloud_id, exclude_endpoints=None):
|
||||
return self.cast(ctxt, self.make_msg('trigger_subcloud_audits',
|
||||
subcloud_id=subcloud_id,
|
||||
exclude_endpoints=exclude_endpoints))
|
||||
|
||||
def trigger_subcloud_patch_load_audits(self, ctxt, subcloud_id):
|
||||
return self.cast(ctxt, self.make_msg('trigger_subcloud_patch_load_audits',
|
||||
subcloud_id=subcloud_id))
|
||||
|
||||
|
||||
|
@ -147,10 +147,18 @@ class DCManagerAuditService(service.Service):
|
||||
return self.subcloud_audit_manager.trigger_load_audit(context)
|
||||
|
||||
@request_context
|
||||
def trigger_subcloud_audits(self, context, subcloud_id):
|
||||
def trigger_subcloud_audits(self, context, subcloud_id, exclude_endpoints):
|
||||
"""Trigger all subcloud audits for one subcloud."""
|
||||
LOG.info("Trigger all audits for subcloud %s", subcloud_id)
|
||||
LOG.info("Trigger all audits for subcloud %s except endpoints %s" %
|
||||
(subcloud_id, exclude_endpoints))
|
||||
return self.subcloud_audit_manager.trigger_subcloud_audits(
|
||||
context, subcloud_id, exclude_endpoints)
|
||||
|
||||
@request_context
|
||||
def trigger_subcloud_patch_load_audits(self, context, subcloud_id):
|
||||
"""Trigger patch and load audits for one subcloud."""
|
||||
LOG.info("Trigger patch and load audits for subcloud %s", subcloud_id)
|
||||
return self.subcloud_audit_manager.trigger_subcloud_patch_load_audits(
|
||||
context, subcloud_id)
|
||||
|
||||
|
||||
|
@ -186,7 +186,7 @@ class SubcloudAuditManager(manager.Manager):
|
||||
def reset_force_patch_audit(cls):
|
||||
cls.force_patch_audit = False
|
||||
|
||||
def trigger_subcloud_audits(self, context, subcloud_id):
|
||||
def trigger_subcloud_audits(self, context, subcloud_id, exclude_endpoints):
|
||||
"""Trigger all subcloud audits for one subcloud."""
|
||||
values = {
|
||||
'patch_audit_requested': True,
|
||||
@ -195,6 +195,22 @@ class SubcloudAuditManager(manager.Manager):
|
||||
'kubernetes_audit_requested': True,
|
||||
'kube_rootca_update_audit_requested': True,
|
||||
}
|
||||
# For the endpoints excluded in the audit, set it to False in db
|
||||
# to disable the audit explicitly.
|
||||
if exclude_endpoints:
|
||||
for exclude_endpoint in exclude_endpoints:
|
||||
exclude_request = dcorch_consts.ENDPOINT_AUDIT_REQUESTS.get(
|
||||
exclude_endpoint)
|
||||
if exclude_request:
|
||||
values.update({exclude_request: False})
|
||||
db_api.subcloud_audits_update(context, subcloud_id, values)
|
||||
|
||||
def trigger_subcloud_patch_load_audits(self, context, subcloud_id):
|
||||
"""Trigger subcloud patch and load audits for one subcloud."""
|
||||
values = {
|
||||
"patch_audit_requested": True,
|
||||
"load_audit_requested": True,
|
||||
}
|
||||
db_api.subcloud_audits_update(context, subcloud_id, values)
|
||||
|
||||
def periodic_subcloud_audit(self):
|
||||
|
@ -1253,9 +1253,13 @@ class SubcloudManager(manager.Manager):
|
||||
LOG.info('Request for managed audit for %s' % subcloud.name)
|
||||
dc_notification = rpc_client.DCManagerNotifications()
|
||||
dc_notification.subcloud_managed(context, subcloud.name)
|
||||
# Trigger all the audits for the subcloud so it can update the
|
||||
# sync status ASAP.
|
||||
self.audit_rpc_client.trigger_subcloud_audits(context, subcloud_id)
|
||||
# Since sysinv user is sync'ed during bootstrap, trigger the
|
||||
# related audits. Patch and load audits are delayed until the
|
||||
# identity resource synchronized by dcdbsync is complete.
|
||||
exclude_endpoints = [dcorch_consts.ENDPOINT_TYPE_PATCHING,
|
||||
dcorch_consts.ENDPOINT_TYPE_LOAD]
|
||||
self.audit_rpc_client.trigger_subcloud_audits(
|
||||
context, subcloud_id, exclude_endpoints)
|
||||
|
||||
return db_api.subcloud_db_model_to_dict(subcloud)
|
||||
|
||||
@ -1278,6 +1282,7 @@ class SubcloudManager(manager.Manager):
|
||||
|
||||
subcloud_status_list = []
|
||||
subcloud = None
|
||||
original_identity_status = None
|
||||
# retrieve the info from the db for this subcloud.
|
||||
# subcloud_id should not be None
|
||||
try:
|
||||
@ -1287,6 +1292,9 @@ class SubcloudManager(manager.Manager):
|
||||
subcloud_status_list.append(
|
||||
db_api.subcloud_endpoint_status_db_model_to_dict(
|
||||
subcloud_status))
|
||||
if subcloud_status.endpoint_type == \
|
||||
dcorch_consts.ENDPOINT_TYPE_IDENTITY:
|
||||
original_identity_status = subcloud_status.sync_status
|
||||
except Exception as e:
|
||||
LOG.exception(e)
|
||||
raise e
|
||||
@ -1318,6 +1326,16 @@ class SubcloudManager(manager.Manager):
|
||||
endpoint_type,
|
||||
sync_status)
|
||||
|
||||
# Trigger subcloud patch and load audits for the subcloud after
|
||||
# its identity endpoint turns to other status from unknown
|
||||
if endpoint_type == dcorch_consts.ENDPOINT_TYPE_IDENTITY \
|
||||
and sync_status != consts.SYNC_STATUS_UNKNOWN \
|
||||
and original_identity_status == consts.SYNC_STATUS_UNKNOWN:
|
||||
LOG.debug('Request for patch and load audit for %s after updating '
|
||||
'identity out of unknown' % subcloud.name)
|
||||
self.audit_rpc_client.trigger_subcloud_patch_load_audits(
|
||||
context, subcloud_id)
|
||||
|
||||
entity_instance_id = "subcloud=%s.resource=%s" % \
|
||||
(subcloud.name, endpoint_type)
|
||||
fault = self.fm_api.get_fault(
|
||||
|
@ -26,6 +26,7 @@ from dcmanager.audit import subcloud_audit_manager
|
||||
from dcmanager.db.sqlalchemy import api as db_api
|
||||
|
||||
from dcmanager.tests import base
|
||||
from dcorch.common import consts as dcorch_consts
|
||||
|
||||
|
||||
class FakeAuditWorkerAPI(object):
|
||||
@ -296,7 +297,7 @@ class TestAuditManager(base.DCManagerTestCase):
|
||||
def test_audit_one_subcloud(self):
|
||||
subcloud = self.create_subcloud_static(self.ctx)
|
||||
am = subcloud_audit_manager.SubcloudAuditManager()
|
||||
am.trigger_subcloud_audits(self.ctx, subcloud.id)
|
||||
am.trigger_subcloud_audits(self.ctx, subcloud.id, None)
|
||||
# Subaudits should be requested.
|
||||
result = db_api.subcloud_audits_get(self.ctx, subcloud.id)
|
||||
self.assertEqual(result['patch_audit_requested'], True)
|
||||
@ -305,6 +306,20 @@ class TestAuditManager(base.DCManagerTestCase):
|
||||
self.assertEqual(result['kubernetes_audit_requested'], True)
|
||||
self.assertEqual(result['kube_rootca_update_audit_requested'], True)
|
||||
|
||||
def test_audit_one_subcloud_exclude_endpoints(self):
|
||||
subcloud = self.create_subcloud_static(self.ctx)
|
||||
am = subcloud_audit_manager.SubcloudAuditManager()
|
||||
exclude_endpoints = [dcorch_consts.ENDPOINT_TYPE_PATCHING,
|
||||
dcorch_consts.ENDPOINT_TYPE_LOAD]
|
||||
am.trigger_subcloud_audits(self.ctx, subcloud.id, exclude_endpoints)
|
||||
# Verify subaudits be requested.
|
||||
result = db_api.subcloud_audits_get(self.ctx, subcloud.id)
|
||||
self.assertEqual(result['patch_audit_requested'], False)
|
||||
self.assertEqual(result['firmware_audit_requested'], True)
|
||||
self.assertEqual(result['load_audit_requested'], False)
|
||||
self.assertEqual(result['kubernetes_audit_requested'], True)
|
||||
self.assertEqual(result['kube_rootca_update_audit_requested'], True)
|
||||
|
||||
def test_trigger_load_audit(self):
|
||||
subcloud = self.create_subcloud_static(self.ctx)
|
||||
am = subcloud_audit_manager.SubcloudAuditManager()
|
||||
@ -313,3 +328,16 @@ class TestAuditManager(base.DCManagerTestCase):
|
||||
result = db_api.subcloud_audits_get(self.ctx, subcloud.id)
|
||||
self.assertEqual(result['patch_audit_requested'], False)
|
||||
self.assertEqual(result['load_audit_requested'], True)
|
||||
|
||||
def test_trigger_one_subcloud_patch_load_audits(self):
|
||||
subcloud = self.create_subcloud_static(self.ctx)
|
||||
am = subcloud_audit_manager.SubcloudAuditManager()
|
||||
am.trigger_subcloud_patch_load_audits(self.ctx, subcloud.id)
|
||||
# Subcloud patch and load audits should be requested.
|
||||
result = db_api.subcloud_audits_get(self.ctx, subcloud.id)
|
||||
self.assertEqual(result['patch_audit_requested'], True)
|
||||
self.assertEqual(result['load_audit_requested'], True)
|
||||
# Other audits should not be requested
|
||||
self.assertEqual(result['firmware_audit_requested'], False)
|
||||
self.assertEqual(result['kubernetes_audit_requested'], False)
|
||||
self.assertEqual(result['kube_rootca_update_audit_requested'], False)
|
||||
|
@ -1084,7 +1084,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
|
||||
|
||||
# Now pretend someone triggered all the subaudits in the DB
|
||||
# after the subcloud audit was triggered but before it ran.
|
||||
am.trigger_subcloud_audits(self.ctx, subcloud.id)
|
||||
am.trigger_subcloud_audits(self.ctx, subcloud.id, None)
|
||||
|
||||
# Make sure all subaudits are requested in DB
|
||||
audits = db_api.subcloud_audits_get(self.ctx, subcloud.id)
|
||||
|
@ -45,6 +45,7 @@ class FakeDCManagerAuditAPI(object):
|
||||
|
||||
def __init__(self):
|
||||
self.trigger_subcloud_audits = mock.MagicMock()
|
||||
self.trigger_subcloud_patch_load_audits = mock.MagicMock()
|
||||
|
||||
|
||||
class FakeDCOrchAPI(object):
|
||||
@ -491,8 +492,11 @@ class TestSubcloudManager(base.DCManagerTestCase):
|
||||
|
||||
fake_dcmanager_notification.subcloud_managed.assert_called_once_with(
|
||||
self.ctx, subcloud.name)
|
||||
self.fake_dcmanager_audit_api.trigger_subcloud_audits.assert_called_once_with(
|
||||
self.ctx, subcloud.id)
|
||||
|
||||
exclude_endpoints = [dcorch_consts.ENDPOINT_TYPE_PATCHING,
|
||||
dcorch_consts.ENDPOINT_TYPE_LOAD]
|
||||
self.fake_dcmanager_audit_api.trigger_subcloud_audits.\
|
||||
assert_called_once_with(self.ctx, subcloud.id, exclude_endpoints)
|
||||
|
||||
# Verify subcloud was updated with correct values
|
||||
updated_subcloud = db_api.subcloud_get_by_name(self.ctx, subcloud.name)
|
||||
@ -528,8 +532,10 @@ class TestSubcloudManager(base.DCManagerTestCase):
|
||||
|
||||
fake_dcmanager_cermon_api.subcloud_managed.assert_called_once_with(
|
||||
self.ctx, subcloud.name)
|
||||
self.fake_dcmanager_audit_api.trigger_subcloud_audits.assert_called_once_with(
|
||||
self.ctx, subcloud.id)
|
||||
exclude_endpoints = [dcorch_consts.ENDPOINT_TYPE_PATCHING,
|
||||
dcorch_consts.ENDPOINT_TYPE_LOAD]
|
||||
self.fake_dcmanager_audit_api.trigger_subcloud_audits.\
|
||||
assert_called_once_with(self.ctx, subcloud.id, exclude_endpoints)
|
||||
|
||||
# Verify subcloud was updated with correct values
|
||||
updated_subcloud = db_api.subcloud_get_by_name(self.ctx, subcloud.name)
|
||||
@ -991,6 +997,11 @@ class TestSubcloudManager(base.DCManagerTestCase):
|
||||
endpoint_type=endpoint,
|
||||
sync_status=consts.SYNC_STATUS_IN_SYNC)
|
||||
|
||||
# We trigger a subcloud audits after updating the identity from unknown
|
||||
# to in-sync
|
||||
self.fake_dcmanager_audit_api.trigger_subcloud_patch_load_audits.\
|
||||
assert_called_once_with(self.ctx, subcloud.id)
|
||||
|
||||
# Audit fails once
|
||||
audit_fail_count = 1
|
||||
sm.update_subcloud_availability(self.ctx, subcloud.name,
|
||||
@ -1029,9 +1040,58 @@ class TestSubcloudManager(base.DCManagerTestCase):
|
||||
self.assertEqual(subcloud_status.sync_status,
|
||||
consts.SYNC_STATUS_UNKNOWN)
|
||||
|
||||
# Verify we did not trigger subcloud audits
|
||||
self.fake_dcmanager_audit_api.trigger_subcloud_audits.\
|
||||
assert_not_called()
|
||||
def test_update_subcloud_identity_endpoint(self):
|
||||
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
|
||||
self.assertIsNotNone(subcloud)
|
||||
|
||||
sm = subcloud_manager.SubcloudManager()
|
||||
|
||||
# Set the subcloud to online/managed
|
||||
db_api.subcloud_update(self.ctx, subcloud.id,
|
||||
management_state=consts.MANAGEMENT_MANAGED,
|
||||
availability_status=consts.AVAILABILITY_ONLINE)
|
||||
|
||||
# Create identity endpoints statuses
|
||||
endpoint = dcorch_consts.ENDPOINT_TYPE_IDENTITY
|
||||
db_api.subcloud_status_create(
|
||||
self.ctx, subcloud.id, endpoint)
|
||||
|
||||
for original_sync_status in [consts.SYNC_STATUS_IN_SYNC,
|
||||
consts.SYNC_STATUS_OUT_OF_SYNC,
|
||||
consts.SYNC_STATUS_UNKNOWN]:
|
||||
|
||||
for new_sync_status in [consts.SYNC_STATUS_IN_SYNC,
|
||||
consts.SYNC_STATUS_OUT_OF_SYNC,
|
||||
consts.SYNC_STATUS_UNKNOWN]:
|
||||
|
||||
# Update identity to the original status
|
||||
sm.update_subcloud_endpoint_status(
|
||||
self.ctx, subcloud_name=subcloud.name,
|
||||
endpoint_type=endpoint,
|
||||
sync_status=original_sync_status)
|
||||
|
||||
# Get the count of the trigger already called
|
||||
original_trigger_subcloud_patch_load_audits_count = \
|
||||
self.fake_dcmanager_audit_api.trigger_subcloud_patch_load_audits.call_count
|
||||
|
||||
# Update identity to new status and get the count of the trigger again
|
||||
sm.update_subcloud_endpoint_status(
|
||||
self.ctx, subcloud_name=subcloud.name,
|
||||
endpoint_type=endpoint,
|
||||
sync_status=new_sync_status)
|
||||
new_trigger_subcloud_patch_load_audits_count = \
|
||||
self.fake_dcmanager_audit_api.trigger_subcloud_patch_load_audits.call_count
|
||||
|
||||
trigger_count = new_trigger_subcloud_patch_load_audits_count - \
|
||||
original_trigger_subcloud_patch_load_audits_count
|
||||
|
||||
if original_sync_status == consts.SYNC_STATUS_UNKNOWN and \
|
||||
new_sync_status != consts.SYNC_STATUS_UNKNOWN:
|
||||
# Verify the subcloud patch and load audit is triggered once
|
||||
self.assertEqual(trigger_count, 1)
|
||||
else:
|
||||
# Verify the subcloud patch and load audit is not triggered
|
||||
self.assertEqual(trigger_count, 0)
|
||||
|
||||
def test_update_subcloud_sync_endpoint_type(self):
|
||||
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
|
||||
|
@ -143,6 +143,17 @@ ENDPOINT_TYPES_LIST = [ENDPOINT_TYPE_PLATFORM,
|
||||
ENDPOINT_TYPE_KUBERNETES,
|
||||
ENDPOINT_TYPE_KUBE_ROOTCA]
|
||||
|
||||
# All endpoint audit requests
|
||||
# TODO(yuxing): move some constants to dccommon as part of general refactoring
|
||||
# for maintainability in a future commit.
|
||||
ENDPOINT_AUDIT_REQUESTS = {
|
||||
ENDPOINT_TYPE_FIRMWARE: 'firmware_audit_requested',
|
||||
ENDPOINT_TYPE_KUBERNETES: 'kubernetes_audit_requested',
|
||||
ENDPOINT_TYPE_KUBE_ROOTCA: 'kube_rootca_update_audit_requested',
|
||||
ENDPOINT_TYPE_LOAD: 'load_audit_requested',
|
||||
ENDPOINT_TYPE_PATCHING: 'patch_audit_requested',
|
||||
}
|
||||
|
||||
# Dcorch sync endpoint types
|
||||
SYNC_ENDPOINT_TYPES_LIST = [ENDPOINT_TYPE_PLATFORM,
|
||||
ENDPOINT_TYPE_IDENTITY]
|
||||
|
Loading…
Reference in New Issue
Block a user