Add alarm for Restore in progress
Currently, there is no alarm for a restore in progress. Because of this, the system is shown as healthy before the restore has been completed. This new alarm prevents the system from being reported as healthy until the restore has been properly completed.

TEST PLAN
PASS: On any available system, the following commands can be triggered at any time:
  * Run "system restore-start" to trigger the alarm
  * Run "system restore-complete" to clear the alarm
PASS: Do a legacy restore on AIO-SX. The alarm is seen after unlocking and before "system restore-complete" is sent.
PASS: Do an optimized restore on AIO-SX. The alarm is seen after unlocking and before "system restore-complete" is sent.
PASS: Run an AIO-SX subcloud upgrade using dcmanager.

Story: 2010709
Task: 47865
Depends-On: https://review.opendev.org/c/starlingx/fault/+/878076
Signed-off-by: Joshua Kraitberg <joshua.kraitberg@windriver.com>
Change-Id: I1791e81a10c523b626775000abf37957cb1a48ee
This commit is contained in:
parent
e948a02f29
commit
544851e76f
@ -15365,6 +15365,26 @@ class ConductorManager(service.PeriodicService):
|
||||
else:
|
||||
return constants.RESTORE_PROGRESS_ALREADY_IN_PROGRESS
|
||||
|
||||
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
|
||||
constants.CONTROLLER_HOSTNAME)
|
||||
|
||||
fault = fm_api.Fault(
|
||||
alarm_id=fm_constants.FM_ALARM_ID_RESTORE_IN_PROGRESS,
|
||||
alarm_state=fm_constants.FM_ALARM_STATE_SET,
|
||||
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
|
||||
entity_instance_id=entity_instance_id,
|
||||
severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
|
||||
reason_text=("System Restore in progress."),
|
||||
# operational
|
||||
alarm_type=fm_constants.FM_ALARM_TYPE_7,
|
||||
# congestion
|
||||
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_8,
|
||||
proposed_repair_action=("Run 'system restore-complete' to complete restore "
|
||||
"if running restore manually."),
|
||||
service_affecting=False)
|
||||
|
||||
self.fm_api.set_fault(fault)
|
||||
|
||||
# TODO (agrosu): no use case at this point for sending a BACKUP_ACTION_PRE_RESTORE notification.
|
||||
return constants.RESTORE_PROGRESS_STARTED
|
||||
|
||||
@ -15414,6 +15434,11 @@ class ConductorManager(service.PeriodicService):
|
||||
self.dbapi.restore_update(restore.uuid,
|
||||
values={'state': state})
|
||||
|
||||
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
|
||||
constants.CONTROLLER_HOSTNAME)
|
||||
|
||||
self.fm_api.clear_fault(fm_constants.FM_ALARM_ID_RESTORE_IN_PROGRESS, entity_instance_id)
|
||||
|
||||
LOG.info("Complete the restore procedure.")
|
||||
|
||||
return constants.RESTORE_PROGRESS_COMPLETED
|
||||
|
@ -10,6 +10,7 @@ Tests for the restore logic
|
||||
|
||||
from oslo_context import context
|
||||
|
||||
from fm_api import fm_api
|
||||
from sysinv.common import constants
|
||||
from sysinv.conductor import manager
|
||||
from sysinv.db import api as dbapi
|
||||
@ -24,6 +25,7 @@ class RestoreTestCase(base.BaseHostTestCase):
|
||||
# Set up objects for testing
|
||||
self.service = manager.ConductorManager('test-host', 'test-topic')
|
||||
self.service.dbapi = dbapi.get_instance()
|
||||
self.service.fm_api = fm_api.FaultAPIs()
|
||||
self.context = context.get_admin_context()
|
||||
self.valid_restore_states = [
|
||||
constants.RESTORE_PROGRESS_ALREADY_COMPLETED,
|
||||
|
Loading…
Reference in New Issue
Block a user