Add alarm for Restore in progress

Currently, there is no alarm for a restore in progress.
Because of this, the system is shown as healthy
before the restore has been completed.

This new alarm will prevent the system from being reported as healthy
until the restore has been properly completed.

TEST PLAN
PASS: On any available system, the following commands can
  be run at any time:
* Run "system restore-start" to trigger alarm
* Run "system restore-complete" to clear alarm
PASS: Do legacy restore on AIO-SX
  Alarm will be seen after unlocking and
  before "system restore-complete" is sent.
PASS: Do optimized restore on AIO-SX
  Alarm will be seen after unlocking and
  before "system restore-complete" is sent.
PASS: Run AIO-SX subcloud upgrade using dcmanager.

Story: 2010709
Task: 47865
Depends-On: https://review.opendev.org/c/starlingx/fault/+/878076
Signed-off-by: Joshua Kraitberg <joshua.kraitberg@windriver.com>
Change-Id: I1791e81a10c523b626775000abf37957cb1a48ee
This commit is contained in:
Joshua Kraitberg 2023-03-21 09:29:14 -04:00
parent e948a02f29
commit 544851e76f
2 changed files with 27 additions and 0 deletions

View File

@ -15365,6 +15365,26 @@ class ConductorManager(service.PeriodicService):
else:
return constants.RESTORE_PROGRESS_ALREADY_IN_PROGRESS
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
constants.CONTROLLER_HOSTNAME)
fault = fm_api.Fault(
alarm_id=fm_constants.FM_ALARM_ID_RESTORE_IN_PROGRESS,
alarm_state=fm_constants.FM_ALARM_STATE_SET,
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
entity_instance_id=entity_instance_id,
severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
reason_text=("System Restore in progress."),
# operational
alarm_type=fm_constants.FM_ALARM_TYPE_7,
# congestion
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_8,
proposed_repair_action=("Run 'system restore-complete' to complete restore "
"if running restore manually."),
service_affecting=False)
self.fm_api.set_fault(fault)
# TODO (agrosu): no use case at this point for sending a BACKUP_ACTION_PRE_RESTORE notification.
return constants.RESTORE_PROGRESS_STARTED
@ -15414,6 +15434,11 @@ class ConductorManager(service.PeriodicService):
self.dbapi.restore_update(restore.uuid,
values={'state': state})
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
constants.CONTROLLER_HOSTNAME)
self.fm_api.clear_fault(fm_constants.FM_ALARM_ID_RESTORE_IN_PROGRESS, entity_instance_id)
LOG.info("Complete the restore procedure.")
return constants.RESTORE_PROGRESS_COMPLETED

View File

@ -10,6 +10,7 @@ Tests for the restore logic
from oslo_context import context
from fm_api import fm_api
from sysinv.common import constants
from sysinv.conductor import manager
from sysinv.db import api as dbapi
@ -24,6 +25,7 @@ class RestoreTestCase(base.BaseHostTestCase):
# Set up objects for testing
self.service = manager.ConductorManager('test-host', 'test-topic')
self.service.dbapi = dbapi.get_instance()
self.service.fm_api = fm_api.FaultAPIs()
self.context = context.get_admin_context()
self.valid_restore_states = [
constants.RESTORE_PROGRESS_ALREADY_COMPLETED,