Support passing an ignore alarm list to kube upgrade start API
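The health utilities now accept a list of alarm ids to ignore when checking for active alarms. The kube_upgrade start API reads an optional alarm_ignore_list from the request and passes it through to the system health check. Story: 2008137 Task: 41559 Signed-off-by: albailey <Al.Bailey@windriver.com> Change-Id: I19db852f2e87273551d8a30f4bab470afa420de2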
This commit is contained in:
parent
84936cf189
commit
2ab82b7262
|
@ -187,6 +187,7 @@ class KubeUpgradeController(rest.RestController):
|
|||
"""Create a new Kubernetes Upgrade and start upgrade."""
|
||||
|
||||
force = body.get('force', False) is True
|
||||
alarm_ignore_list = body.get('alarm_ignore_list')
|
||||
|
||||
# There must not be a platform upgrade in progress
|
||||
try:
|
||||
|
@ -247,7 +248,10 @@ class KubeUpgradeController(rest.RestController):
|
|||
|
||||
# The system must be healthy
|
||||
success, output = pecan.request.rpcapi.get_system_health(
|
||||
pecan.request.context, force=force, kube_upgrade=True)
|
||||
pecan.request.context,
|
||||
force=force,
|
||||
kube_upgrade=True,
|
||||
alarm_ignore_list=alarm_ignore_list)
|
||||
if not success:
|
||||
LOG.info("Health query failure during kubernetes upgrade start: %s"
|
||||
% output)
|
||||
|
|
|
@ -94,8 +94,11 @@ class Health(object):
|
|||
success = not not_patch_current_hosts and not hostnames
|
||||
return success, not_patch_current_hosts, hostnames
|
||||
|
||||
def _check_alarms(self, context, force=False):
|
||||
def _check_alarms(self, context, force=False, alarm_ignore_list=None):
|
||||
"""Checks that no alarms are active"""
|
||||
if alarm_ignore_list is None:
|
||||
alarm_ignore_list = []
|
||||
|
||||
alarms = fmclient(context).alarm.list(include_suppress=True)
|
||||
|
||||
success = True
|
||||
|
@ -103,14 +106,15 @@ class Health(object):
|
|||
affecting = 0
|
||||
# Separate alarms that are mgmt affecting
|
||||
for alarm in alarms:
|
||||
mgmt_affecting = alarm.mgmt_affecting == "True"
|
||||
if not mgmt_affecting:
|
||||
allowed += 1
|
||||
if not force:
|
||||
if alarm.alarm_id not in alarm_ignore_list:
|
||||
mgmt_affecting = alarm.mgmt_affecting == "True"
|
||||
if not mgmt_affecting:
|
||||
allowed += 1
|
||||
if not force:
|
||||
success = False
|
||||
else:
|
||||
affecting += 1
|
||||
success = False
|
||||
else:
|
||||
affecting += 1
|
||||
success = False
|
||||
|
||||
return success, allowed, affecting
|
||||
|
||||
|
@ -245,7 +249,7 @@ class Health(object):
|
|||
|
||||
return True
|
||||
|
||||
def get_system_health(self, context, force=False):
|
||||
def get_system_health(self, context, force=False, alarm_ignore_list=None):
|
||||
"""Returns the general health of the system
|
||||
|
||||
Checks the following:
|
||||
|
@ -257,8 +261,12 @@ class Health(object):
|
|||
- For ceph systems: The storage cluster is healthy
|
||||
- All kubernetes nodes are ready
|
||||
- All kubernetes control plane pods are ready
|
||||
"""
|
||||
|
||||
:param context: request context.
|
||||
:param force: set to true to ignore minor and warning alarms
|
||||
:param alarm_ignore_list: list of alarm ids to ignore when performing
|
||||
a health check
|
||||
"""
|
||||
hosts = self._dbapi.ihost_get_list()
|
||||
output = _('System Health:\n')
|
||||
health_ok = True
|
||||
|
@ -316,7 +324,10 @@ class Health(object):
|
|||
|
||||
health_ok = health_ok and success
|
||||
|
||||
success, allowed, affecting = self._check_alarms(context, force)
|
||||
success, allowed, affecting = self._check_alarms(
|
||||
context,
|
||||
force=force,
|
||||
alarm_ignore_list=alarm_ignore_list)
|
||||
output += _('No alarms: [%s]\n') \
|
||||
% (Health.SUCCESS_MSG if success else Health.FAIL_MSG)
|
||||
if not success:
|
||||
|
@ -345,17 +356,29 @@ class Health(object):
|
|||
|
||||
return health_ok, output
|
||||
|
||||
def get_system_health_upgrade(self, context, force=False):
|
||||
"""Ensures the system is in a valid state for an upgrade"""
|
||||
def get_system_health_upgrade(self,
|
||||
context,
|
||||
force=False,
|
||||
alarm_ignore_list=None):
|
||||
"""
|
||||
Ensures the system is in a valid state for an upgrade
|
||||
|
||||
:param context: request context.
|
||||
:param force: set to true to ignore minor and warning alarms
|
||||
:param alarm_ignore_list: list of alarm ids to ignore when performing
|
||||
a health check
|
||||
"""
|
||||
# Does a general health check then does the following:
|
||||
# A load is imported
|
||||
# The load patch requirements are met
|
||||
# The license is valid for the N+1 load
|
||||
|
||||
system_mode = self._dbapi.isystem_get_one().system_mode
|
||||
simplex = (system_mode == constants.SYSTEM_MODE_SIMPLEX)
|
||||
|
||||
health_ok, output = self.get_system_health(context, force)
|
||||
health_ok, output = self.get_system_health(
|
||||
context,
|
||||
force=force,
|
||||
alarm_ignore_list=alarm_ignore_list)
|
||||
loads = self._dbapi.load_get_list()
|
||||
try:
|
||||
imported_load = utils.get_imported_load(loads)
|
||||
|
@ -412,14 +435,25 @@ class Health(object):
|
|||
|
||||
return health_ok, output
|
||||
|
||||
def get_system_health_kube_upgrade(self, context, force=False):
|
||||
"""Ensures the system is in a valid state for a kubernetes upgrade
|
||||
def get_system_health_kube_upgrade(self,
|
||||
context,
|
||||
force=False,
|
||||
alarm_ignore_list=None):
|
||||
"""
|
||||
Ensures the system is in a valid state for a kubernetes upgrade
|
||||
|
||||
Does a general health check then does the following:
|
||||
- All kubernetes applications are in a stable state
|
||||
"""
|
||||
|
||||
health_ok, output = self.get_system_health(context, force)
|
||||
:param context: request context.
|
||||
:param force: set to true to ignore minor and warning alarms
|
||||
:param alarm_ignore_list: list of alarm ids to ignore when performing
|
||||
a health check
|
||||
"""
|
||||
health_ok, output = self.get_system_health(
|
||||
context,
|
||||
force=force,
|
||||
alarm_ignore_list=alarm_ignore_list)
|
||||
|
||||
success, apps_not_valid = self._check_kube_applications()
|
||||
output += _(
|
||||
|
|
|
@ -9792,7 +9792,8 @@ class ConductorManager(service.PeriodicService):
|
|||
return
|
||||
|
||||
def get_system_health(self, context, force=False, upgrade=False,
|
||||
kube_upgrade=False):
|
||||
kube_upgrade=False,
|
||||
alarm_ignore_list=None):
|
||||
"""
|
||||
Performs a system health check.
|
||||
|
||||
|
@ -9801,18 +9802,26 @@ class ConductorManager(service.PeriodicService):
|
|||
:param upgrade: set to true to perform an upgrade health check
|
||||
:param kube_upgrade: set to true to perform a kubernetes upgrade health
|
||||
check
|
||||
:param alarm_ignore_list: list of alarm ids to ignore when performing
|
||||
a health check
|
||||
"""
|
||||
health_util = health.Health(self.dbapi)
|
||||
|
||||
if upgrade is True:
|
||||
return health_util.get_system_health_upgrade(context=context,
|
||||
force=force)
|
||||
return health_util.get_system_health_upgrade(
|
||||
context=context,
|
||||
force=force,
|
||||
alarm_ignore_list=alarm_ignore_list)
|
||||
elif kube_upgrade is True:
|
||||
return health_util.get_system_health_kube_upgrade(context=context,
|
||||
force=force)
|
||||
return health_util.get_system_health_kube_upgrade(
|
||||
context=context,
|
||||
force=force,
|
||||
alarm_ignore_list=alarm_ignore_list)
|
||||
else:
|
||||
return health_util.get_system_health(context=context,
|
||||
force=force)
|
||||
return health_util.get_system_health(
|
||||
context=context,
|
||||
force=force,
|
||||
alarm_ignore_list=alarm_ignore_list)
|
||||
|
||||
def _get_cinder_address_name(self, network_type):
|
||||
ADDRESS_FORMAT_ARGS = (constants.CONTROLLER_HOSTNAME,
|
||||
|
|
|
@ -1336,7 +1336,7 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
|
|||
success=success))
|
||||
|
||||
def get_system_health(self, context, force=False, upgrade=False,
|
||||
kube_upgrade=False):
|
||||
kube_upgrade=False, alarm_ignore_list=None):
|
||||
"""
|
||||
Performs a system health check.
|
||||
|
||||
|
@ -1345,11 +1345,14 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
|
|||
:param upgrade: set to true to perform an upgrade health check
|
||||
:param kube_upgrade: set to true to perform a kubernetes upgrade health
|
||||
check
|
||||
:param alarm_ignore_list: list of alarm ids to ignore when performing
|
||||
a health check
|
||||
"""
|
||||
return self.call(context,
|
||||
self.make_msg('get_system_health',
|
||||
force=force, upgrade=upgrade,
|
||||
kube_upgrade=kube_upgrade))
|
||||
kube_upgrade=kube_upgrade,
|
||||
alarm_ignore_list=alarm_ignore_list))
|
||||
|
||||
def reserve_ip_for_first_storage_node(self, context):
|
||||
"""
|
||||
|
|
|
@ -54,6 +54,21 @@ FAKE_KUBE_VERSIONS = [
|
|||
]
|
||||
|
||||
|
||||
class FakeAlarm(object):
|
||||
def __init__(self, alarm_id, mgmt_affecting):
|
||||
self.alarm_id = alarm_id
|
||||
self.mgmt_affecting = mgmt_affecting
|
||||
|
||||
|
||||
FAKE_MGMT_AFFECTING_ALARM = FakeAlarm('900.401', "True")
|
||||
FAKE_NON_MGMT_AFFECTING_ALARM = FakeAlarm('900.400', "False")
|
||||
|
||||
|
||||
class FakeFmClient(object):
|
||||
def __init__(self):
|
||||
self.alarm = mock.MagicMock()
|
||||
|
||||
|
||||
class FakeConductorAPI(object):
|
||||
|
||||
def __init__(self):
|
||||
|
@ -62,9 +77,13 @@ class FakeConductorAPI(object):
|
|||
self.service = ConductorManager('test-host', 'test-topic')
|
||||
|
||||
def get_system_health(self, context, force=False, upgrade=False,
|
||||
kube_upgrade=False):
|
||||
return self.service.get_system_health(context, force, upgrade,
|
||||
kube_upgrade)
|
||||
kube_upgrade=False, alarm_ignore_list=None):
|
||||
return self.service.get_system_health(
|
||||
context,
|
||||
force=force,
|
||||
upgrade=upgrade,
|
||||
kube_upgrade=kube_upgrade,
|
||||
alarm_ignore_list=alarm_ignore_list)
|
||||
|
||||
|
||||
class TestKubeUpgrade(base.FunctionalTest):
|
||||
|
@ -167,11 +186,11 @@ class TestKubeUpgrade(base.FunctionalTest):
|
|||
self.mock_patch_query_hosts.return_value = self._patch_current()
|
||||
self.addCleanup(p.stop)
|
||||
|
||||
# _check_alarms
|
||||
# _check_alarms returns (Success Boolean, Allow Int, Affecting Int)
|
||||
p = mock.patch.object(health.Health, '_check_alarms')
|
||||
self.mock_check_alarms = p.start()
|
||||
self.mock_check_alarms.return_value = (True, 0, 0)
|
||||
# _check_alarms calls fmclient alarms.list
|
||||
self.fake_fm_client = FakeFmClient()
|
||||
p = mock.patch('sysinv.common.health.fmclient')
|
||||
self.mock_fm_client = p.start()
|
||||
self.mock_fm_client.return_value = self.fake_fm_client
|
||||
self.addCleanup(p.stop)
|
||||
|
||||
# _check_kube_nodes_ready
|
||||
|
@ -357,7 +376,8 @@ class TestPostKubeUpgrade(TestKubeUpgrade,
|
|||
"""Test creation of a kube upgrade while there are alarms"""
|
||||
# Test creation of upgrade when system health check fails
|
||||
# 1 alarm, when force is not specified will return False
|
||||
self.mock_check_alarms.return_value = (False, 1, 0)
|
||||
self.fake_fm_client.alarm.list.return_value = \
|
||||
[FAKE_NON_MGMT_AFFECTING_ALARM, ]
|
||||
|
||||
create_dict = dbutils.post_get_test_kube_upgrade(to_version='v1.43.2')
|
||||
result = self.post_json('/kube_upgrade', create_dict,
|
||||
|
@ -375,7 +395,8 @@ class TestPostKubeUpgrade(TestKubeUpgrade,
|
|||
# overridden with force
|
||||
|
||||
# mock a 'non' mgmt_affecting alarm, upgrade can be forced
|
||||
self.mock_check_alarms.return_value = (True, 1, 0)
|
||||
self.fake_fm_client.alarm.list.return_value = \
|
||||
[FAKE_NON_MGMT_AFFECTING_ALARM, ]
|
||||
create_dict = dbutils.post_get_test_kube_upgrade(
|
||||
to_version='v1.43.2')
|
||||
create_dict['force'] = True
|
||||
|
@ -392,7 +413,8 @@ class TestPostKubeUpgrade(TestKubeUpgrade,
|
|||
""" Test kube upgrade create fails when mgmt affecting alarms found"""
|
||||
|
||||
# mock a mgmt_affecting alarm, upgrade cannot be forced
|
||||
self.mock_check_alarms.return_value = (False, 0, 1)
|
||||
self.fake_fm_client.alarm.list.return_value = \
|
||||
[FAKE_MGMT_AFFECTING_ALARM, ]
|
||||
create_dict = dbutils.post_get_test_kube_upgrade(
|
||||
to_version='v1.43.2')
|
||||
create_dict['force'] = True
|
||||
|
@ -406,6 +428,26 @@ class TestPostKubeUpgrade(TestKubeUpgrade,
|
|||
self.assertIn("System is not in a valid state",
|
||||
result.json['error_message'])
|
||||
|
||||
def test_create_system_can_ignore_alarms(self):
|
||||
# Test creation of upgrade when a mgmt_affecting alarm is active but
|
||||
# its alarm id is listed in alarm_ignore_list
|
||||
|
||||
# mock a mgmt_affecting alarm (alarm id '900.401')
|
||||
self.fake_fm_client.alarm.list.return_value = \
|
||||
[FAKE_MGMT_AFFECTING_ALARM, ]
|
||||
create_dict = dbutils.post_get_test_kube_upgrade(
|
||||
to_version='v1.43.2')
|
||||
# ignore the alarm_id for the mgmt affecting alarm
|
||||
create_dict['alarm_ignore_list'] = "['900.401',]"
|
||||
result = self.post_json('/kube_upgrade', create_dict,
|
||||
headers={'User-Agent': 'sysinv-test'})
|
||||
|
||||
# Verify that the upgrade has the expected attributes
|
||||
self.assertEqual(result.json['from_version'], 'v1.43.1')
|
||||
self.assertEqual(result.json['to_version'], 'v1.43.2')
|
||||
self.assertEqual(result.json['state'],
|
||||
kubernetes.KUBE_UPGRADE_STARTED)
|
||||
|
||||
def test_create_system_unhealthy_from_bad_apps(self):
|
||||
""" Test kube upgrade create fails when invalid kube app found"""
|
||||
|
||||
|
|
|
@ -25,7 +25,8 @@ class FakeConductorAPI(object):
|
|||
self.start_upgrade = mock.MagicMock()
|
||||
self.get_system_health_return = (True, "System is super healthy")
|
||||
|
||||
def get_system_health(self, context, force=False, upgrade=False):
|
||||
def get_system_health(self, context, force=False, upgrade=False,
|
||||
kube_upgrade=False, alarm_ignore_list=None):
|
||||
if force:
|
||||
return True, "System is healthy because I was forced to say that"
|
||||
else:
|
||||
|
|
Loading…
Reference in New Issue