Add system command and periodic audit to transition state
system kube-upgrade-* commands can get stuck in an upgrading-* state with no way to continue the upgrade. Created the 'system kube-upgrade-failed' command to manually set the state to *-failed. Created a 30 minute periodic task, _audit_kube_upgrade_states, to automatically change the kube_upgrade state to *-failed if the state has been stuck in 'upgrading-*' for more than 1 hour. Updated kube_upgrade_controller to support the state transition to the *-failed state. Test Plan: PASS: Manually edit kube_upgrade state to upgrading-* and execute 'system kube-upgrade-failed' and verify the state transitions to *-failed PASS: Manually edit kube_upgrade state to upgrading-* after kube_upgrade completion, wait for one hour and verify the state transitions to *-failed based on the updated_at time stamp PASS: Verify the functionality of _audit_kube_upgrade_states and kube-upgrade-failed by building an ISO Closes-Bug: 1999405 Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com> Change-Id: I499fb2909f11dc2b240dbf2e03ccfd95f1fd2e62
This commit is contained in:
parent
5036eb895a
commit
029e3eecf5
@ -11,6 +11,8 @@ from cgtsclient import exc
|
|||||||
KUBE_UPGRADE_STATE_DOWNLOADING_IMAGES = 'downloading-images'
|
KUBE_UPGRADE_STATE_DOWNLOADING_IMAGES = 'downloading-images'
|
||||||
KUBE_UPGRADE_STATE_UPGRADING_NETWORKING = 'upgrading-networking'
|
KUBE_UPGRADE_STATE_UPGRADING_NETWORKING = 'upgrading-networking'
|
||||||
KUBE_UPGRADE_STATE_COMPLETE = 'upgrade-complete'
|
KUBE_UPGRADE_STATE_COMPLETE = 'upgrade-complete'
|
||||||
|
KUBE_UPGRADE_STATE_UPGRADING_FIRST_MASTER = 'upgrading-first-master'
|
||||||
|
KUBE_UPGRADE_STATE_UPGRADING_SECOND_MASTER = 'upgrading-second-master'
|
||||||
|
|
||||||
|
|
||||||
def _print_kube_upgrade_show(obj):
|
def _print_kube_upgrade_show(obj):
|
||||||
@ -109,3 +111,34 @@ def do_kube_upgrade_delete(cc, args):
|
|||||||
raise exc.CommandError('Kubernetes upgrade not found')
|
raise exc.CommandError('Kubernetes upgrade not found')
|
||||||
|
|
||||||
print("Kubernetes upgrade deleted")
|
print("Kubernetes upgrade deleted")
|
||||||
|
|
||||||
|
|
||||||
|
def do_kube_upgrade_failed(cc, args):
    """Set kubernetes upgrade status to *-failed"""
    # NOTE(review): the docstring above is left untouched in case it is
    # surfaced as CLI help text - confirm before rewording it.

    # In-progress states that may be forced to their "-failed" counterpart.
    # Any state that is not a key here is rejected below.
    failed_state_for = {
        KUBE_UPGRADE_STATE_DOWNLOADING_IMAGES: "downloading-images-failed",
        KUBE_UPGRADE_STATE_UPGRADING_NETWORKING: "upgrading-networking-failed",
        KUBE_UPGRADE_STATE_UPGRADING_FIRST_MASTER: "upgrading-first-master-failed",
        KUBE_UPGRADE_STATE_UPGRADING_SECOND_MASTER: "upgrading-second-master-failed"
    }

    upgrades = cc.kube_upgrade.list()
    if not upgrades:
        print('A kubernetes upgrade is not in progress')
        return

    # Only the first listed upgrade is inspected.
    current_state = getattr(upgrades[0], 'state', '')
    target_state = failed_state_for.get(current_state)
    if not target_state:
        print('Kubernetes upgrade is in %s state, cannot be set to failed' % current_state)
        return

    # Single "replace" operation on /state carrying the failed state.
    patch = [{'op': 'replace', 'path': '/state', 'value': target_state}]

    try:
        kube_upgrade = cc.kube_upgrade.update(patch)
    except exc.HTTPNotFound:
        raise exc.CommandError('Kubernetes upgrade not found')

    _print_kube_upgrade_show(kube_upgrade)
|
||||||
|
@ -325,7 +325,22 @@ class KubeUpgradeController(rest.RestController):
|
|||||||
raise wsme.exc.ClientSideError(_(
|
raise wsme.exc.ClientSideError(_(
|
||||||
"A kubernetes upgrade is not in progress"))
|
"A kubernetes upgrade is not in progress"))
|
||||||
|
|
||||||
if updates['state'] == kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES:
|
if updates['state'] and updates['state'].split('-')[-1] == 'failed':
|
||||||
|
if kube_upgrade_obj.state in [
|
||||||
|
kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES,
|
||||||
|
kubernetes.KUBE_UPGRADING_FIRST_MASTER,
|
||||||
|
kubernetes.KUBE_UPGRADING_SECOND_MASTER,
|
||||||
|
kubernetes.KUBE_UPGRADING_NETWORKING]:
|
||||||
|
kube_upgrade_obj.state = updates['state']
|
||||||
|
kube_upgrade_obj.save()
|
||||||
|
LOG.info("Kubernetes upgrade state is changed to %s" % updates['state'])
|
||||||
|
return KubeUpgrade.convert_with_links(kube_upgrade_obj)
|
||||||
|
else:
|
||||||
|
raise wsme.exc.ClientSideError(_(
|
||||||
|
"A kubernetes upgrade is in %s state cannot be set to failed"
|
||||||
|
% kube_upgrade_obj.state))
|
||||||
|
|
||||||
|
elif updates['state'] == kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES:
|
||||||
# Make sure upgrade is in the correct state to download images
|
# Make sure upgrade is in the correct state to download images
|
||||||
if kube_upgrade_obj.state not in [
|
if kube_upgrade_obj.state not in [
|
||||||
kubernetes.KUBE_UPGRADE_STARTED,
|
kubernetes.KUBE_UPGRADE_STARTED,
|
||||||
|
@ -49,6 +49,7 @@ import uuid
|
|||||||
import xml.etree.ElementTree as ElementTree
|
import xml.etree.ElementTree as ElementTree
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from datetime import timedelta
|
||||||
from distutils.util import strtobool
|
from distutils.util import strtobool
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
||||||
@ -168,6 +169,7 @@ audit_intervals_opts = [
|
|||||||
cfg.IntOpt('storage_backend_failure', default=400),
|
cfg.IntOpt('storage_backend_failure', default=400),
|
||||||
cfg.IntOpt('k8s_application', default=60),
|
cfg.IntOpt('k8s_application', default=60),
|
||||||
cfg.IntOpt('device_image_update', default=300),
|
cfg.IntOpt('device_image_update', default=300),
|
||||||
|
cfg.IntOpt('kube_upgrade_states', default=1800),
|
||||||
]
|
]
|
||||||
|
|
||||||
CONF = cfg.CONF
|
CONF = cfg.CONF
|
||||||
@ -16442,6 +16444,35 @@ class ConductorManager(service.PeriodicService):
|
|||||||
LOG.info('Successfully updated %s feature-gates service param.' % section)
|
LOG.info('Successfully updated %s feature-gates service param.' % section)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.kube_upgrade_states)
def _audit_kube_upgrade_states(self, context):
    """Move a stuck kubernetes upgrade to the matching *-failed state.

    A kubernetes upgrade can get stuck in one of the transitional states
    listed below. When the upgrade record has not changed for at least
    two audit intervals, its state is replaced by the corresponding
    "-failed" state so the operator can retry.

    :param context: request context supplied by the periodic task
        framework; not used by this audit.
    """
    # Transitional states that are audited -> state set when they time out.
    kube_upgrade_state_map = {
        "downloading-images": "downloading-images-failed",
        "upgrading-networking": "upgrading-networking-failed",
        "upgrading-first-master": "upgrading-first-master-failed",
        "upgrading-second-master": "upgrading-second-master-failed",
    }

    try:
        kube_upgrade = self.dbapi.kube_upgrade_get_one()
        current_state = getattr(kube_upgrade, 'state', '')
        failed_state = kube_upgrade_state_map.get(current_state)
        if not failed_state:
            # Not in an audited transitional state; nothing to do.
            return

        # updated_at may be unset on a record that was never modified;
        # fall back to created_at rather than failing on "datetime - None".
        last_change = (getattr(kube_upgrade, 'updated_at', None) or
                       getattr(kube_upgrade, 'created_at', None))
        if last_change is None:
            LOG.debug("Kubernetes upgrade has no timestamp, "
                      "skipping state audit")
            return

        # Subtracting an aware datetime from a naive one raises TypeError,
        # so pick a "now" that matches the timestamp's awareness.
        # NOTE(review): naive timestamps are assumed to be UTC, as the
        # original utcnow() comparison implied - confirm.
        if last_change.utcoffset() is not None:
            now = datetime.now(last_change.tzinfo)
        else:
            now = datetime.utcnow()

        stuck_threshold = timedelta(
            seconds=CONF.conductor_periodic_task_intervals.kube_upgrade_states * 2)
        if now - last_change >= stuck_threshold:
            self.dbapi.kube_upgrade_update(kube_upgrade.uuid,
                                           {'state': failed_state})
            LOG.info(
                "Kube_upgrade state changed from "
                "'%s' to '%s'", current_state,
                failed_state)
    except exception.NotFound:
        LOG.debug("A kubernetes upgrade is not in progress")
|
||||||
|
|
||||||
|
|
||||||
def device_image_state_sort_key(dev_img_state):
|
def device_image_state_sort_key(dev_img_state):
|
||||||
if dev_img_state.bitstream_type == dconstants.BITSTREAM_TYPE_ROOT_KEY:
|
if dev_img_state.bitstream_type == dconstants.BITSTREAM_TYPE_ROOT_KEY:
|
||||||
|
@ -843,6 +843,65 @@ class TestPatch(TestKubeUpgrade,
|
|||||||
self.assertIn("Invalid state",
|
self.assertIn("Invalid state",
|
||||||
result.json['error_message'])
|
result.json['error_message'])
|
||||||
|
|
||||||
|
def test_update_failed_state(self):
    # An upgrade in the downloading-images state can be patched to the
    # downloading-images-failed state.

    # Seed the database with an upgrade that is downloading images
    upgrade = dbutils.create_test_kube_upgrade(
        state=kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES,
        from_version='v1.43.1',
        to_version='v1.43.2')
    upgrade_uuid = upgrade.uuid

    # Request the transition to the failed state
    new_state = kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES_FAILED
    state_patch = [{'path': '/state',
                    'value': new_state,
                    'op': 'replace'}]
    response = self.patch_json('/kube_upgrade',
                               state_patch,
                               headers={'User-Agent': 'sysinv-test'},
                               expect_errors=True)

    # The request must succeed and echo the new state back
    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(response.status_code, http_client.OK)
    self.assertEqual(response.json['state'], new_state)

    # The state must have been persisted in the DB
    db_upgrade = self.dbapi.kube_upgrade_get_one()
    self.assertEqual(db_upgrade.state, new_state)

    # Reading the upgrade back through the API shows the new state and
    # leaves the versions unchanged
    fetched = self.get_json('/kube_upgrade/%s' % upgrade_uuid)
    self.assertEqual(fetched['from_version'], 'v1.43.1')
    self.assertEqual(fetched['to_version'], 'v1.43.2')
    self.assertEqual(fetched['state'], new_state)
|
||||||
|
|
||||||
|
def test_update_state_failed_invalid_state(self):
    # An upgrade in the downloaded-images state must not be allowed to
    # move to a failed state.

    # Seed the database with an upgrade that has finished downloading
    dbutils.create_test_kube_upgrade(
        state=kubernetes.KUBE_UPGRADE_DOWNLOADED_IMAGES,
        from_version='v1.43.1',
        to_version='v1.43.2')

    # Request the (invalid) transition to the failed state
    new_state = kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES_FAILED
    state_patch = [{'path': '/state',
                    'value': new_state,
                    'op': 'replace'}]
    response = self.patch_json('/kube_upgrade',
                               state_patch,
                               headers={'User-Agent': 'sysinv-test'},
                               expect_errors=True)

    # The request must be rejected with an explanatory message
    expected_message = ("A kubernetes upgrade is in downloaded-images state "
                        "cannot be set to failed")
    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(response.status_code, http_client.BAD_REQUEST)
    self.assertIn(expected_message,
                  response.json['error_message'])
|
||||||
|
|
||||||
|
|
||||||
class TestDelete(TestKubeUpgrade):
|
class TestDelete(TestKubeUpgrade):
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user