Merge "Add system command and periodic audit to transition state"

This commit is contained in:
Zuul 2023-01-26 16:08:32 +00:00 committed by Gerrit Code Review
commit 7fada9db62
4 changed files with 139 additions and 1 deletions

View File

@ -11,6 +11,8 @@ from cgtsclient import exc
# Kubernetes upgrade state strings referenced by the commands in this module.
KUBE_UPGRADE_STATE_DOWNLOADING_IMAGES = 'downloading-images'
KUBE_UPGRADE_STATE_UPGRADING_NETWORKING = 'upgrading-networking'
KUBE_UPGRADE_STATE_COMPLETE = 'upgrade-complete'
# In-progress control-plane states; do_kube_upgrade_failed maps each of these
# (and the two in-progress states above) to its "-failed" counterpart.
KUBE_UPGRADE_STATE_UPGRADING_FIRST_MASTER = 'upgrading-first-master'
KUBE_UPGRADE_STATE_UPGRADING_SECOND_MASTER = 'upgrading-second-master'
def _print_kube_upgrade_show(obj):
@ -109,3 +111,34 @@ def do_kube_upgrade_delete(cc, args):
raise exc.CommandError('Kubernetes upgrade not found')
print("Kubernetes upgrade deleted")
def do_kube_upgrade_failed(cc, args):
    """Set kubernetes upgrade status to *-failed"""
    # cc:   client exposing kube_upgrade.list() and kube_upgrade.update(patch).
    # args: parsed command-line arguments; not used by this command.
    # Raises exc.CommandError when the update reports the upgrade as missing.

    # Only these in-progress states may be moved to their "-failed" twin.
    failed_state_for = {
        KUBE_UPGRADE_STATE_DOWNLOADING_IMAGES: "downloading-images-failed",
        KUBE_UPGRADE_STATE_UPGRADING_NETWORKING: "upgrading-networking-failed",
        KUBE_UPGRADE_STATE_UPGRADING_FIRST_MASTER: "upgrading-first-master-failed",
        KUBE_UPGRADE_STATE_UPGRADING_SECOND_MASTER: "upgrading-second-master-failed"
    }

    upgrades = cc.kube_upgrade.list()
    if not upgrades:
        print('A kubernetes upgrade is not in progress')
        return

    # Only the first listed upgrade is inspected.
    current_state = getattr(upgrades[0], 'state', '')
    failed_state = failed_state_for.get(current_state)
    if not failed_state:
        print('Kubernetes upgrade is in %s state, cannot be set to failed' % current_state)
        return

    # Single-operation JSON patch replacing the upgrade's state.
    patch = [{'op': 'replace', 'path': '/state', 'value': failed_state}]
    try:
        updated_upgrade = cc.kube_upgrade.update(patch)
    except exc.HTTPNotFound:
        raise exc.CommandError('Kubernetes upgrade not found')

    _print_kube_upgrade_show(updated_upgrade)

View File

@ -325,7 +325,22 @@ class KubeUpgradeController(rest.RestController):
raise wsme.exc.ClientSideError(_(
"A kubernetes upgrade is not in progress"))
if updates['state'] == kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES:
if updates['state'] and updates['state'].split('-')[-1] == 'failed':
if kube_upgrade_obj.state in [
kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES,
kubernetes.KUBE_UPGRADING_FIRST_MASTER,
kubernetes.KUBE_UPGRADING_SECOND_MASTER,
kubernetes.KUBE_UPGRADING_NETWORKING]:
kube_upgrade_obj.state = updates['state']
kube_upgrade_obj.save()
LOG.info("Kubernetes upgrade state is changed to %s" % updates['state'])
return KubeUpgrade.convert_with_links(kube_upgrade_obj)
else:
raise wsme.exc.ClientSideError(_(
"A kubernetes upgrade is in %s state cannot be set to failed"
% kube_upgrade_obj.state))
elif updates['state'] == kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES:
# Make sure upgrade is in the correct state to download images
if kube_upgrade_obj.state not in [
kubernetes.KUBE_UPGRADE_STARTED,

View File

@ -49,6 +49,7 @@ import uuid
import xml.etree.ElementTree as ElementTree
from contextlib import contextmanager
from datetime import datetime
from datetime import timedelta
from distutils.util import strtobool
from copy import deepcopy
@ -168,6 +169,7 @@ audit_intervals_opts = [
cfg.IntOpt('storage_backend_failure', default=400),
cfg.IntOpt('k8s_application', default=60),
cfg.IntOpt('device_image_update', default=300),
cfg.IntOpt('kube_upgrade_states', default=1800),
]
CONF = cfg.CONF
@ -16467,6 +16469,35 @@ class ConductorManager(service.PeriodicService):
LOG.info('Successfully updated %s feature-gates service param.' % section)
return 0
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.kube_upgrade_states)
def _audit_kube_upgrade_states(self, context):
    """Mark a kubernetes upgrade as failed when it is stuck in progress.

    Reads the single kube upgrade record and, if it is in one of the
    in-progress states mapped below and has not changed for at least two
    audit intervals, rewrites its state to the matching "-failed" value.

    :param context: request context supplied by the periodic task runner
        (not used here).
    """
    # A Kubernetes upgrade state can be stuck in upgrading-* state.
    # To avoid this situation we audit the sanity of the states,
    # after 2 audit cycles if the states are not changed then set
    # the kube_state to *-failed.
    kube_upgrade_state_map = {
        "downloading-images": "downloading-images-failed",
        "upgrading-networking": "upgrading-networking-failed",
        "upgrading-first-master": "upgrading-first-master-failed",
        "upgrading-second-master": "upgrading-second-master-failed",
    }

    try:
        kube_upgrade = self.dbapi.kube_upgrade_get_one()
        current_state = getattr(kube_upgrade, 'state', '')
        failed_state = kube_upgrade_state_map.get(current_state)
        if not failed_state:
            # Not an in-progress state this audit is responsible for.
            return

        # updated_at may be unset (presumably for a record that was never
        # modified after creation -- confirm against the DB model); fall
        # back to created_at instead of crashing on "datetime - None".
        last_change = (getattr(kube_upgrade, 'updated_at', None) or
                       getattr(kube_upgrade, 'created_at', None))
        if last_change is None:
            LOG.warning("Kube_upgrade in state '%s' has no timestamp, "
                        "skipping stuck-state audit", current_state)
            return

        # NOTE(review): the comparison with datetime.utcnow() assumes the
        # stored timestamp is a naive UTC datetime -- confirm.
        stuck_threshold = timedelta(
            seconds=CONF.conductor_periodic_task_intervals.kube_upgrade_states * 2)
        if datetime.utcnow() - last_change >= stuck_threshold:
            self.dbapi.kube_upgrade_update(kube_upgrade.uuid,
                                           {'state': failed_state})
            LOG.info(
                "Kube_upgrade state changed from "
                "'%s' to '%s'", current_state,
                failed_state)
    except exception.NotFound:
        LOG.debug("A kubernetes upgrade is not in progress")
def device_image_state_sort_key(dev_img_state):
if dev_img_state.bitstream_type == dconstants.BITSTREAM_TYPE_ROOT_KEY:

View File

@ -843,6 +843,65 @@ class TestPatch(TestKubeUpgrade,
self.assertIn("Invalid state",
result.json['error_message'])
def test_update_failed_state(self):
    # An upgrade that is still downloading images can be patched to the
    # matching *-failed state.

    # Start from an upgrade in the downloading-images state
    upgrade = dbutils.create_test_kube_upgrade(
        from_version='v1.43.1',
        to_version='v1.43.2',
        state=kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES)
    upgrade_uuid = upgrade.uuid

    # Request the failed state through the API
    failed_state = kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES_FAILED
    state_patch = [{'path': '/state',
                    'value': failed_state,
                    'op': 'replace'}]
    response = self.patch_json('/kube_upgrade',
                               state_patch,
                               headers={'User-Agent': 'sysinv-test'},
                               expect_errors=True)

    # The request succeeds and the response carries the new state
    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(response.status_code, http_client.OK)
    self.assertEqual(response.json['state'], failed_state)

    # The new state was persisted in the DB
    stored_upgrade = self.dbapi.kube_upgrade_get_one()
    self.assertEqual(stored_upgrade.state, failed_state)

    # Reading the upgrade back returns the new state and unchanged versions
    fetched = self.get_json('/kube_upgrade/%s' % upgrade_uuid)
    self.assertEqual(fetched['from_version'], 'v1.43.1')
    self.assertEqual(fetched['to_version'], 'v1.43.2')
    self.assertEqual(fetched['state'], failed_state)
def test_update_state_failed_invalid_state(self):
    # An upgrade that is not in an in-progress state (here: images already
    # downloaded) must reject a request to move it to a *-failed state.

    # Start from an upgrade in the downloaded-images state
    dbutils.create_test_kube_upgrade(
        from_version='v1.43.1',
        to_version='v1.43.2',
        state=kubernetes.KUBE_UPGRADE_DOWNLOADED_IMAGES)

    # Request the failed state through the API
    failed_state = kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES_FAILED
    state_patch = [{'path': '/state',
                    'value': failed_state,
                    'op': 'replace'}]
    response = self.patch_json('/kube_upgrade',
                               state_patch,
                               headers={'User-Agent': 'sysinv-test'},
                               expect_errors=True)

    # The request is rejected with an explanatory message
    expected_message = ("A kubernetes upgrade is in downloaded-images state "
                        "cannot be set to failed")
    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(response.status_code, http_client.BAD_REQUEST)
    self.assertIn(expected_message,
                  response.json['error_message'])
class TestDelete(TestKubeUpgrade):