Merge "Add system command and periodic audit to transition state"

This commit is contained in:
Zuul 2023-01-26 16:08:32 +00:00 committed by Gerrit Code Review
commit 7fada9db62
4 changed files with 139 additions and 1 deletions

View File

@ -11,6 +11,8 @@ from cgtsclient import exc
# Kubernetes upgrade states, compared against the 'state' attribute of a
# kube upgrade record by the commands in this module.
KUBE_UPGRADE_STATE_DOWNLOADING_IMAGES = 'downloading-images'
KUBE_UPGRADE_STATE_UPGRADING_NETWORKING = 'upgrading-networking'
KUBE_UPGRADE_STATE_COMPLETE = 'upgrade-complete'
KUBE_UPGRADE_STATE_UPGRADING_FIRST_MASTER = 'upgrading-first-master'
KUBE_UPGRADE_STATE_UPGRADING_SECOND_MASTER = 'upgrading-second-master'
def _print_kube_upgrade_show(obj): def _print_kube_upgrade_show(obj):
@ -109,3 +111,34 @@ def do_kube_upgrade_delete(cc, args):
raise exc.CommandError('Kubernetes upgrade not found') raise exc.CommandError('Kubernetes upgrade not found')
print("Kubernetes upgrade deleted") print("Kubernetes upgrade deleted")
def do_kube_upgrade_failed(cc, args):
    """Set kubernetes upgrade status to *-failed"""
    # In-progress states that may be moved to their "-failed" counterpart.
    failed_state_for = {
        KUBE_UPGRADE_STATE_DOWNLOADING_IMAGES: "downloading-images-failed",
        KUBE_UPGRADE_STATE_UPGRADING_NETWORKING: "upgrading-networking-failed",
        KUBE_UPGRADE_STATE_UPGRADING_FIRST_MASTER: "upgrading-first-master-failed",
        KUBE_UPGRADE_STATE_UPGRADING_SECOND_MASTER: "upgrading-second-master-failed"
    }

    upgrades = cc.kube_upgrade.list()
    if not upgrades:
        print('A kubernetes upgrade is not in progress')
        return

    # Only the first listed upgrade is considered.
    state_now = getattr(upgrades[0], 'state', '')
    failed_state = failed_state_for.get(state_now)
    if not failed_state:
        print('Kubernetes upgrade is in %s state, cannot be set to failed' % state_now)
        return

    # Single "replace" operation on the state field.
    patch = [{'op': 'replace', 'path': '/state', 'value': failed_state}]
    try:
        updated = cc.kube_upgrade.update(patch)
    except exc.HTTPNotFound:
        raise exc.CommandError('Kubernetes upgrade not found')
    _print_kube_upgrade_show(updated)

View File

@ -325,7 +325,22 @@ class KubeUpgradeController(rest.RestController):
raise wsme.exc.ClientSideError(_( raise wsme.exc.ClientSideError(_(
"A kubernetes upgrade is not in progress")) "A kubernetes upgrade is not in progress"))
if updates['state'] == kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES: if updates['state'] and updates['state'].split('-')[-1] == 'failed':
if kube_upgrade_obj.state in [
kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES,
kubernetes.KUBE_UPGRADING_FIRST_MASTER,
kubernetes.KUBE_UPGRADING_SECOND_MASTER,
kubernetes.KUBE_UPGRADING_NETWORKING]:
kube_upgrade_obj.state = updates['state']
kube_upgrade_obj.save()
LOG.info("Kubernetes upgrade state is changed to %s" % updates['state'])
return KubeUpgrade.convert_with_links(kube_upgrade_obj)
else:
raise wsme.exc.ClientSideError(_(
"A kubernetes upgrade is in %s state cannot be set to failed"
% kube_upgrade_obj.state))
elif updates['state'] == kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES:
# Make sure upgrade is in the correct state to download images # Make sure upgrade is in the correct state to download images
if kube_upgrade_obj.state not in [ if kube_upgrade_obj.state not in [
kubernetes.KUBE_UPGRADE_STARTED, kubernetes.KUBE_UPGRADE_STARTED,

View File

@ -49,6 +49,7 @@ import uuid
import xml.etree.ElementTree as ElementTree import xml.etree.ElementTree as ElementTree
from contextlib import contextmanager from contextlib import contextmanager
from datetime import datetime from datetime import datetime
from datetime import timedelta
from distutils.util import strtobool from distutils.util import strtobool
from copy import deepcopy from copy import deepcopy
@ -168,6 +169,7 @@ audit_intervals_opts = [
cfg.IntOpt('storage_backend_failure', default=400), cfg.IntOpt('storage_backend_failure', default=400),
cfg.IntOpt('k8s_application', default=60), cfg.IntOpt('k8s_application', default=60),
cfg.IntOpt('device_image_update', default=300), cfg.IntOpt('device_image_update', default=300),
cfg.IntOpt('kube_upgrade_states', default=1800),
] ]
CONF = cfg.CONF CONF = cfg.CONF
@ -16467,6 +16469,35 @@ class ConductorManager(service.PeriodicService):
LOG.info('Successfully updated %s feature-gates service param.' % section) LOG.info('Successfully updated %s feature-gates service param.' % section)
return 0 return 0
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.kube_upgrade_states)
def _audit_kube_upgrade_states(self, context):
    """Fail a Kubernetes upgrade that is stuck in an in-progress state.

    A Kubernetes upgrade can get stuck in one of the in-progress states
    listed below. If the upgrade record has not changed for at least two
    audit intervals, its state is set to the matching "*-failed" state.

    :param context: request context supplied by the periodic task runner
                    (not used here).
    """
    failed_state_for = {
        "downloading-images": "downloading-images-failed",
        "upgrading-networking": "upgrading-networking-failed",
        "upgrading-first-master": "upgrading-first-master-failed",
        "upgrading-second-master": "upgrading-second-master-failed",
    }
    try:
        kube_upgrade = self.dbapi.kube_upgrade_get_one()
        current_state = getattr(kube_upgrade, 'state', '')
        failed_state = failed_state_for.get(current_state)
        if not failed_state:
            # Not in a state this audit is responsible for.
            return

        # updated_at can be unset when the record has never been modified
        # since creation; subtracting None from a datetime would raise
        # TypeError, so fall back to created_at.
        # NOTE(review): assumes the record also exposes created_at - confirm.
        last_change = (getattr(kube_upgrade, 'updated_at', None) or
                       getattr(kube_upgrade, 'created_at', None))
        if last_change is None:
            LOG.debug("Kube_upgrade has no timestamp, skipping state audit")
            return

        stuck_after = timedelta(
            seconds=CONF.conductor_periodic_task_intervals.kube_upgrade_states * 2)
        if datetime.utcnow() - last_change >= stuck_after:
            self.dbapi.kube_upgrade_update(kube_upgrade.uuid,
                                           {'state': failed_state})
            LOG.info(
                "Kube_upgrade state changed from "
                "'%s' to '%s'", current_state,
                failed_state)
    except exception.NotFound:
        LOG.debug("A kubernetes upgrade is not in progress")
def device_image_state_sort_key(dev_img_state): def device_image_state_sort_key(dev_img_state):
if dev_img_state.bitstream_type == dconstants.BITSTREAM_TYPE_ROOT_KEY: if dev_img_state.bitstream_type == dconstants.BITSTREAM_TYPE_ROOT_KEY:

View File

@ -843,6 +843,65 @@ class TestPatch(TestKubeUpgrade,
self.assertIn("Invalid state", self.assertIn("Invalid state",
result.json['error_message']) result.json['error_message'])
def test_update_failed_state(self):
    """An upgrade in downloading-images can be patched to its failed state."""
    # Create an upgrade that is currently downloading images
    created = dbutils.create_test_kube_upgrade(
        from_version='v1.43.1',
        to_version='v1.43.2',
        state=kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES)
    upgrade_uuid = created.uuid

    # Request the transition to the failed state
    new_state = kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES_FAILED
    state_patch = [{'path': '/state',
                    'value': new_state,
                    'op': 'replace'}]
    response = self.patch_json('/kube_upgrade',
                               state_patch,
                               headers={'User-Agent': 'sysinv-test'},
                               expect_errors=True)

    # The request succeeds and reports the new state
    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(response.status_code, http_client.OK)
    self.assertEqual(response.json['state'], new_state)

    # The new state is persisted in the DB
    stored = self.dbapi.kube_upgrade_get_one()
    self.assertEqual(stored.state, new_state)

    # The API shows the upgrade with the new state and unchanged versions
    shown = self.get_json('/kube_upgrade/%s' % upgrade_uuid)
    for field, expected in (('from_version', 'v1.43.1'),
                            ('to_version', 'v1.43.2'),
                            ('state', new_state)):
        self.assertEqual(shown[field], expected)
def test_update_state_failed_invalid_state(self):
    """Patching to a failed state is rejected from downloaded-images."""
    # Create an upgrade in a state that cannot be moved to failed
    dbutils.create_test_kube_upgrade(
        from_version='v1.43.1',
        to_version='v1.43.2',
        state=kubernetes.KUBE_UPGRADE_DOWNLOADED_IMAGES)

    # Request the transition to the failed state
    new_state = kubernetes.KUBE_UPGRADE_DOWNLOADING_IMAGES_FAILED
    state_patch = [{'path': '/state',
                    'value': new_state,
                    'op': 'replace'}]
    response = self.patch_json('/kube_upgrade',
                               state_patch,
                               headers={'User-Agent': 'sysinv-test'},
                               expect_errors=True)

    # The request is rejected with an explanatory message
    expected_message = ("A kubernetes upgrade is in downloaded-images state "
                        "cannot be set to failed")
    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(response.status_code, http_client.BAD_REQUEST)
    self.assertIn(expected_message, response.json['error_message'])
class TestDelete(TestKubeUpgrade): class TestDelete(TestKubeUpgrade):