Improving kube rootca orchestration recovery

When hosts are in 'updating' state, the kube rootca
update orchestration should consider those hosts to be
in-progress, rather than attempt the same command, which
immediately fails the strategy.

A second scenario is that if a host is rebooted while
initiating a kube rootca updating action, it will not complete
and will block orchestration. The only way to resume is
to abort that rootca update.

The compatibility changes in this review are:
- The removal of the 'force' option for rootca 'patch' REST
API calls pertaining to the 'complete' and 'abort' operations.

The logic changes in this review are:

- If a host is already 'updated' when the step is invoked,
it will be considered successful.

- If a host is 'updating', orchestration will wait for it
to complete. If the host is stalled, the step will fail based
on the step timeout. If the host's last update is older than a
specific duration (600 seconds), it is immediately considered
timed out.

- If a host step fails, the rootca update is aborted,
not just the orchestration.

Known limitation:
VIM orchestration updates the hosts sorted by their name.
If a system has been manually updated, and then VIM orchestration
is used to resume it, it will fail if the hosts cannot be updated
in the expected order.

TEST PLAN:
 PASS: Run a kube-rootca-update orchestration (2 controllers)
 PASS: Abort a kube-rootca-update orchestration after first step and
  verify update is not aborted.
 PASS: Create a valid kube-rootca-update orchestration in a system
   with an existing 'started' update.
 PASS: Reboot second host before it is sent its 'updating' request.
   The host state moves to 'updating' but never completes.
   Verify orchestration fails (step times out after 10 minutes) and
   the update is aborted.
 PASS: Run kube-rootca-update orchestration over an aborted update.
   This will create a new update which can be run.
 PASS: Abort orchestration during the 'pods' step.
   This completes the pods step, and aborts the orchestration but will
   not abort the update.
 PASS: Manually start an update and generate a cert.
   Create an orchestration.
   Manually update controller-1 and let it complete.
   Start the orchestration.
   Orchestration should succeed (skips cleanly over controller-1)

Story: 2009665
Task: 44058
Depends-On: https://review.opendev.org/c/starlingx/config/+/819020
Signed-off-by: albailey <Al.Bailey@windriver.com>
Change-Id: If5da6fb7648a6c07438b41449e7acc724f71e0f7
This commit is contained in:
albailey 2021-11-03 19:01:03 -05:00
parent aba5636096
commit 60738b4cdb
8 changed files with 293 additions and 24 deletions

View File

@ -3,6 +3,8 @@
#
# SPDX-License-Identifier: Apache-2.0
#
import datetime
import iso8601
import json
from six.moves import http_client as httplib
@ -26,6 +28,9 @@ from nfv_plugins.nfvi_plugins.openstack.objects import OPENSTACK_SERVICE
DLOG = debug.debug_get_logger('nfv_plugins.nfvi_plugins.infrastructure_api')
# Allow 600 seconds to determine if a kube rootca host update has stalled
MAX_KUBE_ROOTCA_HOST_UPDATE_DURATION = 600
def host_state(host_uuid, host_name, host_personality, host_sub_functions,
host_admin_state, host_oper_state, host_avail_status,
@ -174,6 +179,16 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
(self._openstack_directory.get_service_info(
OPENSTACK_SERVICE.NOVA) is not None))
def set_response_error(self, response, activity, issue="did not complete"):
    """Log an API failure and store it as the 'reason' of the response

    :param dict response: The response dict that receives the error 'reason'
    :param str activity: the API action that failed
    :param str issue: short description of how the activity failed
    """
    failure_text = "{activity} {issue}.".format(activity=activity,
                                                issue=issue)
    DLOG.error(failure_text)
    response['reason'] = failure_text
def get_datanetworks(self, future, host_uuid, callback):
"""
Get host data networks from the plugin
@ -839,6 +854,49 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
callback.send(response)
callback.close()
def kube_rootca_update_abort(self, future, callback):
"""Invokes sysinv kube-rootca-update-abort

Obtains a new platform token when the cached one is missing or expired,
queues sysinv.kube_rootca_update_abort on the future, and always sends
the response dict to the callback before closing it.

The response holds 'completed' (bool) and 'reason' (str). On success
'result-data' is a KubeRootcaUpdate built from the 'state' returned by
sysinv. On an UNAUTHORIZED API error 'error-code' is set to
TOKEN_EXPIRED and the cached token is marked expired.

:param future: object used to queue work; its result is read after
    each yield
:param callback: receives the response dict through send() and is then
    closed
"""
response = dict()
response['completed'] = False
response['reason'] = ''
action_type = 'kube-rootca-update-abort'
sysinv_method = sysinv.kube_rootca_update_abort
try:
future.set_timeouts(config.CONF.get('nfvi-timeouts', None))
# a usable platform token is required before calling sysinv
if self._platform_token is None or \
self._platform_token.is_expired():
future.work(openstack.get_token, self._platform_directory)
# the value sent back into this generator is stored as the result of
# the work queued above
future.result = (yield)
if not future.result.is_complete() or \
future.result.data is None:
self.set_response_error(response, "Openstack get-token")
return
self._platform_token = future.result.data
# queue the abort request and wait for its outcome
future.work(sysinv_method, self._platform_token)
future.result = (yield)
if not future.result.is_complete():
self.set_response_error(response, action_type)
return
api_data = future.result.data
# only the 'state' field of the sysinv reply is kept in the result object
result_obj = nfvi.objects.v1.KubeRootcaUpdate(api_data['state'])
response['result-data'] = result_obj
response['completed'] = True
except exceptions.OpenStackRestAPIException as e:
if httplib.UNAUTHORIZED == e.http_status_code:
response['error-code'] = nfvi.NFVI_ERROR_CODE.TOKEN_EXPIRED
if self._platform_token is not None:
self._platform_token.set_expired()
else:
DLOG.exception("Caught API exception while trying %s. error=%s"
% (action_type, e))
response['reason'] = e.http_response_reason
except Exception as e:
# NOTE(review): 'reason' is not updated on this path; only the log
# records the failure
DLOG.exception("Caught exception while trying %s. error=%s"
% (action_type, e))
finally:
# the callback is always answered, whether or not the abort completed
callback.send(response)
callback.close()
def kube_rootca_update_complete(self, future, callback):
"""Invokes sysinv kube-rootca-update-complete"""
response = dict()
@ -973,7 +1031,11 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
callback.close()
def kube_rootca_update_host(self, future, host_uuid, host_name,
update_type, callback):
update_type,
in_progress_state,
completed_state,
failed_state,
callback):
"""
Kube Root CA Update a host for a particular update_type (phase)
"""
@ -992,19 +1054,80 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
future.result = (yield)
if not future.result.is_complete() or \
future.result.data is None:
DLOG.error("OpenStack get-token did not complete.")
self.set_response_error(response, "Openstack get-token")
return
self._platform_token = future.result.data
future.work(sysinv_method,
self._platform_token,
host_uuid,
update_type)
# This is wasteful but we need to check if host updating/updated,
# so we can skip or wait for it, rather than issue an action.
# todo(abailey): Update sysinv API to support a single host query
# todo(abailey): update vim schema for a table for these entries
# todo(abailey): this should be removed and put in directory once
# schema is updated
future.work(sysinv.get_kube_rootca_host_update_list,
self._platform_token)
future.result = (yield)
if not future.result.is_complete():
DLOG.error("%s did not complete." % action_type)
self.set_response_error(response,
"SysInv get-kube-rootca-host-updates")
return
api_data = future.result.data
result_obj = nfvi.objects.v1.KubeRootcaUpdate(api_data['state'])
sysinv_result_key = "kube_host_updates"
results_list = future.result.data[sysinv_result_key]
results_obj = self._extract_kube_rootca_host_updates(results_list)
# walk the list and find the object for this host
# Do the match based on hostname since the id will not match
host_state = None
for host_obj in results_obj:
if host_obj.hostname == host_name:
host_state = host_obj.state
result_obj = host_obj
break
DLOG.info("Existing Host state for %s is %s"
% (host_name, host_state))
if host_state == in_progress_state:
# Do not re-invoke the action. It is already in progress
# the host_obj in the loop above can be returned as result_obj
# the operation may have stalled and the kube rootca code in
# sysinv does not have code to detect this, so we check
# last_updated and abort if too much time spent in-progress
# the updated_at field must exist if we are in-progress
updated_at = iso8601.parse_date(result_obj['updated_at'])
now = iso8601.parse_date(datetime.datetime.utcnow().isoformat())
delta = (now - updated_at).total_seconds()
if delta > MAX_KUBE_ROOTCA_HOST_UPDATE_DURATION:
# still in progress after this amount of time, it is likely
# a broken state. Need to abort.
self.set_response_error(response, action_type,
issue="timed out (in-progress)")
return
pass
elif host_state == completed_state:
# Do not re-invoke the action. It is already completed
# the host_obj in the loop above can be returned as result_obj
pass
else:
# Every other state (including failed) means we invoke API
future.work(sysinv_method,
self._platform_token,
host_uuid,
update_type)
future.result = (yield)
if not future.result.is_complete():
self.set_response_error(response, action_type)
return
api_data = future.result.data
result_obj = nfvi.objects.v1.KubeRootcaHostUpdate(
api_data['id'],
api_data['hostname'],
api_data['target_rootca_cert'],
api_data['effective_rootca_cert'],
api_data['state'],
api_data['created_at'],
api_data['updated_at']
)
# result_obj is the host_obj from the loop, or the API result
response['result-data'] = result_obj
response['completed'] = True
except exceptions.OpenStackRestAPIException as e:
@ -1125,7 +1248,9 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
host_data['hostname'],
host_data['target_rootca_cert'],
host_data['effective_rootca_cert'],
host_data['state']
host_data['state'],
host_data['created_at'],
host_data['updated_at']
)
)
return result_list

View File

@ -301,6 +301,21 @@ def kube_rootca_update_upload_cert(token, cert_file):
return response
def kube_rootca_update_abort(token):
    """
    Ask System Inventory to abort the kube rootca update

    Sends a single 'replace' patch operation that sets '/state' to
    'update-aborted' on the kube rootca update endpoint.
    """
    abort_operation = {
        'path': "/state",
        'value': 'update-aborted',
        'op': "replace",
    }
    return _api_patch_dict(token,
                           KUBE_ROOTCA_UPDATE_ENDPOINT,
                           [abort_operation])
def kube_rootca_update_complete(token):
"""
Ask System Inventory to kube rootca update complete
@ -312,7 +327,7 @@ def kube_rootca_update_complete(token):
state_data['op'] = "replace"
api_cmd_payload.append(state_data)
return _api_patch_dict(token,
KUBE_ROOTCA_UPDATE_ENDPOINT + "?force=True",
KUBE_ROOTCA_UPDATE_ENDPOINT,
api_cmd_payload)

View File

@ -990,18 +990,25 @@ class HostDirector(object):
sw_mgmt_director = directors.get_sw_mgmt_director()
sw_mgmt_director.kube_host_rootca_update_failed(host)
def _nfvi_kube_rootca_update_host(self, host_uuid, host_name, update_type):
def _nfvi_kube_rootca_update_host(self, host_uuid, host_name, update_type,
in_progress_state, completed_state,
failed_state):
"""NFVI Kube Root CA Update - Host"""
nfvi.nfvi_kube_rootca_update_host(
host_uuid,
host_name,
update_type,
in_progress_state,
completed_state,
failed_state,
self._nfvi_kube_rootca_update_host_callback())
def kube_rootca_update_hosts_by_type(self, host_names, update_type):
def kube_rootca_update_hosts_by_type(self, host_names, update_type,
in_progress_state, completed_state,
failed_state):
"""Utility method for Kube Root CA Update - Host"""
DLOG.info("Kube RootCA Update %s for hosts: %s" % (update_type,
host_names))
DLOG.info("Kube RootCA Update %s (%s) for hosts: %s"
% (update_type, in_progress_state, host_names))
host_operation = Operation(OPERATION_TYPE.KUBE_ROOTCA_UPDATE_HOSTS)
if self._host_operation is not None:
DLOG.debug("Canceling previous host operation %s, before "
@ -1022,7 +1029,10 @@ class HostDirector(object):
OPERATION_STATE.INPROGRESS)
self._nfvi_kube_rootca_update_host(host.uuid,
host.name,
update_type)
update_type,
in_progress_state,
completed_state,
failed_state)
if host_operation.is_inprogress():
self._host_operation = host_operation
return host_operation

View File

@ -114,6 +114,7 @@ from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_host_device_image_upda
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_host_device_image_update_abort # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_host_upgrade_control_plane # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_host_upgrade_kubelet # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_rootca_update_abort # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_rootca_update_complete # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_rootca_update_generate_cert # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_rootca_update_host # noqa: F401

View File

@ -126,6 +126,14 @@ def nfvi_kube_host_upgrade_kubelet(host_uuid, host_name, force, callback):
return cmd_id
def nfvi_kube_rootca_update_abort(callback):
    """Kube RootCA Update - Abort

    Invokes the 'kube_rootca_update_abort' plugin command and returns the
    command id reported by the infrastructure plugin.
    """
    return _infrastructure_plugin.invoke_plugin('kube_rootca_update_abort',
                                                callback=callback)
def nfvi_kube_rootca_update_complete(callback):
"""Kube RootCA Update - Complete"""
cmd_id = _infrastructure_plugin.invoke_plugin(
@ -144,16 +152,23 @@ def nfvi_kube_rootca_update_generate_cert(expiry_date, subject, callback):
return cmd_id
def nfvi_kube_rootca_update_host(host_uuid, host_name, update_type, callback):
def nfvi_kube_rootca_update_host(host_uuid, host_name, update_type,
in_progress_state, completed_state,
failed_state, callback):
"""Kube RootCA Update - Host"""
cmd_id = _infrastructure_plugin.invoke_plugin('kube_rootca_update_host',
host_uuid,
host_name,
update_type,
in_progress_state,
completed_state,
failed_state,
callback=callback)
return cmd_id
# todo(abailey): Similar in-progress/complete/failed handling as used for hosts
# would protect stalled pod states from blocking orchestration
def nfvi_kube_rootca_update_pods(phase, callback):
"""Kube RootCA Update - Pods for a particular phase"""
cmd_id = _infrastructure_plugin.invoke_plugin(

View File

@ -61,16 +61,20 @@ class KubeRootcaHostUpdate(ObjectData):
NFVI Kube RootCA Host Update Object
"""
def __init__(self,
host_id,
host_id, # this ID is not the same as the sysinv ID
hostname,
target_rootca_cert,
effective_rootca_cert,
state):
state,
created_at,
updated_at):
super(KubeRootcaHostUpdate, self).__init__('1.0.0')
self.update(
dict(host_id=host_id,
hostname=hostname,
target_rootca_cert=target_rootca_cert,
effective_rootca_cert=effective_rootca_cert,
state=state)
state=state,
created_at=created_at,
updated_at=updated_at)
)

View File

@ -646,7 +646,9 @@ class QueryKubeRootcaHostUpdatesMixin(QueryMixinBase):
list_data['hostname'],
list_data['target_rootca_cert'],
list_data['effective_rootca_cert'],
list_data['state'])
list_data['state'],
list_data['created_at'],
list_data['updated_at'])
mixin_data.append(new_object)
self._nfvi_kube_rootca_host_update_list = mixin_data

View File

@ -56,6 +56,7 @@ class StrategyStepNames(Constants):
DISABLE_HOST_SERVICES = Constant('disable-host-services')
ENABLE_HOST_SERVICES = Constant('enable-host-services')
# kube rootca update steps
KUBE_ROOTCA_UPDATE_ABORT = Constant('kube-rootca-update-abort')
KUBE_ROOTCA_UPDATE_COMPLETE = Constant('kube-rootca-update-complete')
KUBE_ROOTCA_UPDATE_GENERATE_CERT = Constant('kube-rootca-update-generate-cert')
KUBE_ROOTCA_UPDATE_HOST_TRUSTBOTHCAS = Constant('kube-rootca-update-host-trustbothcas')
@ -2868,10 +2869,14 @@ class AbstractKubeRootcaUpdateStep(AbstractStrategyStep):
"""
Abstract Step class for processing changes to kube root ca update
"""
# todo(abailey): The hosts and pod steps have in_progress_state and
# fail_state but the majority of the transition steps do not so there
# should be an abstract class defined above this which adds those.
def __init__(self,
step_name,
success_state,
in_progress_state,
fail_state,
timeout_in_secs=600):
super(AbstractKubeRootcaUpdateStep, self).__init__(step_name,
@ -2879,8 +2884,9 @@ class AbstractKubeRootcaUpdateStep(AbstractStrategyStep):
# _wait_time and _query_inprogress are NOT persisted
self._wait_time = 0
self._query_inprogress = False
# success and fail state validators are persisted
# success, in-progress and fail state validators are persisted
self._success_state = success_state
self._in_progress_state = in_progress_state
self._fail_state = fail_state
@coroutine
@ -2968,6 +2974,7 @@ class AbstractKubeRootcaUpdateStep(AbstractStrategyStep):
self._query_inprogress = False
# validation states are persisted
self._success_state = data['success_state']
self._in_progress_state = data['in_progress_state']
self._fail_state = data['fail_state']
return self
@ -2977,6 +2984,7 @@ class AbstractKubeRootcaUpdateStep(AbstractStrategyStep):
"""
data = super(AbstractKubeRootcaUpdateStep, self).as_dict()
data['success_state'] = self._success_state
data['in_progress_state'] = self._in_progress_state
data['fail_state'] = self._fail_state
return data
@ -2986,12 +2994,14 @@ class AbstractKubeRootcaUpdateHostStep(AbstractKubeRootcaUpdateStep):
hosts,
step_name,
success_state,
in_progress_state,
fail_state,
update_type,
timeout_in_secs=600):
super(AbstractKubeRootcaUpdateHostStep, self).__init__(
step_name,
success_state,
in_progress_state,
fail_state,
timeout_in_secs=timeout_in_secs)
self._hosts = hosts
@ -3053,6 +3063,8 @@ class AbstractKubeRootcaUpdateHostStep(AbstractKubeRootcaUpdateStep):
elif k_host.state == self._fail_state:
# we should not have gotten here
fail_count += 1
# todo(abailey): We can add an in-progress check here
# and treat as failed if the time is too long
host_count += 1
# break out of inner loop, since uuids match
break
@ -3096,7 +3108,7 @@ class AbstractKubeRootcaUpdateHostStep(AbstractKubeRootcaUpdateStep):
now_ms = timers.get_monotonic_timestamp_in_ms()
secs_expired = (now_ms - self._wait_time) // 1000
# Wait at least 60 seconds before checking upgrade for first time
# Wait at least 60 seconds before checking update for first time
if 60 <= secs_expired and not self._query_inprogress:
self._query_inprogress = True
nfvi.nfvi_get_kube_rootca_host_update_list(
@ -3114,7 +3126,10 @@ class AbstractKubeRootcaUpdateHostStep(AbstractKubeRootcaUpdateStep):
host_director = directors.get_host_director()
operation = host_director.kube_rootca_update_hosts_by_type(
self._host_names,
self._update_type)
self._update_type,
self._in_progress_state,
self._success_state,
self._fail_state)
if operation.is_inprogress():
return strategy.STRATEGY_STEP_RESULT.WAIT, ""
@ -3133,9 +3148,16 @@ class KubeRootcaUpdateHostTrustBothcasStep(AbstractKubeRootcaUpdateHostStep):
hosts,
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_HOST_TRUSTBOTHCAS,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATED_HOST_TRUSTBOTHCAS,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATING_HOST_TRUSTBOTHCAS,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATING_HOST_TRUSTBOTHCAS_FAILED,
KUBE_CERT_UPDATE_TRUSTBOTHCAS)
def abort(self):
    """
    Return the list of abort steps for this step: a single
    KubeRootcaUpdateAbortStep
    """
    abort_step = KubeRootcaUpdateAbortStep()
    return [abort_step]
class KubeRootcaUpdateHostUpdateCertsStep(AbstractKubeRootcaUpdateHostStep):
"""Kube RootCA Update - Host - updateCerts"""
@ -3146,9 +3168,16 @@ class KubeRootcaUpdateHostUpdateCertsStep(AbstractKubeRootcaUpdateHostStep):
hosts,
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_HOST_UPDATECERTS,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATED_HOST_UPDATECERTS,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATING_HOST_UPDATECERTS,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATING_HOST_UPDATECERTS_FAILED,
KUBE_CERT_UPDATE_UPDATECERTS)
def abort(self):
    """
    Return the list of abort steps for this step: a single
    KubeRootcaUpdateAbortStep
    """
    abort_step = KubeRootcaUpdateAbortStep()
    return [abort_step]
class KubeRootcaUpdateHostTrustNewcaStep(AbstractKubeRootcaUpdateHostStep):
"""Kube RootCA Update - Host - trustNewCA"""
@ -3159,9 +3188,16 @@ class KubeRootcaUpdateHostTrustNewcaStep(AbstractKubeRootcaUpdateHostStep):
hosts,
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_HOST_TRUSTNEWCA,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATED_HOST_TRUSTNEWCA,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATING_HOST_TRUSTNEWCA,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATING_HOST_TRUSTNEWCA_FAILED,
KUBE_CERT_UPDATE_TRUSTNEWCA)
def abort(self):
    """
    Return the list of abort steps for this step: a single
    KubeRootcaUpdateAbortStep
    """
    abort_step = KubeRootcaUpdateAbortStep()
    return [abort_step]
class AbstractKubeRootcaUpdatePodsStep(AbstractKubeRootcaUpdateStep):
"""
@ -3171,12 +3207,14 @@ class AbstractKubeRootcaUpdatePodsStep(AbstractKubeRootcaUpdateStep):
def __init__(self,
step_name,
success_state,
in_progress_state,
fail_state,
phase,
timeout_in_secs=600):
super(AbstractKubeRootcaUpdatePodsStep, self).__init__(
step_name,
success_state,
in_progress_state,
fail_state,
timeout_in_secs=timeout_in_secs)
self._phase = phase
@ -3228,6 +3266,7 @@ class KubeRootcaUpdatePodsTrustBothcasStep(AbstractKubeRootcaUpdatePodsStep):
super(KubeRootcaUpdatePodsTrustBothcasStep, self).__init__(
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_PODS_TRUSTBOTHCAS,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATED_PODS_TRUSTBOTHCAS,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATING_PODS_TRUSTBOTHCAS,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATING_PODS_TRUSTBOTHCAS_FAILED,
KUBE_CERT_UPDATE_TRUSTBOTHCAS) # phase
@ -3240,6 +3279,7 @@ class KubeRootcaUpdatePodsTrustNewcaStep(AbstractKubeRootcaUpdatePodsStep):
super(KubeRootcaUpdatePodsTrustNewcaStep, self).__init__(
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_PODS_TRUSTNEWCA,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATED_PODS_TRUSTNEWCA,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATING_PODS_TRUSTNEWCA,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATING_PODS_TRUSTNEWCA_FAILED,
KUBE_CERT_UPDATE_TRUSTNEWCA) # phase
@ -3252,6 +3292,7 @@ class KubeRootcaUpdateStartStep(AbstractKubeRootcaUpdateStep):
super(KubeRootcaUpdateStartStep, self).__init__(
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_START,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATE_STARTED,
None, # sysinv API does not have in-progress state for this action
None) # there is no failure state if 'start' fails
@coroutine
@ -3282,6 +3323,57 @@ class KubeRootcaUpdateStartStep(AbstractKubeRootcaUpdateStep):
return strategy.STRATEGY_STEP_RESULT.WAIT, ""
class KubeRootcaUpdateAbortStep(AbstractKubeRootcaUpdateStep):
    """Kube RootCA Update - Abort - Strategy Step"""

    def __init__(self):
        from nfv_vim import nfvi

        rootca_states = nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE
        super(KubeRootcaUpdateAbortStep, self).__init__(
            STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_ABORT,
            rootca_states.KUBE_ROOTCA_UPDATE_ABORTED,
            None,  # sysinv API does not have in-progress state for this action
            None)  # there is no failure state if 'abort' fails

    @coroutine
    def _response_callback(self):
        """Kube RootCA Update - Abort - Callback

        Stores the returned update on the strategy when the request
        completed, then finishes the step based on the update state.
        """
        response = (yield)
        DLOG.debug("%s callback response=%s." % (self._name, response))

        if response['completed'] and self.strategy is not None:
            self.strategy.nfvi_kube_rootca_update = response['result-data']

        # Calling abort on an aborted update returns a failure, so the
        # rootca update object is examined on success AND failure.
        if self.strategy is None:
            # nothing left to abort once the strategy is gone
            outcome = strategy.STRATEGY_STEP_RESULT.SUCCESS
            detail = "no strategy"
        elif self.strategy.nfvi_kube_rootca_update is None:
            # nothing left to abort once the update is gone
            outcome = strategy.STRATEGY_STEP_RESULT.SUCCESS
            detail = "no update"
        elif self.strategy.nfvi_kube_rootca_update.state == self._success_state:
            outcome = strategy.STRATEGY_STEP_RESULT.SUCCESS
            detail = ""
        else:
            # any other state means the abort did not take effect
            outcome = strategy.STRATEGY_STEP_RESULT.FAILED
            detail = ("Unexpected state: %s"
                      % self.strategy.nfvi_kube_rootca_update.state)

        self.stage.step_complete(outcome, detail)

    def apply(self):
        """Kube RootCA Update - Abort

        Issues the abort request and waits for the callback.
        """
        from nfv_vim import nfvi

        abort_callback = self._response_callback()
        nfvi.nfvi_kube_rootca_update_abort(abort_callback)
        return strategy.STRATEGY_STEP_RESULT.WAIT, ""
class KubeRootcaUpdateCompleteStep(AbstractKubeRootcaUpdateStep):
"""Kube RootCA Update - Complete - Strategy Step"""
@ -3290,6 +3382,7 @@ class KubeRootcaUpdateCompleteStep(AbstractKubeRootcaUpdateStep):
super(KubeRootcaUpdateCompleteStep, self).__init__(
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_COMPLETE,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATE_COMPLETED,
None, # sysinv API does not have in-progress state for this action
None) # there is no failure state if 'complete' fails
def handle_event(self, event, event_data=None):
@ -3345,6 +3438,7 @@ class KubeRootcaUpdateGenerateCertStep(AbstractKubeRootcaUpdateStep):
super(KubeRootcaUpdateGenerateCertStep, self).__init__(
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_GENERATE_CERT,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATE_CERT_GENERATED,
None, # sysinv API does not have in-progress state for this action
None, # sysinv API does not have a FAILED state for this action
timeout_in_secs=300) # set a five minute timeout to detect failure
self._expiry_date = expiry_date
@ -3403,6 +3497,7 @@ class KubeRootcaUpdateUploadCertStep(AbstractKubeRootcaUpdateStep):
super(KubeRootcaUpdateUploadCertStep, self).__init__(
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_UPLOAD_CERT,
nfvi.objects.v1.KUBE_ROOTCA_UPDATE_STATE.KUBE_ROOTCA_UPDATE_CERT_UPLOADED,
None, # sysinv API does not have in-progress state for this action
None, # sysinv API does not have a FAILED state for this action
timeout_in_secs=300)
self._cert_file = cert_file
@ -4135,6 +4230,8 @@ def strategy_step_rebuild_from_dict(data):
rebuild_map = {
STRATEGY_STEP_NAME.APPLY_PATCHES: ApplySwPatchesStep,
# kube rootca update steps
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_ABORT:
KubeRootcaUpdateAbortStep,
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_COMPLETE:
KubeRootcaUpdateCompleteStep,
STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_GENERATE_CERT: