Add cluster upgrade to the API
To enable rolling upgrades of Kubernetes clusters, this patch proposes a
new API, /upgrade, for upgrading the base operating system of the nodes,
the Kubernetes version, and even the add-ons running on the k8s cluster:

    POST <ClusterID>/actions/upgrade

The POST body will be:

    {
        "cluster_template": "dd9cc5ed-3a2b-11e9-9233-fa163e46bcc2",
        "max_batch_size": 1,
        "nodegroup": "production_group"
    }

Co-Authored-By: Feilong Wang <flwang@catalyst.net.nz>
Task: 30168
Story: 2002210
Change-Id: Ia168877778aa0d473383eb06b1c8a16dc06b0576
parent 49e5f17cb5
commit 9b1bd5da54
@@ -33,6 +33,7 @@ created and managed by Magnum to support the COE's.

#. `Notification`_
#. `Container Monitoring`_
#. `Kubernetes External Load Balancer`_
#. `Rolling Upgrade`_

Overview
========
@@ -3230,3 +3231,8 @@ Kubernetes External Load Balancer
=================================

.. include:: kubernetes-load-balancer.rst

Rolling Upgrade
===============

.. include:: rolling-upgrade.rst
doc/source/user/rolling-upgrade.rst (new file, 30 lines)
@@ -0,0 +1,30 @@
Rolling upgrade is one of the most important features users expect from a
managed Kubernetes service, and in Magnum we are thinking hard about how
to provide a better user experience.
.. code-block:: bash

    #!/bin/bash -x

    IP="192.168.122.1"
    CLUSTER="797b39e1-fac2-48d3-8377-d6e6cc443d39"
    CT="e32c8cf7-394b-45e6-a17e-4fe6a30ad64b"

    # Build and send the upgrade request
    req_body=$(cat << EOF
    {
        "max_batch_size": 1,
        "nodegroup": "master",
        "cluster_template": "${CT}"
    }
    EOF
    )
    USER_TOKEN=$(openstack token issue -c id -f value)
    curl -g -i -X POST https://${IP}:9511/v1/clusters/${CLUSTER}/actions/upgrade \
        -H "OpenStack-API-Version: container-infra latest" \
        -H "X-Auth-Token: $USER_TOKEN" \
        -H "Content-Type: application/json" \
        -H "Accept: application/json" \
        -H "User-Agent: None" \
        -d "$req_body"
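For reference, a rough Python equivalent of the curl call above (a sketch
only, not part of this patch; it assumes a valid token in the ``OS_TOKEN``
environment variable and reuses the same placeholder endpoint and IDs):

.. code-block:: python

    import json
    import os

    import requests

    MAGNUM_ENDPOINT = "https://192.168.122.1:9511/v1"
    CLUSTER = "797b39e1-fac2-48d3-8377-d6e6cc443d39"
    CT = "e32c8cf7-394b-45e6-a17e-4fe6a30ad64b"

    body = {
        "max_batch_size": 1,
        "nodegroup": "master",
        "cluster_template": CT,
    }
    headers = {
        "OpenStack-API-Version": "container-infra latest",
        # e.g. export OS_TOKEN=$(openstack token issue -c id -f value)
        "X-Auth-Token": os.environ["OS_TOKEN"],
        "Content-Type": "application/json",
    }
    # POST the upgrade action; 202 means the request was accepted.
    resp = requests.post(
        "%s/clusters/%s/actions/upgrade" % (MAGNUM_ENDPOINT, CLUSTER),
        headers=headers, data=json.dumps(body), verify=False)
    print(resp.status_code)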
@@ -55,6 +55,22 @@ class ClusterResizeRequest(base.APIBase):
    """Group of nodes to be upgraded (master or node)"""


class ClusterUpgradeRequest(base.APIBase):
    """API object for handling upgrade requests.

    This class enforces type checking and value constraints.
    """

    max_batch_size = wtypes.IntegerType(minimum=1)
    """Max batch size of nodes to be upgraded in parallel"""

    nodegroup = wtypes.StringType(min_length=1, max_length=255)
    """Group of nodes to be upgraded (master or node)"""

    cluster_template = wtypes.StringType(min_length=1, max_length=255)
    """The cluster_template UUID"""


class ActionsController(base.Controller):
    """REST controller for cluster actions."""
    def __init__(self):
@@ -62,6 +78,7 @@ class ActionsController(base.Controller):

    _custom_actions = {
        'resize': ['POST'],
        'upgrade': ['POST']
    }

    @base.Controller.api_version("1.7")
@@ -108,3 +125,35 @@ class ActionsController(base.Controller):
            cluster_resize_req.nodes_to_remove,
            nodegroup)
        return ClusterID(cluster.uuid)

    @base.Controller.api_version("1.8")
    @expose.expose(None, types.uuid_or_name,
                   body=ClusterUpgradeRequest, status_code=202)
    def upgrade(self, cluster_ident, cluster_upgrade_req):
        """Upgrade a cluster.

        :param cluster_ident: UUID of a cluster or logical name of the
                              cluster.
        """
        context = pecan.request.context
        cluster = api_utils.get_resource('Cluster', cluster_ident)
        policy.enforce(context, 'cluster:upgrade', cluster,
                       action='cluster:upgrade')

        new_cluster_template = api_utils.get_resource(
            'ClusterTemplate',
            cluster_upgrade_req.cluster_template)

        if (cluster_upgrade_req.nodegroup == wtypes.Unset or
                not cluster_upgrade_req.nodegroup):
            # NOTE(ttsiouts): If the nodegroup is not specified,
            # reflect the change to the default worker nodegroup.
            nodegroup = cluster.default_ng_worker
        else:
            nodegroup = objects.NodeGroup.get(
                context, cluster.uuid, cluster_upgrade_req.nodegroup)

        pecan.request.rpcapi.cluster_upgrade(
            cluster,
            new_cluster_template,
            cluster_upgrade_req.max_batch_size,
            nodegroup)
        return ClusterID(cluster.uuid)
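A functional test for the new action could look like the following
(hypothetical sketch, modelled on the resize tests further down; the test
name and fixture values are not part of this patch):

    def test_upgrade(self):
        cluster_upgrade_req = {
            "max_batch_size": 1,
            "nodegroup": "master",
            "cluster_template": "dd9cc5ed-3a2b-11e9-9233-fa163e46bcc2"
        }
        # The action is gated at microversion 1.8, so the request must
        # opt in via the OpenStack-API-Version header.
        response = self.post_json('/clusters/%s/actions/upgrade' %
                                  self.cluster_obj.uuid,
                                  cluster_upgrade_req,
                                  headers={"Openstack-Api-Version":
                                           "container-infra latest"})
        # The controller exposes the action with status_code=202.
        self.assertEqual(202, response.status_code)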
@@ -40,10 +40,11 @@ REST_API_VERSION_HISTORY = """REST API Version History:
       * 1.5 - Add cluster CA certificate rotation support
       * 1.6 - Add quotas API
       * 1.7 - Add resize API
       * 1.8 - Add upgrade API
"""

BASE_VER = '1.1'
-CURRENT_MAX_VER = '1.7'
+CURRENT_MAX_VER = '1.8'


class Version(object):
@@ -183,6 +183,10 @@ class InvalidSubnet(Invalid):
    message = _("Received invalid subnet %(subnet)s.")


class InvalidVersion(Invalid):
    message = _("Received invalid tag for %(tag)s.")


class HTTPNotFound(ResourceNotFound):
    pass
@@ -150,6 +150,17 @@ rules = [
                'method': 'POST'
            }
        ]
    ),
    policy.DocumentedRuleDefault(
        name=CLUSTER % 'upgrade',
        check_str=base.RULE_DENY_CLUSTER_USER,
        description='Upgrade an existing cluster.',
        operations=[
            {
                'path': '/v1/clusters/{cluster_ident}/actions/upgrade',
                'method': 'POST'
            }
        ]
    )
]
@@ -74,6 +74,22 @@ class API(rpc_service.API):
                          nodes_to_remove=nodes_to_remove,
                          nodegroup=nodegroup)

    def cluster_upgrade(self, cluster, cluster_template, max_batch_size,
                        nodegroup):
        return self._call('cluster_upgrade',
                          cluster=cluster,
                          cluster_template=cluster_template,
                          max_batch_size=max_batch_size,
                          nodegroup=nodegroup)

    def cluster_upgrade_async(self, cluster, cluster_template, max_batch_size,
                              nodegroup):
        # Async variant casts instead of calling, matching the other
        # *_async methods in this class.
        return self._cast('cluster_upgrade',
                          cluster=cluster,
                          cluster_template=cluster_template,
                          max_batch_size=max_batch_size,
                          nodegroup=nodegroup)

    # Federation Operations

    def federation_create(self, federation, create_timeout):
@@ -269,3 +269,52 @@ class Handler(object):

        cluster.save()
        return cluster

    def cluster_upgrade(self, context, cluster, cluster_template,
                        max_batch_size, nodegroup, rollback=False):
        LOG.debug('cluster_conductor cluster_upgrade')

        # osc = clients.OpenStackClients(context)
        allow_update_status = (
            fields.ClusterStatus.CREATE_COMPLETE,
            fields.ClusterStatus.UPDATE_COMPLETE,
            fields.ClusterStatus.RESUME_COMPLETE,
            fields.ClusterStatus.RESTORE_COMPLETE,
            fields.ClusterStatus.ROLLBACK_COMPLETE,
            fields.ClusterStatus.SNAPSHOT_COMPLETE,
            fields.ClusterStatus.CHECK_COMPLETE,
            fields.ClusterStatus.ADOPT_COMPLETE
        )
        if cluster.status not in allow_update_status:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            operation = _('Upgrading a cluster when status is '
                          '"%s"') % cluster.status
            raise exception.NotSupported(operation=operation)

        # Get driver
        ct = conductor_utils.retrieve_cluster_template(context, cluster)
        cluster_driver = driver.Driver.get_driver(ct.server_type,
                                                  ct.cluster_distro,
                                                  ct.coe)
        # Upgrade cluster
        try:
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
            # Pass rollback by keyword so it does not bind to the driver's
            # scale_manager parameter.
            cluster_driver.upgrade_cluster(context, cluster, cluster_template,
                                           max_batch_size, nodegroup,
                                           rollback=rollback)
            cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
            cluster.status_reason = None
        except Exception as e:
            cluster.status = fields.ClusterStatus.UPDATE_FAILED
            cluster.status_reason = six.text_type(e)
            cluster.save()
            conductor_utils.notify_about_cluster_operation(
                context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
            if isinstance(e, exc.HTTPBadRequest):
                e = exception.InvalidParameterValue(message=six.text_type(e))
                raise e
            raise

        cluster.save()
        return cluster
@@ -45,6 +45,17 @@ def retrieve_cluster_uuid(context, cluster_ident):
    return cluster_ident


def retrieve_ct_by_name_or_uuid(context, cluster_template_ident):
    if not uuidutils.is_uuid_like(cluster_template_ident):
        return cluster_template.ClusterTemplate.get_by_name(
            context,
            cluster_template_ident)
    else:
        return cluster_template.ClusterTemplate.get_by_uuid(
            context,
            cluster_template_ident)


def object_has_stack(context, cluster_uuid):
    osc = clients.OpenStackClients(context)
    obj = retrieve_cluster(context, cluster_uuid)
@@ -176,6 +176,13 @@ class Driver(object):
        """
        return None

    @abc.abstractmethod
    def upgrade_cluster(self, context, cluster, cluster_template,
                        max_batch_size, nodegroup, scale_manager=None,
                        rollback=False):
        raise NotImplementedError("Subclasses must implement "
                                  "'upgrade_cluster'.")

    @abc.abstractmethod
    def delete_cluster(self, context, cluster):
        raise NotImplementedError("Subclasses must implement "
@@ -46,6 +46,17 @@ class HeatDriver(driver.Driver):
    orchestrating cluster lifecycle operations
    """

    def _extract_template_definition_up(self, context, cluster,
                                        cluster_template,
                                        scale_manager=None):
        ct_obj = conductor_utils.retrieve_ct_by_name_or_uuid(
            context,
            cluster_template)
        definition = self.get_template_definition()
        return definition.extract_definition(context, ct_obj,
                                             cluster,
                                             scale_manager=scale_manager)

    def _extract_template_definition(self, context, cluster,
                                     scale_manager=None):
        cluster_template = conductor_utils.retrieve_cluster_template(context,
@@ -105,6 +116,12 @@ class HeatDriver(driver.Driver):
                       rollback=False):
        self._update_stack(context, cluster, scale_manager, rollback)

    @abc.abstractmethod
    def upgrade_cluster(self, context, cluster, cluster_template,
                        max_batch_size, nodegroup, scale_manager=None,
                        rollback=False):
        raise NotImplementedError("Must implement 'upgrade_cluster'")

    def delete_cluster(self, context, cluster):
        self.pre_delete_cluster(context, cluster)
@@ -38,3 +38,8 @@ class Driver(driver.HeatDriver):
        # the scale_manager.
        # https://bugs.launchpad.net/magnum/+bug/1746510
        return None

    def upgrade_cluster(self, context, cluster, cluster_template,
                        max_batch_size, nodegroup, scale_manager=None,
                        rollback=False):
        raise NotImplementedError("Must implement 'upgrade_cluster'")
@@ -13,7 +13,10 @@
# under the License.

from oslo_log import log as logging
from pbr.version import SemanticVersion as SV

from magnum.common import clients
from magnum.common import exception
from magnum.common import keystone
from magnum.common import octavia
from magnum.drivers.common import k8s_monitor
@@ -51,3 +54,57 @@ class Driver(driver.HeatDriver):
        LOG.info("Starting to delete loadbalancers for cluster %s",
                 cluster.uuid)
        octavia.delete_loadbalancers(context, cluster)

    def upgrade_cluster(self, context, cluster, cluster_template,
                        max_batch_size, nodegroup, scale_manager=None,
                        rollback=False):
        osc = clients.OpenStackClients(context)
        _, heat_params, _ = (
            self._extract_template_definition(context, cluster,
                                              scale_manager=scale_manager))
        # Extract labels/tags from the cluster, not the template. Some
        # version tags are not declared explicitly in the labels, so we
        # need to get them from heat_params based on the labels given in
        # the new cluster template.
        current_addons = {}
        new_addons = {}
        for label in cluster_template.labels:
            # This is the upgrade API, so we don't introduce anything new
            # by this API, but just focus on the version change.
            new_addons[label] = cluster_template.labels[label]
            if ((label.endswith('_tag') or
                    label.endswith('_version')) and label in heat_params):
                current_addons[label] = heat_params[label]
                if (SV.from_pip_string(new_addons[label]) <
                        SV.from_pip_string(current_addons[label])):
                    raise exception.InvalidVersion(tag=label)

        heat_params["server_image"] = cluster_template.image_id
        heat_params["master_image"] = cluster_template.image_id
        heat_params["minion_image"] = cluster_template.image_id
        # NOTE(flwang): Overwrite the kube_tag as well to avoid a server
        # rebuild followed by another k8s upgrade when both the image id
        # and kube_tag have changed.
        heat_params["kube_tag"] = cluster_template.labels["kube_tag"]
        heat_params["master_kube_tag"] = cluster_template.labels["kube_tag"]
        heat_params["minion_kube_tag"] = cluster_template.labels["kube_tag"]
        heat_params["update_max_batch_size"] = max_batch_size
        # Rules: 1. No downgrade 2. Explicitly override 3. Merging based on set
        # Update heat_params based on the data generated above
        del heat_params['kube_service_account_private_key']
        del heat_params['kube_service_account_key']

        for label in new_addons:
            heat_params[label] = cluster_template.labels[label]

        cluster['cluster_template_id'] = cluster_template.uuid
        new_labels = cluster.labels.copy()
        new_labels.update(cluster_template.labels)
        cluster['labels'] = new_labels

        fields = {
            'existing': True,
            'parameters': heat_params,
            'disable_rollback': not rollback
        }
        osc.heat().stacks.update(cluster.stack_id, **fields)
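To make the "no downgrade" guard above concrete, a small standalone sketch
(the label name and values are made up for illustration; the driver raises
exception.InvalidVersion(tag=label) where this raises ValueError):

    from pbr.version import SemanticVersion as SV

    current = {'prometheus_version': '2.7.1'}    # as found in heat_params
    requested = {'prometheus_version': '2.6.0'}  # from the new template

    for label, new_value in requested.items():
        if ((label.endswith('_tag') or label.endswith('_version'))
                and label in current):
            # pbr's SemanticVersion orders versions semantically, so a
            # lower requested version is detected and rejected.
            if (SV.from_pip_string(new_value) <
                    SV.from_pip_string(current[label])):
                raise ValueError('downgrade of %s is not allowed' % label)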
@@ -38,3 +38,8 @@ class Driver(driver.HeatDriver):
        # the scale_manager.
        # https://bugs.launchpad.net/magnum/+bug/1746510
        return None

    def upgrade_cluster(self, context, cluster, cluster_template,
                        max_batch_size, nodegroup, scale_manager=None,
                        rollback=False):
        raise NotImplementedError("Must implement 'upgrade_cluster'")
@@ -36,3 +36,8 @@ class Driver(driver.HeatDriver):

    def get_scale_manager(self, context, osclient, cluster):
        return MesosScaleManager(context, osclient, cluster)

    def upgrade_cluster(self, context, cluster, cluster_template,
                        max_batch_size, nodegroup, scale_manager=None,
                        rollback=False):
        raise NotImplementedError("Must implement 'upgrade_cluster'")
@@ -32,3 +32,8 @@ class Driver(driver.HeatDriver):

    def get_monitor(self, context, cluster):
        return monitor.SwarmMonitor(context, cluster)

    def upgrade_cluster(self, context, cluster, cluster_template,
                        max_batch_size, nodegroup, scale_manager=None,
                        rollback=False):
        raise NotImplementedError("Must implement 'upgrade_cluster'")
@@ -32,3 +32,8 @@ class Driver(driver.HeatDriver):

    def get_monitor(self, context, cluster):
        return monitor.SwarmMonitor(context, cluster)

    def upgrade_cluster(self, context, cluster, cluster_template,
                        max_batch_size, nodegroup, scale_manager=None,
                        rollback=False):
        raise NotImplementedError("Must implement 'upgrade_cluster'")
@@ -40,7 +40,7 @@ class TestRootController(api_base.FunctionalTest):
             [{u'href': u'http://localhost/v1/',
               u'rel': u'self'}],
             u'status': u'CURRENT',
-            u'max_version': u'1.7',
+            u'max_version': u'1.8',
             u'min_version': u'1.1'}]}

        self.v1_expected = {
@@ -43,7 +43,7 @@ class TestClusterActions(api_base.FunctionalTest):
                                  self.cluster_obj.uuid,
                                  {"node_count": new_node_count},
                                  headers={"Openstack-Api-Version":
-                                          "container-infra 1.7"})
+                                          "container-infra latest"})
        self.assertEqual(202, response.status_code)

        response = self.get_json('/clusters/%s' % self.cluster_obj.uuid)
@@ -66,7 +66,7 @@ class TestClusterActions(api_base.FunctionalTest):
                                  self.cluster_obj.uuid,
                                  cluster_resize_req,
                                  headers={"Openstack-Api-Version":
-                                          "container-infra 1.7"})
+                                          "container-infra latest"})
        self.assertEqual(202, response.status_code)

        response = self.get_json('/clusters/%s' % self.cluster_obj.uuid)
@@ -86,7 +86,7 @@ class TestClusterActions(api_base.FunctionalTest):
                                  self.cluster_obj.uuid,
                                  cluster_resize_req,
                                  headers={"Openstack-Api-Version":
-                                          "container-infra 1.7"},
+                                          "container-infra latest"},
                                  expect_errors=True)
        self.assertEqual(400, response.status_code)

@@ -103,7 +103,7 @@ class TestClusterActions(api_base.FunctionalTest):
                                  self.cluster_obj.uuid,
                                  cluster_resize_req,
                                  headers={"Openstack-Api-Version":
-                                          "container-infra 1.7"},
+                                          "container-infra latest"},
                                  expect_errors=True)
        self.assertEqual(400, response.status_code)

@@ -120,6 +120,6 @@ class TestClusterActions(api_base.FunctionalTest):
                                  self.cluster_obj.uuid,
                                  cluster_resize_req,
                                  headers={"Openstack-Api-Version":
-                                          "container-infra 1.7"},
+                                          "container-infra latest"},
                                  expect_errors=True)
        self.assertEqual(400, response.status_code)
releasenotes/notes/upgrade-api-975233ab93c0c092.yaml (new file, 6 lines)
@@ -0,0 +1,6 @@
---
features:
  - |
    A new API endpoint, <ClusterID>/actions/upgrade, is added to support
    rolling upgrades of both the base OS of the nodes and the version of
    Kubernetes. For more details, please refer to the API Reference
    document.