Add cluster upgrade to the API

To enable rolling upgrades of Kubernetes clusters, this patch
proposes a new API action, /upgrade, which upgrades the base
operating system of the nodes, the version of Kubernetes, and
even the add-ons running on the k8s cluster:

POST <ClusterID>/actions/upgrade

And the post body will be:

{
    "cluster_template": "dd9cc5ed-3a2b-11e9-9233-fa163e46bcc2",
    "max_batch_size": 1,
    "nodegroup": "production_group"
}

Co-Authored-By: Feilong Wang <flwang@catalyst.net.nz>

Task: 30168
Story: 2002210

Change-Id: Ia168877778aa0d473383eb06b1c8a16dc06b0576
This commit is contained in:
Spyros Trigazis (strigazi) 2017-06-08 08:45:51 +00:00 committed by Fei Long Wang
parent 49e5f17cb5
commit 9b1bd5da54
20 changed files with 295 additions and 7 deletions

View File

@ -33,6 +33,7 @@ created and managed by Magnum to support the COE's.
#. `Notification`_
#. `Container Monitoring`_
#. `Kubernetes External Load Balancer`_
#. `Rolling Upgrade`_
Overview
========
@ -3230,3 +3231,8 @@ Kubernetes External Load Balancer
=================================
.. include:: kubernetes-load-balancer.rst
Rolling Upgrade
===============
.. include:: rolling-upgrade.rst

View File

@ -0,0 +1,30 @@
Rolling upgrade is one of the most important features users want to see in a
managed Kubernetes service, and in Magnum we are thinking more deeply about
how to provide a better user experience.
.. code-block:: bash
#!/bin/bash -x
# Example: request a rolling upgrade of a cluster through the Magnum API.
IP="192.168.122.1"
CLUSTER="797b39e1-fac2-48d3-8377-d6e6cc443d39"
CT="e32c8cf7-394b-45e6-a17e-4fe6a30ad64b"
# Upgrade curl
req_body=$(cat << EOF
{
"max_batch_size": 1,
"nodegroup": "master",
"cluster_template": "${CT}"
}
EOF
)
USER_TOKEN=$(openstack token issue -c id -f value)
# The upgrade action only accepts POST requests.
curl -g -i -X POST https://${IP}:9511/v1/clusters/${CLUSTER}/actions/upgrade \
-H "OpenStack-API-Version: container-infra latest" \
-H "X-Auth-Token: $USER_TOKEN" \
-H "Content-Type: application/json" \
-H "Accept: application/json" \
-H "User-Agent: None" \
-d "$req_body"

View File

@ -55,6 +55,22 @@ class ClusterResizeRequest(base.APIBase):
"""Group of nodes to be upgraded (master or node)"""
class ClusterUpgradeRequest(base.APIBase):
    """API object for handling upgrade requests.

    This class enforces type checking and value constraints.
    """

    max_batch_size = wtypes.IntegerType(minimum=1)
    """Max batch size of nodes to be upgraded in parallel"""

    nodegroup = wtypes.StringType(min_length=1, max_length=255)
    """Group of nodes to be upgraded (master or node)"""

    cluster_template = wtypes.StringType(min_length=1, max_length=255)
    """The cluster_template UUID"""
class ActionsController(base.Controller):
"""REST controller for cluster actions."""
def __init__(self):
@ -62,6 +78,7 @@ class ActionsController(base.Controller):
_custom_actions = {
'resize': ['POST'],
'upgrade': ['POST']
}
@base.Controller.api_version("1.7")
@ -108,3 +125,35 @@ class ActionsController(base.Controller):
cluster_resize_req.nodes_to_remove,
nodegroup)
return ClusterID(cluster.uuid)
@base.Controller.api_version("1.8")
@expose.expose(None, types.uuid_or_name,
               body=ClusterUpgradeRequest, status_code=202)
def upgrade(self, cluster_ident, cluster_upgrade_req):
    """Upgrade a cluster.

    :param cluster_ident: UUID of a cluster or logical name of the cluster.
    :param cluster_upgrade_req: request body carrying ``cluster_template``,
        ``max_batch_size`` and an optional ``nodegroup``.
    :returns: a ClusterID built from the cluster's UUID.
    """
    ctx = pecan.request.context
    cluster = api_utils.get_resource('Cluster', cluster_ident)
    policy.enforce(ctx, 'cluster:upgrade', cluster,
                   action='cluster:upgrade')

    target_template = api_utils.get_resource(
        'ClusterTemplate',
        cluster_upgrade_req.cluster_template)

    requested_ng = cluster_upgrade_req.nodegroup
    use_default_ng = requested_ng == wtypes.Unset or not requested_ng
    if use_default_ng:
        # NOTE(ttsiouts): If the nodegroup is not specified
        # reflect the change to the default worker nodegroup
        nodegroup = cluster.default_ng_worker
    else:
        nodegroup = objects.NodeGroup.get(ctx, cluster.uuid, requested_ng)

    pecan.request.rpcapi.cluster_upgrade(
        cluster,
        target_template,
        cluster_upgrade_req.max_batch_size,
        nodegroup)
    return ClusterID(cluster.uuid)

View File

@ -40,10 +40,11 @@ REST_API_VERSION_HISTORY = """REST API Version History:
* 1.5 - Add cluster CA certificate rotation support
* 1.6 - Add quotas API
* 1.7 - Add resize API
* 1.8 - Add upgrade API
"""
BASE_VER = '1.1'
CURRENT_MAX_VER = '1.7'
CURRENT_MAX_VER = '1.8'
class Version(object):

View File

@ -183,6 +183,10 @@ class InvalidSubnet(Invalid):
message = _("Received invalid subnet %(subnet)s.")
class InvalidVersion(Invalid):
    """Invalid-input error whose message names the offending ``tag``."""
    # The %(tag)s placeholder is filled from the ``tag`` keyword argument.
    message = _("Received invalid tag for %(tag)s.")
class HTTPNotFound(ResourceNotFound):
    """ResourceNotFound subclass that adds no behavior of its own."""
    pass

View File

@ -150,6 +150,17 @@ rules = [
'method': 'POST'
}
]
),
policy.DocumentedRuleDefault(
name=CLUSTER % 'upgrade',
check_str=base.RULE_DENY_CLUSTER_USER,
description='Upgrade an existing cluster.',
operations=[
{
'path': '/v1/clusters/{cluster_ident}/actions/upgrade',
'method': 'POST'
}
]
)
]

View File

@ -74,6 +74,22 @@ class API(rpc_service.API):
nodes_to_remove=nodes_to_remove,
nodegroup=nodegroup)
def cluster_upgrade(self, cluster, cluster_template, max_batch_size,
                    nodegroup):
    """Invoke the conductor's ``cluster_upgrade`` method via ``_call``.

    :param cluster: cluster to upgrade.
    :param cluster_template: cluster template to upgrade to.
    :param max_batch_size: batch size forwarded to the conductor.
    :param nodegroup: nodegroup forwarded to the conductor.
    :returns: whatever ``self._call`` returns.
    """
    rpc_kwargs = {
        'cluster': cluster,
        'cluster_template': cluster_template,
        'max_batch_size': max_batch_size,
        'nodegroup': nodegroup,
    }
    return self._call('cluster_upgrade', **rpc_kwargs)
def cluster_upgrade_async(self, cluster, cluster_template, max_batch_size,
                          nodegroup):
    """Request a cluster upgrade from the conductor without blocking.

    The request is sent with ``_cast`` rather than ``_call`` so that this
    method differs from the blocking ``cluster_upgrade`` variant.
    NOTE(review): assumes the RPC base class provides ``_cast`` next to
    ``_call`` -- confirm against rpc_service.API.

    :param cluster: cluster to upgrade.
    :param cluster_template: cluster template to upgrade to.
    :param max_batch_size: batch size forwarded to the conductor.
    :param nodegroup: nodegroup forwarded to the conductor.
    :returns: whatever ``self._cast`` returns.
    """
    return self._cast('cluster_upgrade',
                      cluster=cluster,
                      cluster_template=cluster_template,
                      max_batch_size=max_batch_size,
                      nodegroup=nodegroup)
# Federation Operations
def federation_create(self, federation, create_timeout):

View File

@ -269,3 +269,52 @@ class Handler(object):
cluster.save()
return cluster
def cluster_upgrade(self, context, cluster, cluster_template,
                    max_batch_size, nodegroup, rollback=False):
    """Upgrade a cluster through its driver and record the outcome.

    :param context: request context.
    :param cluster: cluster object to upgrade; its status fields are
        updated and saved by this method.
    :param cluster_template: cluster template to upgrade to; passed
        through to the driver.
    :param max_batch_size: passed through to the driver.
    :param nodegroup: passed through to the driver.
    :param rollback: passed to the driver as its ``rollback`` keyword.
    :returns: the saved cluster.
    :raises exception.NotSupported: if the cluster status is not one of
        the ``*_COMPLETE`` states listed below.
    :raises exception.InvalidParameterValue: if the driver raised
        ``exc.HTTPBadRequest``.
    """
    LOG.debug('cluster_conductor cluster_upgrade')

    allow_update_status = (
        fields.ClusterStatus.CREATE_COMPLETE,
        fields.ClusterStatus.UPDATE_COMPLETE,
        fields.ClusterStatus.RESUME_COMPLETE,
        fields.ClusterStatus.RESTORE_COMPLETE,
        fields.ClusterStatus.ROLLBACK_COMPLETE,
        fields.ClusterStatus.SNAPSHOT_COMPLETE,
        fields.ClusterStatus.CHECK_COMPLETE,
        fields.ClusterStatus.ADOPT_COMPLETE
    )
    if cluster.status not in allow_update_status:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        operation = _('Upgrading a cluster when status is '
                      '"%s"') % cluster.status
        raise exception.NotSupported(operation=operation)

    # Get driver (selected from the cluster's current template)
    ct = conductor_utils.retrieve_cluster_template(context, cluster)
    cluster_driver = driver.Driver.get_driver(ct.server_type,
                                              ct.cluster_distro,
                                              ct.coe)
    # Upgrade cluster
    try:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
        # rollback must be passed by keyword: the driver signature has
        # scale_manager before rollback, so a positional value would be
        # bound to scale_manager instead.
        cluster_driver.upgrade_cluster(context, cluster, cluster_template,
                                       max_batch_size, nodegroup,
                                       rollback=rollback)
        cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
        cluster.status_reason = None
    except Exception as e:
        # Persist the failure before propagating it to the caller.
        cluster.status = fields.ClusterStatus.UPDATE_FAILED
        cluster.status_reason = six.text_type(e)
        cluster.save()
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        if isinstance(e, exc.HTTPBadRequest):
            e = exception.InvalidParameterValue(message=six.text_type(e))
            raise e
        raise

    cluster.save()
    return cluster

View File

@ -45,6 +45,17 @@ def retrieve_cluster_uuid(context, cluster_ident):
return cluster_ident
def retrieve_ct_by_name_or_uuid(context, cluster_template_ident):
    """Look up a cluster template by UUID or, failing that, by name.

    :param context: request context passed to the lookup.
    :param cluster_template_ident: UUID-like string or template name.
    :returns: the result of ``ClusterTemplate.get_by_uuid`` when the
        identifier is UUID-like, otherwise of ``get_by_name``.
    """
    if uuidutils.is_uuid_like(cluster_template_ident):
        lookup = cluster_template.ClusterTemplate.get_by_uuid
    else:
        lookup = cluster_template.ClusterTemplate.get_by_name
    return lookup(context, cluster_template_ident)
def object_has_stack(context, cluster_uuid):
osc = clients.OpenStackClients(context)
obj = retrieve_cluster(context, cluster_uuid)

View File

@ -176,6 +176,13 @@ class Driver(object):
"""
return None
@abc.abstractmethod
def upgrade_cluster(self, context, cluster, cluster_template,
                    max_batch_size, nodegroup, scale_manager=None,
                    rollback=False):
    """Abstract hook for upgrading a cluster; always raises here.

    :raises NotImplementedError: unconditionally.
    """
    message = "Subclasses must implement 'upgrade_cluster'."
    raise NotImplementedError(message)
@abc.abstractmethod
def delete_cluster(self, context, cluster):
raise NotImplementedError("Subclasses must implement "

View File

@ -46,6 +46,17 @@ class HeatDriver(driver.Driver):
orchestrating cluster lifecycle operations
"""
def _extract_template_definition_up(self, context, cluster,
                                    cluster_template,
                                    scale_manager=None):
    """Extract the template definition using an explicit cluster template.

    :param context: request context.
    :param cluster: cluster the definition is extracted for.
    :param cluster_template: name or UUID used to look up the template.
    :param scale_manager: forwarded to ``extract_definition``.
    :returns: the result of ``extract_definition``.
    """
    target_ct = conductor_utils.retrieve_ct_by_name_or_uuid(
        context, cluster_template)
    template_def = self.get_template_definition()
    return template_def.extract_definition(
        context, target_ct, cluster, scale_manager=scale_manager)
def _extract_template_definition(self, context, cluster,
scale_manager=None):
cluster_template = conductor_utils.retrieve_cluster_template(context,
@ -105,6 +116,12 @@ class HeatDriver(driver.Driver):
rollback=False):
self._update_stack(context, cluster, scale_manager, rollback)
@abc.abstractmethod
def upgrade_cluster(self, context, cluster, cluster_template,
                    max_batch_size, nodegroup, scale_manager=None,
                    rollback=False):
    """Abstract hook for upgrading a cluster; always raises here.

    :raises NotImplementedError: unconditionally.
    """
    message = "Must implement 'upgrade_cluster'"
    raise NotImplementedError(message)
def delete_cluster(self, context, cluster):
self.pre_delete_cluster(context, cluster)

View File

@ -38,3 +38,8 @@ class Driver(driver.HeatDriver):
# the scale_manager.
# https://bugs.launchpad.net/magnum/+bug/1746510
return None
def upgrade_cluster(self, context, cluster, cluster_template,
                    max_batch_size, nodegroup, scale_manager=None,
                    rollback=False):
    """Cluster upgrade is not implemented by this driver.

    :raises NotImplementedError: unconditionally.
    """
    message = "Must implement 'upgrade_cluster'"
    raise NotImplementedError(message)

View File

@ -13,7 +13,10 @@
# under the License.
from oslo_log import log as logging
from pbr.version import SemanticVersion as SV
from magnum.common import clients
from magnum.common import exception
from magnum.common import keystone
from magnum.common import octavia
from magnum.drivers.common import k8s_monitor
@ -51,3 +54,57 @@ class Driver(driver.HeatDriver):
LOG.info("Starting to delete loadbalancers for cluster %s",
cluster.uuid)
octavia.delete_loadbalancers(context, cluster)
def upgrade_cluster(self, context, cluster, cluster_template,
                    max_batch_size, nodegroup, scale_manager=None,
                    rollback=False):
    """Upgrade the cluster's Heat stack to match a new cluster template.

    The cluster's current Heat parameters are extracted, checked against
    the version labels of ``cluster_template`` (downgrades are rejected),
    overridden with the new image and labels, and submitted as an update
    of the existing stack.

    :param context: request context used to build the OpenStack clients.
    :param cluster: cluster being upgraded; its ``cluster_template_id``
        and ``labels`` are updated in place (not saved here).
    :param cluster_template: cluster template object to upgrade to.
    :param max_batch_size: sent to Heat as ``update_max_batch_size``.
    :param nodegroup: accepted for interface compatibility; not used by
        this implementation.
    :param scale_manager: forwarded to ``_extract_template_definition``.
    :param rollback: when False, Heat rollback is disabled for the update.
    :raises exception.InvalidVersion: if a ``*_tag``/``*_version`` label
        of the template is older than the current Heat parameter.
    """
    osc = clients.OpenStackClients(context)
    _, heat_params, _ = self._extract_template_definition(
        context, cluster, scale_manager=scale_manager)

    # Extract labels/tags from cluster not template
    # There are some version tags are not declared in labels explicitly,
    # so we need to get them from heat_params based on the labels given in
    # new cluster template.
    template_labels = cluster_template.labels
    for label, new_value in template_labels.items():
        # This is upgrade API, so we don't introduce new stuff by this API,
        # but just focus on the version change.
        if (label.endswith(('_tag', '_version')) and
                label in heat_params):
            if (SV.from_pip_string(new_value) <
                    SV.from_pip_string(heat_params[label])):
                raise exception.InvalidVersion(tag=label)

    for image_param in ("server_image", "master_image", "minion_image"):
        heat_params[image_param] = cluster_template.image_id

    # NOTE(flwang): Overwrite the kube_tag as well to avoid a server
    # rebuild then do the k8s upgrade again, when both image id and
    # kube_tag changed
    # A template without a kube_tag label leaves the current values as
    # they are instead of failing with KeyError.
    new_kube_tag = template_labels.get("kube_tag")
    if new_kube_tag is not None:
        heat_params["kube_tag"] = new_kube_tag
        heat_params["master_kube_tag"] = new_kube_tag
        heat_params["minion_kube_tag"] = new_kube_tag
    heat_params["update_max_batch_size"] = max_batch_size

    # Rules: 1. No downgrade 2. Explicitly override 3. Merging based on set
    # Update heat_params based on the data generated above
    # pop() tolerates parameters that are already absent.
    heat_params.pop('kube_service_account_private_key', None)
    heat_params.pop('kube_service_account_key', None)

    heat_params.update(template_labels)

    cluster['cluster_template_id'] = cluster_template.uuid
    new_labels = cluster.labels.copy()
    new_labels.update(template_labels)
    cluster['labels'] = new_labels

    fields = {
        'existing': True,
        'parameters': heat_params,
        'disable_rollback': not rollback
    }
    osc.heat().stacks.update(cluster.stack_id, **fields)

View File

@ -38,3 +38,7 @@ class Driver(driver.HeatDriver):
# the scale_manager.
# https://bugs.launchpad.net/magnum/+bug/1746510
return None
def upgrade_cluster(self, context, cluster, cluster_template,
                    max_batch_size, nodegroup, scale_manager=None,
                    rollback=False):
    """Cluster upgrade is not implemented by this driver.

    The signature matches the other drivers' ``upgrade_cluster`` so that
    a call passing ``cluster_template``, ``max_batch_size`` and
    ``nodegroup`` reaches this method and gets NotImplementedError
    rather than a TypeError.

    :raises NotImplementedError: unconditionally.
    """
    raise NotImplementedError("Must implement 'upgrade_cluster'")

View File

@ -36,3 +36,8 @@ class Driver(driver.HeatDriver):
def get_scale_manager(self, context, osclient, cluster):
    """Return a MesosScaleManager built from the given arguments."""
    return MesosScaleManager(context, osclient, cluster)
def upgrade_cluster(self, context, cluster, cluster_template,
                    max_batch_size, nodegroup, scale_manager=None,
                    rollback=False):
    """Cluster upgrade is not implemented by this driver.

    :raises NotImplementedError: unconditionally.
    """
    message = "Must implement 'upgrade_cluster'"
    raise NotImplementedError(message)

View File

@ -32,3 +32,8 @@ class Driver(driver.HeatDriver):
def get_monitor(self, context, cluster):
    """Return a SwarmMonitor for the given context and cluster."""
    return monitor.SwarmMonitor(context, cluster)
def upgrade_cluster(self, context, cluster, cluster_template,
                    max_batch_size, nodegroup, scale_manager=None,
                    rollback=False):
    """Cluster upgrade is not implemented by this driver.

    :raises NotImplementedError: unconditionally.
    """
    message = "Must implement 'upgrade_cluster'"
    raise NotImplementedError(message)

View File

@ -32,3 +32,8 @@ class Driver(driver.HeatDriver):
def get_monitor(self, context, cluster):
    """Return a SwarmMonitor for the given context and cluster."""
    return monitor.SwarmMonitor(context, cluster)
def upgrade_cluster(self, context, cluster, cluster_template,
                    max_batch_size, nodegroup, scale_manager=None,
                    rollback=False):
    """Cluster upgrade is not implemented by this driver.

    :raises NotImplementedError: unconditionally.
    """
    message = "Must implement 'upgrade_cluster'"
    raise NotImplementedError(message)

View File

@ -40,7 +40,7 @@ class TestRootController(api_base.FunctionalTest):
[{u'href': u'http://localhost/v1/',
u'rel': u'self'}],
u'status': u'CURRENT',
u'max_version': u'1.7',
u'max_version': u'1.8',
u'min_version': u'1.1'}]}
self.v1_expected = {

View File

@ -43,7 +43,7 @@ class TestClusterActions(api_base.FunctionalTest):
self.cluster_obj.uuid,
{"node_count": new_node_count},
headers={"Openstack-Api-Version":
"container-infra 1.7"})
"container-infra latest"})
self.assertEqual(202, response.status_code)
response = self.get_json('/clusters/%s' % self.cluster_obj.uuid)
@ -66,7 +66,7 @@ class TestClusterActions(api_base.FunctionalTest):
self.cluster_obj.uuid,
cluster_resize_req,
headers={"Openstack-Api-Version":
"container-infra 1.7"})
"container-infra latest"})
self.assertEqual(202, response.status_code)
response = self.get_json('/clusters/%s' % self.cluster_obj.uuid)
@ -86,7 +86,7 @@ class TestClusterActions(api_base.FunctionalTest):
self.cluster_obj.uuid,
cluster_resize_req,
headers={"Openstack-Api-Version":
"container-infra 1.7"},
"container-infra latest"},
expect_errors=True)
self.assertEqual(400, response.status_code)
@ -103,7 +103,7 @@ class TestClusterActions(api_base.FunctionalTest):
self.cluster_obj.uuid,
cluster_resize_req,
headers={"Openstack-Api-Version":
"container-infra 1.7"},
"container-infra latest"},
expect_errors=True)
self.assertEqual(400, response.status_code)
@ -120,6 +120,6 @@ class TestClusterActions(api_base.FunctionalTest):
self.cluster_obj.uuid,
cluster_resize_req,
headers={"Openstack-Api-Version":
"container-infra 1.7"},
"container-infra latest"},
expect_errors=True)
self.assertEqual(400, response.status_code)

View File

@ -0,0 +1,6 @@
---
features:
- |
A new API endpoint <ClusterID>/actions/upgrade is added to support rolling
upgrades of the base OS of nodes and of the Kubernetes version. For more
details, please refer to the API Reference document.