From 9b1bd5da54b9b3a5ad10a8efa1962b61320c3150 Mon Sep 17 00:00:00 2001 From: "Spyros Trigazis (strigazi)" Date: Thu, 8 Jun 2017 08:45:51 +0000 Subject: [PATCH] Add cluster upgrade to the API To enable the rolling upgrade ability of Kubernetes Cluster, this patch is proposing a new API /upgrade to support upgrade the base operating system of nodes and the version of Kubernetes, even add-ons running on the k8s cluster: POST /actions/upgrade And the post body will be: { "cluster_template": 'dd9cc5ed-3a2b-11e9-9233-fa163e46bcc2', "max_batch_size": 1, "nodegroup": "production_group" } Co-Authored-By: Feilong Wang Task: 30168 Story: 2002210 Change-Id: Ia168877778aa0d473383eb06b1c8a16dc06b0576 --- doc/source/user/index.rst | 6 ++ doc/source/user/rolling-upgrade.rst | 30 ++++++++++ magnum/api/controllers/v1/cluster_actions.py | 49 ++++++++++++++++ magnum/api/controllers/versions.py | 3 +- magnum/common/exception.py | 4 ++ magnum/common/policies/cluster.py | 11 ++++ magnum/conductor/api.py | 16 ++++++ .../conductor/handlers/cluster_conductor.py | 49 ++++++++++++++++ magnum/conductor/utils.py | 11 ++++ magnum/drivers/common/driver.py | 7 +++ magnum/drivers/heat/driver.py | 17 ++++++ magnum/drivers/k8s_coreos_v1/driver.py | 5 ++ magnum/drivers/k8s_fedora_atomic_v1/driver.py | 57 +++++++++++++++++++ magnum/drivers/k8s_fedora_ironic_v1/driver.py | 4 ++ magnum/drivers/mesos_ubuntu_v1/driver.py | 5 ++ .../drivers/swarm_fedora_atomic_v1/driver.py | 5 ++ .../drivers/swarm_fedora_atomic_v2/driver.py | 5 ++ .../tests/unit/api/controllers/test_root.py | 2 +- .../controllers/v1/test_cluster_actions.py | 10 ++-- .../notes/upgrade-api-975233ab93c0c092.yaml | 6 ++ 20 files changed, 295 insertions(+), 7 deletions(-) create mode 100644 doc/source/user/rolling-upgrade.rst create mode 100644 releasenotes/notes/upgrade-api-975233ab93c0c092.yaml diff --git a/doc/source/user/index.rst b/doc/source/user/index.rst index 33fb905252..cf3151613e 100644 --- a/doc/source/user/index.rst +++ 
b/doc/source/user/index.rst @@ -33,6 +33,7 @@ created and managed by Magnum to support the COE's. #. `Notification`_ #. `Container Monitoring`_ #. `Kubernetes External Load Balancer`_ +#. `Rolling Upgrade`_ Overview ======== @@ -3230,3 +3231,8 @@ Kubernetes External Load Balancer ================================= .. include:: kubernetes-load-balancer.rst + +Rolling Upgrade +=============== + +.. include:: rolling-upgrade.rst diff --git a/doc/source/user/rolling-upgrade.rst b/doc/source/user/rolling-upgrade.rst new file mode 100644 index 0000000000..d35d255454 --- /dev/null +++ b/doc/source/user/rolling-upgrade.rst @@ -0,0 +1,30 @@ +Rolling upgrade is one of the most important features users want to see in a +managed Kubernetes service. In Magnum, we are thinking more deeply about how +to provide a better user experience. + + +.. code-block:: bash + + #!/bin/bash -x + + IP="192.168.122.1" + CLUSTER="797b39e1-fac2-48d3-8377-d6e6cc443d39" + CT="e32c8cf7-394b-45e6-a17e-4fe6a30ad64b" + + # Upgrade curl + req_body=$(cat << EOF + { + "max_batch_size": 1, + "nodegroup": "master", + "cluster_template": "${CT}" + } + EOF + ) + USER_TOKEN=$(openstack token issue -c id -f value) + curl -g -i -X POST https://${IP}:9511/v1/clusters/${CLUSTER}/actions/upgrade \ + -H "OpenStack-API-Version: container-infra latest" \ + -H "X-Auth-Token: $USER_TOKEN" \ + -H "Content-Type: application/json" \ + -H "Accept: application/json" \ + -H "User-Agent: None" \ + -d "$req_body" diff --git a/magnum/api/controllers/v1/cluster_actions.py b/magnum/api/controllers/v1/cluster_actions.py index f09cb9b64d..e4fd7aa5ee 100644 --- a/magnum/api/controllers/v1/cluster_actions.py +++ b/magnum/api/controllers/v1/cluster_actions.py @@ -55,6 +55,22 @@ class ClusterResizeRequest(base.APIBase): """Group of nodes to be uprgaded (master or node)""" +class ClusterUpgradeRequest(base.APIBase): + """API object for handling upgrade requests. + + This class enforces type checking and value constraints. 
+ """ + + max_batch_size = wtypes.IntegerType(minimum=1) + """Max batch size of nodes to be upgraded in parallel""" + + nodegroup = wtypes.StringType(min_length=1, max_length=255) + """Group of nodes to be upgraded (master or node)""" + + cluster_template = wtypes.StringType(min_length=1, max_length=255) + """The cluster_template UUID""" + + class ActionsController(base.Controller): """REST controller for cluster actions.""" def __init__(self): @@ -62,6 +78,7 @@ class ActionsController(base.Controller): _custom_actions = { 'resize': ['POST'], + 'upgrade': ['POST'] } @base.Controller.api_version("1.7") @@ -108,3 +125,35 @@ class ActionsController(base.Controller): cluster_resize_req.nodes_to_remove, nodegroup) return ClusterID(cluster.uuid) + + @base.Controller.api_version("1.8") + @expose.expose(None, types.uuid_or_name, + body=ClusterUpgradeRequest, status_code=202) + def upgrade(self, cluster_ident, cluster_upgrade_req): + """Upgrade a cluster. + + :param cluster_ident: UUID of a cluster or logical name of the cluster. 
+ """ + context = pecan.request.context + cluster = api_utils.get_resource('Cluster', cluster_ident) + policy.enforce(context, 'cluster:upgrade', cluster, + action='cluster:upgrade') + + new_cluster_template = api_utils.get_resource( + 'ClusterTemplate', + cluster_upgrade_req.cluster_template) + + if (cluster_upgrade_req.nodegroup == wtypes.Unset or + not cluster_upgrade_req.nodegroup): + # NOTE(ttsiouts): If the nodegroup is not specified + # reflect the change to the default worker nodegroup + nodegroup = cluster.default_ng_worker + else: + nodegroup = objects.NodeGroup.get( + context, cluster.uuid, cluster_upgrade_req.nodegroup) + pecan.request.rpcapi.cluster_upgrade( + cluster, + new_cluster_template, + cluster_upgrade_req.max_batch_size, + nodegroup) + return ClusterID(cluster.uuid) diff --git a/magnum/api/controllers/versions.py b/magnum/api/controllers/versions.py index 22fb6ac753..d4ca14d1c8 100644 --- a/magnum/api/controllers/versions.py +++ b/magnum/api/controllers/versions.py @@ -40,10 +40,11 @@ REST_API_VERSION_HISTORY = """REST API Version History: * 1.5 - Add cluster CA certificate rotation support * 1.6 - Add quotas API * 1.7 - Add resize API + * 1.8 - Add upgrade API """ BASE_VER = '1.1' -CURRENT_MAX_VER = '1.7' +CURRENT_MAX_VER = '1.8' class Version(object): diff --git a/magnum/common/exception.py b/magnum/common/exception.py index 4aaf684a46..ad836fcbb6 100755 --- a/magnum/common/exception.py +++ b/magnum/common/exception.py @@ -183,6 +183,10 @@ class InvalidSubnet(Invalid): message = _("Received invalid subnet %(subnet)s.") +class InvalidVersion(Invalid): + message = _("Received invalid tag for %(tag)s.") + + class HTTPNotFound(ResourceNotFound): pass diff --git a/magnum/common/policies/cluster.py b/magnum/common/policies/cluster.py index abace3c6f4..c2617e3317 100644 --- a/magnum/common/policies/cluster.py +++ b/magnum/common/policies/cluster.py @@ -150,6 +150,17 @@ rules = [ 'method': 'POST' } ] + ), + policy.DocumentedRuleDefault( + 
name=CLUSTER % 'upgrade', + check_str=base.RULE_DENY_CLUSTER_USER, + description='Upgrade an existing cluster.', + operations=[ + { + 'path': '/v1/clusters/{cluster_ident}/actions/upgrade', + 'method': 'POST' + } + ] ) ] diff --git a/magnum/conductor/api.py b/magnum/conductor/api.py index ecf87c9a95..17b6ca8537 100644 --- a/magnum/conductor/api.py +++ b/magnum/conductor/api.py @@ -74,6 +74,22 @@ class API(rpc_service.API): nodes_to_remove=nodes_to_remove, nodegroup=nodegroup) + def cluster_upgrade(self, cluster, cluster_template, max_batch_size, + nodegroup): + return self._call('cluster_upgrade', + cluster=cluster, + cluster_template=cluster_template, + max_batch_size=max_batch_size, + nodegroup=nodegroup) + + def cluster_upgrade_async(self, cluster, cluster_template, max_batch_size, + nodegroup): + return self._call('cluster_upgrade', + cluster=cluster, + cluster_template=cluster_template, + max_batch_size=max_batch_size, + nodegroup=nodegroup) + # Federation Operations def federation_create(self, federation, create_timeout): diff --git a/magnum/conductor/handlers/cluster_conductor.py b/magnum/conductor/handlers/cluster_conductor.py index bebf1d9605..6ef013e8a4 100755 --- a/magnum/conductor/handlers/cluster_conductor.py +++ b/magnum/conductor/handlers/cluster_conductor.py @@ -269,3 +269,52 @@ class Handler(object): cluster.save() return cluster + + def cluster_upgrade(self, context, cluster, cluster_template, + max_batch_size, nodegroup, rollback=False): + LOG.debug('cluster_conductor cluster_upgrade') + + # osc = clients.OpenStackClients(context) + allow_update_status = ( + fields.ClusterStatus.CREATE_COMPLETE, + fields.ClusterStatus.UPDATE_COMPLETE, + fields.ClusterStatus.RESUME_COMPLETE, + fields.ClusterStatus.RESTORE_COMPLETE, + fields.ClusterStatus.ROLLBACK_COMPLETE, + fields.ClusterStatus.SNAPSHOT_COMPLETE, + fields.ClusterStatus.CHECK_COMPLETE, + fields.ClusterStatus.ADOPT_COMPLETE + ) + if cluster.status not in allow_update_status: + 
conductor_utils.notify_about_cluster_operation( + context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE) + operation = _('Upgrading a cluster when status is ' + '"%s"') % cluster.status + raise exception.NotSupported(operation=operation) + + # Get driver + ct = conductor_utils.retrieve_cluster_template(context, cluster) + cluster_driver = driver.Driver.get_driver(ct.server_type, + ct.cluster_distro, + ct.coe) + # Upgrade cluster + try: + conductor_utils.notify_about_cluster_operation( + context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING) + cluster_driver.upgrade_cluster(context, cluster, cluster_template, + max_batch_size, nodegroup, rollback) + cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS + cluster.status_reason = None + except Exception as e: + cluster.status = fields.ClusterStatus.UPDATE_FAILED + cluster.status_reason = six.text_type(e) + cluster.save() + conductor_utils.notify_about_cluster_operation( + context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE) + if isinstance(e, exc.HTTPBadRequest): + e = exception.InvalidParameterValue(message=six.text_type(e)) + raise e + raise + + cluster.save() + return cluster diff --git a/magnum/conductor/utils.py b/magnum/conductor/utils.py index a94e173aae..e6ab384e31 100644 --- a/magnum/conductor/utils.py +++ b/magnum/conductor/utils.py @@ -45,6 +45,17 @@ def retrieve_cluster_uuid(context, cluster_ident): return cluster_ident +def retrieve_ct_by_name_or_uuid(context, cluster_template_ident): + if not uuidutils.is_uuid_like(cluster_template_ident): + return cluster_template.ClusterTemplate.get_by_name( + context, + cluster_template_ident) + else: + return cluster_template.ClusterTemplate.get_by_uuid( + context, + cluster_template_ident) + + def object_has_stack(context, cluster_uuid): osc = clients.OpenStackClients(context) obj = retrieve_cluster(context, cluster_uuid) diff --git a/magnum/drivers/common/driver.py b/magnum/drivers/common/driver.py index babb7848c5..31e64e858d 100644 --- 
a/magnum/drivers/common/driver.py +++ b/magnum/drivers/common/driver.py @@ -176,6 +176,13 @@ class Driver(object): """ return None + @abc.abstractmethod + def upgrade_cluster(self, context, cluster, cluster_template, + max_batch_size, nodegroup, scale_manager=None, + rollback=False): + raise NotImplementedError("Subclasses must implement " + "'upgrade_cluster'.") + @abc.abstractmethod def delete_cluster(self, context, cluster): raise NotImplementedError("Subclasses must implement " diff --git a/magnum/drivers/heat/driver.py b/magnum/drivers/heat/driver.py index 18d256d0b1..f85072b6da 100755 --- a/magnum/drivers/heat/driver.py +++ b/magnum/drivers/heat/driver.py @@ -46,6 +46,17 @@ class HeatDriver(driver.Driver): orchestrating cluster lifecycle operations """ + def _extract_template_definition_up(self, context, cluster, + cluster_template, + scale_manager=None): + ct_obj = conductor_utils.retrieve_ct_by_name_or_uuid( + context, + cluster_template) + definition = self.get_template_definition() + return definition.extract_definition(context, ct_obj, + cluster, + scale_manager=scale_manager) + def _extract_template_definition(self, context, cluster, scale_manager=None): cluster_template = conductor_utils.retrieve_cluster_template(context, @@ -105,6 +116,12 @@ class HeatDriver(driver.Driver): rollback=False): self._update_stack(context, cluster, scale_manager, rollback) + @abc.abstractmethod + def upgrade_cluster(self, context, cluster, cluster_template, + max_batch_size, nodegroup, scale_manager=None, + rollback=False): + raise NotImplementedError("Must implement 'upgrade_cluster'") + def delete_cluster(self, context, cluster): self.pre_delete_cluster(context, cluster) diff --git a/magnum/drivers/k8s_coreos_v1/driver.py b/magnum/drivers/k8s_coreos_v1/driver.py index 0283aef38a..68d4ef4c10 100644 --- a/magnum/drivers/k8s_coreos_v1/driver.py +++ b/magnum/drivers/k8s_coreos_v1/driver.py @@ -38,3 +38,8 @@ class Driver(driver.HeatDriver): # the scale_manager. 
# https://bugs.launchpad.net/magnum/+bug/1746510 return None + + def upgrade_cluster(self, context, cluster, cluster_template, + max_batch_size, nodegroup, scale_manager=None, + rollback=False): + raise NotImplementedError("Must implement 'upgrade_cluster'") diff --git a/magnum/drivers/k8s_fedora_atomic_v1/driver.py b/magnum/drivers/k8s_fedora_atomic_v1/driver.py index 6bbe334505..a61d6b0b02 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/driver.py +++ b/magnum/drivers/k8s_fedora_atomic_v1/driver.py @@ -13,7 +13,10 @@ # under the License. from oslo_log import log as logging +from pbr.version import SemanticVersion as SV +from magnum.common import clients +from magnum.common import exception from magnum.common import keystone from magnum.common import octavia from magnum.drivers.common import k8s_monitor @@ -51,3 +54,57 @@ class Driver(driver.HeatDriver): LOG.info("Starting to delete loadbalancers for cluster %s", cluster.uuid) octavia.delete_loadbalancers(context, cluster) + + def upgrade_cluster(self, context, cluster, cluster_template, + max_batch_size, nodegroup, scale_manager=None, + rollback=False): + osc = clients.OpenStackClients(context) + _, heat_params, _ = ( + self._extract_template_definition(context, cluster, + scale_manager=scale_manager)) + # Extract labels/tags from the cluster, not the template. + # Some version tags are not declared in labels explicitly, + # so we need to get them from heat_params based on the labels given in + # the new cluster template. + current_addons = {} + new_addons = {} + for label in cluster_template.labels: + # This is the upgrade API, so we don't introduce new stuff through it, + # but just focus on the version change. 
+ new_addons[label] = cluster_template.labels[label] + if ((label.endswith('_tag') or + label.endswith('_version')) and label in heat_params): + current_addons[label] = heat_params[label] + if (SV.from_pip_string(new_addons[label]) < + SV.from_pip_string(current_addons[label])): + raise exception.InvalidVersion(tag=label) + + heat_params["server_image"] = cluster_template.image_id + heat_params["master_image"] = cluster_template.image_id + heat_params["minion_image"] = cluster_template.image_id + # NOTE(flwang): Overwrite the kube_tag as well to avoid a server + # rebuild then do the k8s upgrade again, when both image id and + # kube_tag changed + heat_params["kube_tag"] = cluster_template.labels["kube_tag"] + heat_params["master_kube_tag"] = cluster_template.labels["kube_tag"] + heat_params["minion_kube_tag"] = cluster_template.labels["kube_tag"] + heat_params["update_max_batch_size"] = max_batch_size + # Rules: 1. No downgrade 2. Explicitly override 3. Merging based on set + # Update heat_params based on the data generated above + del heat_params['kube_service_account_private_key'] + del heat_params['kube_service_account_key'] + + for label in new_addons: + heat_params[label] = cluster_template.labels[label] + + cluster['cluster_template_id'] = cluster_template.uuid + new_labels = cluster.labels.copy() + new_labels.update(cluster_template.labels) + cluster['labels'] = new_labels + + fields = { + 'existing': True, + 'parameters': heat_params, + 'disable_rollback': not rollback + } + osc.heat().stacks.update(cluster.stack_id, **fields) diff --git a/magnum/drivers/k8s_fedora_ironic_v1/driver.py b/magnum/drivers/k8s_fedora_ironic_v1/driver.py index d620f482b4..8eb656fc8c 100644 --- a/magnum/drivers/k8s_fedora_ironic_v1/driver.py +++ b/magnum/drivers/k8s_fedora_ironic_v1/driver.py @@ -38,3 +38,7 @@ class Driver(driver.HeatDriver): # the scale_manager. 
# https://bugs.launchpad.net/magnum/+bug/1746510 return None + + def upgrade_cluster(self, context, cluster, scale_manager=None, + rollback=False): + raise NotImplementedError("Must implement 'upgrade_cluster'") diff --git a/magnum/drivers/mesos_ubuntu_v1/driver.py b/magnum/drivers/mesos_ubuntu_v1/driver.py index c40f2395f3..dc049704c9 100644 --- a/magnum/drivers/mesos_ubuntu_v1/driver.py +++ b/magnum/drivers/mesos_ubuntu_v1/driver.py @@ -36,3 +36,8 @@ class Driver(driver.HeatDriver): def get_scale_manager(self, context, osclient, cluster): return MesosScaleManager(context, osclient, cluster) + + def upgrade_cluster(self, context, cluster, cluster_template, + max_batch_size, nodegroup, scale_manager=None, + rollback=False): + raise NotImplementedError("Must implement 'upgrade_cluster'") diff --git a/magnum/drivers/swarm_fedora_atomic_v1/driver.py b/magnum/drivers/swarm_fedora_atomic_v1/driver.py index 29bf236bf9..eec75c6b40 100644 --- a/magnum/drivers/swarm_fedora_atomic_v1/driver.py +++ b/magnum/drivers/swarm_fedora_atomic_v1/driver.py @@ -32,3 +32,8 @@ class Driver(driver.HeatDriver): def get_monitor(self, context, cluster): return monitor.SwarmMonitor(context, cluster) + + def upgrade_cluster(self, context, cluster, cluster_template, + max_batch_size, nodegroup, scale_manager=None, + rollback=False): + raise NotImplementedError("Must implement 'upgrade_cluster'") diff --git a/magnum/drivers/swarm_fedora_atomic_v2/driver.py b/magnum/drivers/swarm_fedora_atomic_v2/driver.py index b00d028a3f..7965b58298 100644 --- a/magnum/drivers/swarm_fedora_atomic_v2/driver.py +++ b/magnum/drivers/swarm_fedora_atomic_v2/driver.py @@ -32,3 +32,8 @@ class Driver(driver.HeatDriver): def get_monitor(self, context, cluster): return monitor.SwarmMonitor(context, cluster) + + def upgrade_cluster(self, context, cluster, cluster_template, + max_batch_size, nodegroup, scale_manager=None, + rollback=False): + raise NotImplementedError("Must implement 'upgrade_cluster'") diff --git 
a/magnum/tests/unit/api/controllers/test_root.py b/magnum/tests/unit/api/controllers/test_root.py index 27eb6d44a4..1e888f5f9a 100644 --- a/magnum/tests/unit/api/controllers/test_root.py +++ b/magnum/tests/unit/api/controllers/test_root.py @@ -40,7 +40,7 @@ class TestRootController(api_base.FunctionalTest): [{u'href': u'http://localhost/v1/', u'rel': u'self'}], u'status': u'CURRENT', - u'max_version': u'1.7', + u'max_version': u'1.8', u'min_version': u'1.1'}]} self.v1_expected = { diff --git a/magnum/tests/unit/api/controllers/v1/test_cluster_actions.py b/magnum/tests/unit/api/controllers/v1/test_cluster_actions.py index 8d7b7ba3ac..9a1eb83fe1 100644 --- a/magnum/tests/unit/api/controllers/v1/test_cluster_actions.py +++ b/magnum/tests/unit/api/controllers/v1/test_cluster_actions.py @@ -43,7 +43,7 @@ class TestClusterActions(api_base.FunctionalTest): self.cluster_obj.uuid, {"node_count": new_node_count}, headers={"Openstack-Api-Version": - "container-infra 1.7"}) + "container-infra latest"}) self.assertEqual(202, response.status_code) response = self.get_json('/clusters/%s' % self.cluster_obj.uuid) @@ -66,7 +66,7 @@ class TestClusterActions(api_base.FunctionalTest): self.cluster_obj.uuid, cluster_resize_req, headers={"Openstack-Api-Version": - "container-infra 1.7"}) + "container-infra latest"}) self.assertEqual(202, response.status_code) response = self.get_json('/clusters/%s' % self.cluster_obj.uuid) @@ -86,7 +86,7 @@ class TestClusterActions(api_base.FunctionalTest): self.cluster_obj.uuid, cluster_resize_req, headers={"Openstack-Api-Version": - "container-infra 1.7"}, + "container-infra latest"}, expect_errors=True) self.assertEqual(400, response.status_code) @@ -103,7 +103,7 @@ class TestClusterActions(api_base.FunctionalTest): self.cluster_obj.uuid, cluster_resize_req, headers={"Openstack-Api-Version": - "container-infra 1.7"}, + "container-infra latest"}, expect_errors=True) self.assertEqual(400, response.status_code) @@ -120,6 +120,6 @@ class 
TestClusterActions(api_base.FunctionalTest): self.cluster_obj.uuid, cluster_resize_req, headers={"Openstack-Api-Version": - "container-infra 1.7"}, + "container-infra latest"}, expect_errors=True) self.assertEqual(400, response.status_code) diff --git a/releasenotes/notes/upgrade-api-975233ab93c0c092.yaml b/releasenotes/notes/upgrade-api-975233ab93c0c092.yaml new file mode 100644 index 0000000000..98685c9ba8 --- /dev/null +++ b/releasenotes/notes/upgrade-api-975233ab93c0c092.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + A new API endpoint /actions/upgrade is added to support rolling + upgrades of the base OS of nodes and of the Kubernetes version. For more + details, please refer to the API Reference document.