ng-13: Support nodegroup upgrade

Adds support for upgrading nodegroups. All non-default nodegroups
can be upgraded using the cluster template (CT) set in the cluster.
The only label that gets upgraded for now is kube_tag. All other
labels in the new cluster_template are ignored.

Change-Id: Icade1a70f160d5ec1c0e6f06ee642e29fe9b02ff
This commit is contained in:
Theodoros Tsioutsias 2019-10-03 14:55:54 +00:00
parent 113fdc44b2
commit 0ac4db955f
8 changed files with 351 additions and 32 deletions

View File

@ -140,8 +140,7 @@ class ActionsController(base.Controller):
action='cluster:upgrade')
new_cluster_template = api_utils.get_resource(
'ClusterTemplate',
cluster_upgrade_req.cluster_template)
'ClusterTemplate', cluster_upgrade_req.cluster_template)
if (cluster_upgrade_req.nodegroup == wtypes.Unset or
not cluster_upgrade_req.nodegroup):
@ -151,6 +150,13 @@ class ActionsController(base.Controller):
else:
nodegroup = objects.NodeGroup.get(
context, cluster.uuid, cluster_upgrade_req.nodegroup)
if (new_cluster_template.uuid != cluster.cluster_template_id
and not nodegroup.is_default):
reason = ("Nodegroup %s can be upgraded only to "
"match cluster's template (%s).")
reason = reason % (nodegroup.name,
cluster.cluster_template.name)
raise exception.InvalidClusterTemplateForUpgrade(reason=reason)
pecan.request.rpcapi.cluster_upgrade(
cluster,

View File

@ -462,3 +462,7 @@ class CreateMasterNodeGroup(NotSupported):
class NgOperationInProgress(Invalid):
    """Error reported when a nodegroup already has an operation running.

    The message is formatted with a ``nodegroup`` keyword argument.
    """

    message = _("Nodegroup %(nodegroup)s already has an operation in "
                "progress.")
class InvalidClusterTemplateForUpgrade(Conflict):
    """Error reported when a cluster template cannot be used for an upgrade.

    The message is formatted with a ``reason`` keyword argument that
    explains why the template was rejected.
    """

    message = _("Cluster Template is not valid for upgrade: %(reason)s")

View File

@ -259,6 +259,7 @@ class Handler(object):
# Resize cluster
try:
nodegroup.node_count = node_count
nodegroup.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
nodegroup.save()
conductor_utils.notify_about_cluster_operation(
context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING,
@ -273,6 +274,8 @@ class Handler(object):
cluster.status_reason = six.text_type(e)
cluster.save()
nodegroup.node_count = old_node_count
nodegroup.status = fields.ClusterStatus.UPDATE_FAILED
nodegroup.status_reason = six.text_type(e)
nodegroup.save()
conductor_utils.notify_about_cluster_operation(
context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
@ -321,11 +324,15 @@ class Handler(object):
cluster_driver.upgrade_cluster(context, cluster, cluster_template,
max_batch_size, nodegroup, rollback)
cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
nodegroup.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
cluster.status_reason = None
except Exception as e:
cluster.status = fields.ClusterStatus.UPDATE_FAILED
cluster.status_reason = six.text_type(e)
cluster.save()
nodegroup.status = fields.ClusterStatus.UPDATE_FAILED
nodegroup.status_reason = six.text_type(e)
nodegroup.save()
conductor_utils.notify_about_cluster_operation(
context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
cluster)
@ -334,5 +341,6 @@ class Handler(object):
raise e
raise
nodegroup.save()
cluster.save()
return cluster

View File

@ -327,14 +327,14 @@ class FedoraKubernetesDriver(KubernetesDriver):
def upgrade_cluster(self, context, cluster, cluster_template,
max_batch_size, nodegroup, scale_manager=None,
rollback=False):
"""For Train release we are going to upgrade only the kube tag"""
osc = clients.OpenStackClients(context)
_, heat_params, _ = (
self._extract_template_definition(context, cluster,
scale_manager=scale_manager))
# Extract labels/tags from cluster not template
# Some version tags are not declared explicitly in the labels,
# so we need to get them from heat_params based on the labels given in
# the new cluster template.
# Use this just to check that we are not downgrading.
heat_params = {}
if 'kube_tag' in nodegroup.labels:
heat_params['kube_tag'] = nodegroup.labels['kube_tag']
current_addons = {}
new_addons = {}
for label in cluster_template.labels:
@ -348,41 +348,63 @@ class FedoraKubernetesDriver(KubernetesDriver):
if (SV.from_pip_string(new_addons[label]) <
SV.from_pip_string(current_addons[label])):
raise exception.InvalidVersion(tag=label)
except exception.InvalidVersion:
raise
except Exception as e:
# NOTE(flwang): Different cloud providers may use different
# tag/version formats which SemanticVersion may not be able to
# parse. In that case, just skip the check.
LOG.debug("Failed to parse tag/version %s", str(e))
heat_params["master_image"] = cluster_template.image_id
heat_params["minion_image"] = cluster_template.image_id
# NOTE(flwang): Overwrite the kube_tag as well to avoid a server
# rebuild then do the k8s upgrade again, when both image id and
# kube_tag changed
heat_params["kube_tag"] = cluster_template.labels["kube_tag"]
heat_params["kube_version"] = cluster_template.labels["kube_tag"]
heat_params["master_kube_tag"] = cluster_template.labels["kube_tag"]
heat_params["minion_kube_tag"] = cluster_template.labels["kube_tag"]
heat_params["update_max_batch_size"] = max_batch_size
# Since the above check passed just
# hardcode what we want to send to heat.
# Rules: 1. No downgrade 2. Explicitly override 3. Merging based on set
# Update heat_params based on the data generated above
del heat_params['kube_service_account_private_key']
del heat_params['kube_service_account_key']
try:
heat_params = {
"kube_tag": cluster_template.labels["kube_tag"],
"kube_version": cluster_template.labels["kube_tag"],
"master_kube_tag": cluster_template.labels["kube_tag"],
"minion_kube_tag": cluster_template.labels["kube_tag"],
"update_max_batch_size": max_batch_size
}
except KeyError:
# Corner case but if the user defined an invalid CT just abort
reason = ("Cluster template %s does not contain a "
"valid kube_tag") % cluster_template.name
raise exception.InvalidClusterTemplateForUpgrade(reason=reason)
for label in new_addons:
heat_params[label] = cluster_template.labels[label]
stack_id = nodegroup.stack_id
if nodegroup is not None and not nodegroup.is_default:
heat_params['is_cluster_stack'] = False
# For now set the worker_role explicitly in order to
# make sure that the is_master condition fails.
heat_params['worker_role'] = nodegroup.role
cluster['cluster_template_id'] = cluster_template.uuid
new_labels = cluster.labels.copy()
new_labels.update(cluster_template.labels)
cluster['labels'] = new_labels
new_kube_tag = cluster_template.labels['kube_tag']
new_labels = nodegroup.labels.copy()
new_labels.update({'kube_tag': new_kube_tag})
# we need to set the whole dict to the object
# and not just update the existing labels. This
# is how obj_what_changed works.
nodegroup.labels = new_labels
if nodegroup.is_default:
cluster.cluster_template_id = cluster_template.uuid
cluster.labels = new_labels
if nodegroup.role == 'master':
other_default_ng = cluster.default_ng_worker
else:
other_default_ng = cluster.default_ng_master
other_default_ng.labels = new_labels
other_default_ng.save()
fields = {
'existing': True,
'parameters': heat_params,
'disable_rollback': not rollback
}
osc.heat().stacks.update(cluster.stack_id, **fields)
osc.heat().stacks.update(stack_id, **fields)
def get_nodegroup_extra_params(self, cluster, osc):
network = osc.heat().resources.get(cluster.stack_id, 'network')

View File

@ -20,10 +20,10 @@ from magnum.tests.unit.objects import utils as obj_utils
CONF = magnum.conf.CONF
class TestClusterActions(api_base.FunctionalTest):
class TestClusterResize(api_base.FunctionalTest):
def setUp(self):
super(TestClusterActions, self).setUp()
super(TestClusterResize, self).setUp()
self.cluster_obj = obj_utils.create_test_cluster(
self.context, name='cluster_example_A', node_count=3)
p = mock.patch.object(rpcapi.API, 'cluster_resize_async')
@ -123,3 +123,126 @@ class TestClusterActions(api_base.FunctionalTest):
"container-infra latest"},
expect_errors=True)
self.assertEqual(400, response.status_code)
class TestClusterUpgrade(api_base.FunctionalTest):
    """API tests for the ``/clusters/<ident>/actions/upgrade`` endpoint.

    The ``cluster_upgrade`` RPC call is patched out, so these tests only
    exercise the request handling done by the API layer.
    """

    def setUp(self):
        super(TestClusterUpgrade, self).setUp()
        # Two templates: the cluster is created from the first one, the
        # second one is used as the "new" template in upgrade requests.
        self.cluster_template1 = obj_utils.create_test_cluster_template(
            self.context, uuid='94889766-e686-11e9-81b4-2a2ae2dbcce4',
            name='test_1', id=1)
        self.cluster_template2 = obj_utils.create_test_cluster_template(
            self.context, uuid='94889aa4-e686-11e9-81b4-2a2ae2dbcce4',
            name='test_2', id=2)
        self.cluster_obj = obj_utils.create_test_cluster(
            self.context, name='cluster_example_A',
            cluster_template_id=self.cluster_template1.uuid)
        # An additional, non-default nodegroup in the same cluster.
        self.nodegroup_obj = obj_utils.create_test_nodegroup(
            self.context, name='test_ng', cluster_id=self.cluster_obj.uuid,
            uuid='27e3153e-d5bf-4b7e-b517-fb518e17f34c',
            project_id=self.cluster_obj.project_id,
            is_default=False)
        p = mock.patch.object(rpcapi.API, 'cluster_upgrade')
        self.mock_cluster_upgrade = p.start()
        self.mock_cluster_upgrade.side_effect = self._sim_rpc_cluster_upgrade
        self.addCleanup(p.stop)

    def _sim_rpc_cluster_upgrade(self, cluster, cluster_template, batch_size,
                                 nodegroup):
        """Stand-in for the RPC call: hand back the cluster unchanged."""
        return cluster

    def _post_upgrade(self, cluster_ident, body, **kwargs):
        """POST ``body`` to the upgrade action of ``cluster_ident``.

        Extra keyword arguments (e.g. ``expect_errors``) are forwarded to
        ``post_json`` untouched.
        """
        return self.post_json(
            '/clusters/%s/actions/upgrade' % cluster_ident,
            body,
            headers={"Openstack-Api-Version": "container-infra latest"},
            **kwargs)

    def test_upgrade(self):
        cluster_upgrade_req = {
            "cluster_template": "test_2"
        }
        response = self._post_upgrade(self.cluster_obj.uuid,
                                      cluster_upgrade_req)
        self.assertEqual(202, response.status_code)

    def test_upgrade_default_worker(self):
        cluster_upgrade_req = {
            "cluster_template": "test_2",
            "nodegroup": self.cluster_obj.default_ng_worker.uuid
        }
        response = self._post_upgrade(self.cluster_obj.uuid,
                                      cluster_upgrade_req)
        self.assertEqual(202, response.status_code)

    def test_upgrade_default_master(self):
        cluster_upgrade_req = {
            "cluster_template": "test_2",
            "nodegroup": self.cluster_obj.default_ng_master.uuid
        }
        response = self._post_upgrade(self.cluster_obj.uuid,
                                      cluster_upgrade_req)
        self.assertEqual(202, response.status_code)

    def test_upgrade_non_default_ng(self):
        # A non-default nodegroup is upgraded with the template the
        # cluster was created from.
        cluster_upgrade_req = {
            "cluster_template": "test_1",
            "nodegroup": self.nodegroup_obj.uuid
        }
        response = self._post_upgrade(self.cluster_obj.uuid,
                                      cluster_upgrade_req)
        self.assertEqual(202, response.status_code)

    def test_upgrade_cluster_not_found(self):
        cluster_upgrade_req = {
            "cluster_template": "test_2"
        }
        response = self._post_upgrade('not_there', cluster_upgrade_req,
                                      expect_errors=True)
        self.assertEqual(404, response.status_code)

    def test_upgrade_ct_not_found(self):
        cluster_upgrade_req = {
            "cluster_template": "not_there"
        }
        response = self._post_upgrade(self.cluster_obj.uuid,
                                      cluster_upgrade_req,
                                      expect_errors=True)
        self.assertEqual(404, response.status_code)

    def test_upgrade_ng_not_found(self):
        cluster_upgrade_req = {
            "cluster_template": "test_2",
            "nodegroup": "not_there"
        }
        response = self._post_upgrade(self.cluster_obj.uuid,
                                      cluster_upgrade_req,
                                      expect_errors=True)
        self.assertEqual(404, response.status_code)

    def test_upgrade_non_default_ng_invalid_ct(self):
        # A non-default nodegroup combined with a template other than the
        # cluster's own one must be rejected with a conflict.
        cluster_upgrade_req = {
            "cluster_template": "test_2",
            "nodegroup": self.nodegroup_obj.uuid
        }
        response = self._post_upgrade(self.cluster_obj.uuid,
                                      cluster_upgrade_req,
                                      expect_errors=True)
        self.assertEqual(409, response.status_code)
        # The request is refused before anything is sent to the conductor.
        self.mock_cluster_upgrade.assert_not_called()

View File

@ -333,7 +333,8 @@ def get_nodegroups_for_cluster(**kw):
node_addresses=kw.get('node_addresses', ['172.17.2.4']),
node_count=kw.get('node_count', 3),
status=kw.get('worker_status', 'CREATE_COMPLETE'),
status_reason=kw.get('worker_reason', 'Completed successfully')
status_reason=kw.get('worker_reason', 'Completed successfully'),
image_id=kw.get('image_id', 'test_image')
)
# get masters nodegroup
@ -347,7 +348,8 @@ def get_nodegroups_for_cluster(**kw):
node_addresses=kw.get('master_addresses', ['172.17.2.18']),
node_count=kw.get('master_count', 3),
status=kw.get('master_status', 'CREATE_COMPLETE'),
status_reason=kw.get('master_reason', 'Completed successfully')
status_reason=kw.get('master_reason', 'Completed successfully'),
image_id=kw.get('image_id', 'test_image')
)
return {'master': master, 'worker': worker}

View File

@ -0,0 +1,142 @@
# Copyright (c) 2018 European Organization for Nuclear Research.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from mock import patch
from magnum.common import exception
from magnum.drivers.k8s_fedora_atomic_v1 import driver
from magnum.tests.unit.db import base
from magnum.tests.unit.objects import utils as obj_utils
class K8sFedoraAtomicV1DriverTest(base.DbTestCase):
    """Tests for upgrade_cluster() of the k8s_fedora_atomic_v1 driver."""

    def setUp(self):
        super(K8sFedoraAtomicV1DriverTest, self).setUp()
        self.driver = driver.Driver()
        # Template used as the upgrade target: it carries a kube_tag label
        # and an image that differs from the one of the cluster below.
        self.cluster_template = obj_utils.create_test_cluster_template(
            self.context, uuid='94889aa4-e686-11e9-81b4-2a2ae2dbcce4',
            name='test_2', id=2, labels={'kube_tag': 'v1.14.7'},
            image_id='test-image2')
        self.cluster_obj = obj_utils.create_test_cluster(
            self.context, name='cluster_example_A', image_id='test-image1')
        self.cluster_obj.refresh()
        # Extra, non-default nodegroup that belongs to the same cluster.
        self.nodegroup_obj = obj_utils.create_test_nodegroup(
            self.context, name='test_ng', cluster_id=self.cluster_obj.uuid,
            uuid='27e3153e-d5bf-4b7e-b517-fb518e17f34c',
            project_id=self.cluster_obj.project_id, is_default=False,
            image_id='test-image1')
        self.nodegroup_obj.refresh()

    @patch('magnum.common.keystone.KeystoneClientV3')
    @patch('magnum.common.clients.OpenStackClients')
    def test_upgrade_default_worker_ng(self, mock_osc, mock_keystone):
        """Upgrade through the default worker nodegroup."""
        mock_keystone.is_octavia_enabled.return_value = False
        def_ng = self.cluster_obj.default_ng_worker
        self.driver.upgrade_cluster(self.context, self.cluster_obj,
                                    self.cluster_template, 1, def_ng)
        # make sure that the kube_tag is reflected correctly to the cluster
        # and the default nodegroups.
        self.assertEqual(self.cluster_template.labels['kube_tag'],
                         self.cluster_obj.labels['kube_tag'])
        self.assertEqual(self.cluster_template.labels['kube_tag'],
                         self.cluster_obj.default_ng_master.labels['kube_tag'])
        self.assertEqual(self.cluster_template.labels['kube_tag'],
                         def_ng.labels['kube_tag'])
        # make sure that the image from the cluster
        # template is NOT set to the default nodegroups.
        self.assertEqual('test-image1',
                         self.cluster_obj.default_ng_master.image_id)
        self.assertEqual('test-image1', def_ng.image_id)
        # check that the non-default nodegroup was not changed
        self.assertNotIn('kube_tag', self.nodegroup_obj.labels)
        self.assertEqual('test-image1', self.nodegroup_obj.image_id)

    @patch('magnum.common.keystone.KeystoneClientV3')
    @patch('magnum.common.clients.OpenStackClients')
    def test_upgrade_default_master_ng(self, mock_osc, mock_keystone):
        """Upgrade through the default master nodegroup."""
        mock_keystone.is_octavia_enabled.return_value = False
        def_ng = self.cluster_obj.default_ng_master
        self.driver.upgrade_cluster(self.context, self.cluster_obj,
                                    self.cluster_template, 1, def_ng)
        # make sure that the kube_tag is reflected correctly to the cluster
        # and the default nodegroups.
        self.assertEqual(self.cluster_template.labels['kube_tag'],
                         self.cluster_obj.labels['kube_tag'])
        self.assertEqual(self.cluster_template.labels['kube_tag'],
                         self.cluster_obj.default_ng_worker.labels['kube_tag'])
        self.assertEqual(self.cluster_template.labels['kube_tag'],
                         def_ng.labels['kube_tag'])
        # make sure that the image from the cluster
        # template is NOT set to the default nodegroups.
        self.assertEqual('test-image1',
                         self.cluster_obj.default_ng_worker.image_id)
        self.assertEqual('test-image1', def_ng.image_id)
        # check that the non-default nodegroup was not changed
        self.assertNotIn('kube_tag', self.nodegroup_obj.labels)
        self.assertEqual('test-image1', self.nodegroup_obj.image_id)

    @patch('magnum.common.keystone.KeystoneClientV3')
    @patch('magnum.common.clients.OpenStackClients')
    def test_upgrade_non_default_ng(self, mock_osc, mock_keystone):
        """Upgrade of the non-default nodegroup only."""
        mock_keystone.is_octavia_enabled.return_value = False
        self.driver.upgrade_cluster(self.context, self.cluster_obj,
                                    self.cluster_template, 1,
                                    self.nodegroup_obj)
        # check that the cluster and default nodegroups were not changed
        self.assertNotIn('kube_tag', self.cluster_obj.labels)
        self.assertNotIn('kube_tag', self.cluster_obj.default_ng_master.labels)
        self.assertNotIn('kube_tag', self.cluster_obj.default_ng_worker.labels)
        # make sure that the image from the cluster template
        # is not reflected to the default nodegroups.
        self.assertEqual('test-image1',
                         self.cluster_obj.default_ng_master.image_id)
        self.assertEqual('test-image1',
                         self.cluster_obj.default_ng_worker.image_id)
        # check that the non-default nodegroup reflects the kube_tag of the
        # cluster template while keeping its own image.
        self.assertEqual(self.cluster_template.labels['kube_tag'],
                         self.nodegroup_obj.labels['kube_tag'])
        self.assertEqual('test-image1',
                         self.nodegroup_obj.image_id)

    @patch('magnum.common.keystone.KeystoneClientV3')
    @patch('magnum.common.clients.OpenStackClients')
    def test_downgrade_not_supported(self, mock_osc, mock_keystone):
        # Scenario: the nodegroup has a kube_tag greater than the one
        # set in the cluster template used for the upgrade. Check
        # that downgrading is not supported.
        self.nodegroup_obj.labels = {'kube_tag': 'v1.14.7'}
        self.nodegroup_obj.save()
        self.cluster_template.labels = {'kube_tag': 'v1.14.3'}
        self.cluster_template.save()
        mock_keystone.is_octavia_enabled.return_value = False
        self.assertRaises(exception.InvalidVersion,
                          self.driver.upgrade_cluster, self.context,
                          self.cluster_obj, self.cluster_template, 1,
                          self.nodegroup_obj)

    @patch('magnum.common.keystone.KeystoneClientV3')
    @patch('magnum.common.clients.OpenStackClients')
    def test_invalid_ct(self, mock_osc, mock_keystone):
        # Scenario: the cluster template used for the upgrade has no
        # labels at all, hence no kube_tag. Check that the upgrade is
        # rejected with InvalidClusterTemplateForUpgrade.
        self.cluster_template.labels = {}
        self.cluster_template.save()
        mock_keystone.is_octavia_enabled.return_value = False
        self.assertRaises(exception.InvalidClusterTemplateForUpgrade,
                          self.driver.upgrade_cluster, self.context,
                          self.cluster_obj, self.cluster_template, 1,
                          self.nodegroup_obj)

View File

@ -0,0 +1,12 @@
---
features:
- |
Cluster upgrade API supports upgrading specific nodegroups
in kubernetes clusters. If a user chooses a default nodegroup to
be upgraded, then both of the default nodegroups will be upgraded
since they are in one stack. For non-default nodegroups users are
allowed to use only the cluster template already set in the
cluster. This means that the cluster (default nodegroups) has to
be upgraded first. For now, the only label that is
taken into consideration during upgrades is the kube_tag. All
other labels are ignored.