Implement bay smart scale down

We currently allow Magnum to scale down bays by removing nodes from the Bay's ResourceGroup by updating the heat stack that created the bay. The problem with this approach is that Heat decides which node to delete, and all containers on that node will also be deleted. This patch make the selection process smarter: we ask Heat to delete Bay node(s) that have NO CONTAINERS on them. Change-Id: Ic4d754dfe358dd8aad020bc3b80a4c9f3f1525fc Implements: blueprint magnum-smart-bay-scale-down
2015-06-28 23:52:55 -04:00 · 2015-06-28 23:52:55 -04:00 · 3e6816a575
parent 2d153fc83d
commit 3e6816a575
6 changed files with 368 additions and 29 deletions
--- a/magnum/conductor/handlers/bay_conductor.py
+++ b/magnum/conductor/handlers/bay_conductor.py
@ -21,6 +21,7 @@ from oslo_service import loopingcall
 from magnum.common import clients
 from magnum.common import exception
 from magnum.common import short_id
+from magnum.conductor import scale_manager
 from magnum.conductor.template_definition import TemplateDefinition as TDef
 from magnum.conductor import utils as conductor_utils
 from magnum.i18n import _
@ -55,13 +56,14 @@ cfg.CONF.register_opts(bay_heat_opts, group='bay_heat')
 LOG = logging.getLogger(__name__)


-def _extract_template_definition(context, bay):
+def _extract_template_definition(context, bay, scale_manager=None):
    baymodel = conductor_utils.retrieve_baymodel(context, bay)
    cluster_distro = baymodel.cluster_distro
    cluster_coe = baymodel.coe
    definition = TDef.get_template_definition('vm', cluster_distro,
                                              cluster_coe)
-    return definition.extract_definition(baymodel, bay)
+    return definition.extract_definition(baymodel, bay,
+                                         scale_manager=scale_manager)


 def _create_stack(context, osc, bay, bay_create_timeout):
@ -90,8 +92,9 @@ def _create_stack(context, osc, bay, bay_create_timeout):
    return created_stack


-def _update_stack(context, osc, bay):
-    template_path, heat_params = _extract_template_definition(context, bay)
+def _update_stack(context, osc, bay, scale_manager=None):
+    template_path, heat_params = _extract_template_definition(
+        context, bay, scale_manager=scale_manager)

    tpl_files, template = template_utils.get_template_contents(template_path)
    fields = {
@ -152,8 +155,9 @@ class Handler(object):
        delta = set(bay.obj_what_changed())
        if 'node_count' in delta:
            delta.remove('node_count')
+            manager = scale_manager.ScaleManager(context, osc, bay)

-            _update_stack(context, osc, bay)
+            _update_stack(context, osc, bay, manager)
            self._poll_and_check(osc, bay)

        if delta:
--- a/magnum/conductor/scale_manager.py
+++ b/magnum/conductor/scale_manager.py
@ -0,0 +1,78 @@
+# Copyright 2015 Huawei Technologies Co.,LTD.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from oslo_log import log as logging
+
+from magnum.common import exception
+from magnum.conductor import k8s_api as k8s
+from magnum.i18n import _
+from magnum.i18n import _LI
+from magnum.i18n import _LW
+from magnum import objects
+
+
+LOG = logging.getLogger(__name__)
+
+
+class ScaleManager(object):
+
+    def __init__(self, context, osclient, bay):
+        self.context = context
+        self.osclient = osclient
+        self.old_bay = objects.Bay.get_by_uuid(context, bay.uuid)
+        self.new_bay = bay
+
+    def get_removal_nodes(self, hosts_output):
+        if not self._is_scale_down():
+            return list()
+
+        bay = self.new_bay
+        stack = self.osclient.heat().stacks.get(bay.stack_id)
+        hosts = hosts_output.get_output_value(stack)
+        if hosts is None:
+            raise exception.MagnumException(_(
+                "output key '%(output_key)s' is missing from stack "
+                "%(stack_id)s") % {'output_key': hosts_output.heat_output,
+                                   'stack_id': stack.id})
+
+        hosts_no_container = list(hosts)
+        k8s_api = k8s.create_k8s_api(self.context, bay)
+        for pod in k8s_api.listPod().items:
+            host = pod.spec.host
+            if host in hosts_no_container:
+                hosts_no_container.remove(host)
+
+        LOG.debug('List of hosts that has no container: %s' %
+                  str(hosts_no_container))
+
+        num_of_removal = self._get_num_of_removal()
+        if len(hosts_no_container) < num_of_removal:
+            LOG.warning(_LW(
+                "About to remove %(num_removal)d nodes, which is larger than "
+                "the number of empty nodes (%(num_empty)d). %(num_non_empty)d "
+                "non-empty nodes will be removed."), {
+                    'num_removal': num_of_removal,
+                    'num_empty': len(hosts_no_container),
+                    'num_non_empty': num_of_removal - len(hosts_no_container)})
+
+        hosts_to_remove = hosts_no_container[0:num_of_removal]
+        LOG.info(_LI('Require removal of hosts: %s') % hosts_to_remove)
+
+        return hosts_to_remove
+
+    def _is_scale_down(self):
+        return self.new_bay.node_count < self.old_bay.node_count
+
+    def _get_num_of_removal(self):
+        return self.old_bay.node_count - self.new_bay.node_count
--- a/magnum/conductor/template_definition.py
+++ b/magnum/conductor/template_definition.py
@ -15,14 +15,18 @@ import abc
 import uuid

 from oslo_config import cfg
+from oslo_log import log as logging
 from pkg_resources import iter_entry_points
 import requests
 import six

 from magnum.common import exception
-from magnum.i18n import _
-
 from magnum.common import paths
+from magnum.i18n import _
+from magnum.i18n import _LW
+
+
+LOG = logging.getLogger(__name__)


 template_def_opts = [
@ -120,10 +124,20 @@ class OutputMapping(object):
        if self.bay_attr is None:
            return

+        output_value = self.get_output_value(stack)
+        if output_value is not None:
+            setattr(bay, self.bay_attr, output_value)
+
+    def matched(self, output_key):
+        return self.heat_output == output_key
+
+    def get_output_value(self, stack):
        for output in stack.outputs:
            if output['output_key'] == self.heat_output:
-                setattr(bay, self.bay_attr, output['output_value'])
-                break
+                return output['output_value']
+
+        LOG.warning(_LW('stack does not have output_key %s'), self.heat_output)
+        return None


@six.add_metaclass(abc.ABCMeta)
@ -246,7 +260,14 @@ class TemplateDefinition(object):
        output = OutputMapping(*args, **kwargs)
        self.output_mappings.append(output)

-    def get_params(self, baymodel, bay, extra_params=None):
+    def get_output(self, *args, **kwargs):
+        for output in self.output_mappings:
+            if output.matched(*args, **kwargs):
+                return output
+
+        return None
+
+    def get_params(self, baymodel, bay, **kwargs):
        """Pulls template parameters from Baymodel and Bay.

        :param baymodel: Baymodel to pull template parameters from
@ -260,8 +281,8 @@ class TemplateDefinition(object):
        for mapping in self.param_mappings:
            mapping.set_param(template_params, baymodel, bay)

-        if extra_params:
-            template_params.update(extra_params)
+        if 'extra_params' in kwargs:
+            template_params.update(kwargs.get('extra_params'))

        return template_params

@ -273,9 +294,8 @@ class TemplateDefinition(object):
    def template_path(self):
        pass

-    def extract_definition(self, baymodel, bay, extra_params=None):
-        return self.template_path, self.get_params(baymodel, bay,
-                                                   extra_params=extra_params)
+    def extract_definition(self, baymodel, bay, **kwargs):
+        return self.template_path, self.get_params(baymodel, bay, **kwargs)


 class BaseTemplateDefinition(TemplateDefinition):
@ -328,6 +348,18 @@ class AtomicK8sTemplateDefinition(BaseTemplateDefinition):
        self.add_output('kube_minions_external',
                        bay_attr='node_addresses')

+    def get_params(self, baymodel, bay, **kwargs):
+        extra_params = kwargs.pop('extra_params', {})
+        scale_mgr = kwargs.pop('scale_manager', None)
+        if scale_mgr:
+            hosts = self.get_output('kube_minions')
+            extra_params['minions_to_remove'] = (
+                scale_mgr.get_removal_nodes(hosts))
+
+        return super(AtomicK8sTemplateDefinition,
+                     self).get_params(baymodel, bay, extra_params=extra_params,
+                                      **kwargs)
+
    @property
    def template_path(self):
        return cfg.CONF.bay.k8s_atomic_template_path
@ -353,14 +385,13 @@ class CoreOSK8sTemplateDefinition(AtomicK8sTemplateDefinition):
            token = uuid.uuid4().hex
        return token

-    def get_params(self, baymodel, bay, extra_params=None):
-        if not extra_params:
-            extra_params = dict()
-
+    def get_params(self, baymodel, bay, **kwargs):
+        extra_params = kwargs.pop('extra_params', {})
        extra_params['token'] = self.get_token()

        return super(CoreOSK8sTemplateDefinition,
-                     self).get_params(baymodel, bay, extra_params=extra_params)
+                     self).get_params(baymodel, bay, extra_params=extra_params,
+                                      **kwargs)

    @property
    def template_path(self):
@ -410,14 +441,13 @@ class AtomicSwarmTemplateDefinition(BaseTemplateDefinition):

        return discovery_url

-    def get_params(self, baymodel, bay, extra_params=None):
-        if not extra_params:
-            extra_params = dict()
-
+    def get_params(self, baymodel, bay, **kwargs):
+        extra_params = kwargs.pop('extra_params', {})
        extra_params['discovery_url'] = self.get_discovery_url(bay)

        return super(AtomicSwarmTemplateDefinition,
-                     self).get_params(baymodel, bay, extra_params=extra_params)
+                     self).get_params(baymodel, bay, extra_params=extra_params,
+                                      **kwargs)

    @property
    def template_path(self):
--- a/magnum/tests/unit/conductor/handlers/test_bay_conductor.py
+++ b/magnum/tests/unit/conductor/handlers/test_bay_conductor.py
@ -603,12 +603,14 @@ class TestHandler(db_base.DbTestCase):
        self.bay = objects.Bay(self.context, **bay_dict)
        self.bay.create()

+    @patch('magnum.conductor.scale_manager.ScaleManager')
    @patch('magnum.conductor.handlers.bay_conductor.Handler._poll_and_check')
    @patch('magnum.conductor.handlers.bay_conductor._update_stack')
    @patch('magnum.common.clients.OpenStackClients')
    def test_update_node_count_success(
            self, mock_openstack_client_class,
-            mock_update_stack, mock_poll_and_check):
+            mock_update_stack, mock_poll_and_check,
+            mock_scale_manager):
        mock_heat_stack = mock.MagicMock()
        mock_heat_stack.stack_status = bay_status.CREATE_COMPLETE
        mock_heat_client = mock.MagicMock()
@ -619,9 +621,9 @@ class TestHandler(db_base.DbTestCase):
        self.bay.node_count = 2
        self.handler.bay_update(self.context, self.bay)

-        mock_update_stack.assert_called_once_with(self.context,
-                                                  mock_openstack_client,
-                                                  self.bay)
+        mock_update_stack.assert_called_once_with(
+            self.context, mock_openstack_client, self.bay,
+            mock_scale_manager.return_value)
        bay = objects.Bay.get(self.context, self.bay.uuid)
        self.assertEqual(bay.node_count, 2)

--- a/magnum/tests/unit/conductor/test_scale_manager.py
+++ b/magnum/tests/unit/conductor/test_scale_manager.py
@ -0,0 +1,175 @@
+# Copyright 2015 Huawei Technologies Co.,LTD.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import mock
+
+from magnum.common import exception
+from magnum.conductor import scale_manager
+from magnum.tests import base
+
+
+class TestScaleManager(base.TestCase):
+
+    def _test_get_removal_nodes(
+            self, mock_create_k8s_api, mock_get_num_of_removal,
+            mock_is_scale_down, mock_get_by_uuid, is_scale_down,
+            num_of_removal, all_hosts, pod_hosts, expected_removal_hosts):
+
+        mock_is_scale_down.return_value = is_scale_down
+        mock_get_num_of_removal.return_value = num_of_removal
+
+        pods = list()
+        for h in pod_hosts:
+            pod = mock.MagicMock()
+            pod.spec.host = h
+            pods.append(pod)
+
+        mock_k8s_api = mock.MagicMock()
+        mock_k8s_api.listPod.return_value.items = pods
+        mock_create_k8s_api.return_value = mock_k8s_api
+
+        mock_heat_output = mock.MagicMock()
+        mock_heat_output.get_output_value.return_value = all_hosts
+
+        mock_stack = mock.MagicMock()
+        mock_heat_client = mock.MagicMock()
+        mock_osc = mock.MagicMock()
+        mock_heat_client.stacks.get.return_value = mock_stack
+        mock_osc.heat.return_value = mock_heat_client
+
+        mock_context = mock.MagicMock()
+        mock_bay = mock.MagicMock()
+        scale_mgr = scale_manager.ScaleManager(mock_context, mock_osc,
+                                               mock_bay)
+
+        if expected_removal_hosts is None:
+            self.assertRaises(exception.MagnumException,
+                              scale_mgr.get_removal_nodes, mock_heat_output)
+        else:
+            removal_hosts = scale_mgr.get_removal_nodes(mock_heat_output)
+            self.assertEqual(removal_hosts, expected_removal_hosts)
+
+    @mock.patch('magnum.objects.Bay.get_by_uuid')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager.'
+                '_get_num_of_removal')
+    @mock.patch('magnum.conductor.k8s_api.create_k8s_api')
+    def test_get_removal_nodes_no_pod(
+            self, mock_create_k8s_api, mock_get_num_of_removal,
+            mock_is_scale_down, mock_get_by_uuid):
+
+        is_scale_down = True
+        num_of_removal = 1
+        hosts = ['10.0.0.3', '10.0.0.4']
+        pods = []
+        expected_removal_hosts = ['10.0.0.3']
+        self._test_get_removal_nodes(
+            mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
+            mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
+            expected_removal_hosts)
+
+    @mock.patch('magnum.objects.Bay.get_by_uuid')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager.'
+                '_get_num_of_removal')
+    @mock.patch('magnum.conductor.k8s_api.create_k8s_api')
+    def test_get_removal_nodes_one_pod(
+            self, mock_create_k8s_api, mock_get_num_of_removal,
+            mock_is_scale_down, mock_get_by_uuid):
+
+        is_scale_down = True
+        num_of_removal = 1
+        hosts = ['10.0.0.3', '10.0.0.4']
+        pods = ['10.0.0.3']
+        expected_removal_hosts = ['10.0.0.4']
+        self._test_get_removal_nodes(
+            mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
+            mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
+            expected_removal_hosts)
+
+    @mock.patch('magnum.objects.Bay.get_by_uuid')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager.'
+                '_get_num_of_removal')
+    @mock.patch('magnum.conductor.k8s_api.create_k8s_api')
+    def test_get_removal_nodes_two_pods(
+            self, mock_create_k8s_api, mock_get_num_of_removal,
+            mock_is_scale_down, mock_get_by_uuid):
+
+        is_scale_down = True
+        num_of_removal = 1
+        hosts = ['10.0.0.3', '10.0.0.4']
+        pods = ['10.0.0.3', '10.0.0.4']
+        expected_removal_hosts = []
+        self._test_get_removal_nodes(
+            mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
+            mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
+            expected_removal_hosts)
+
+    @mock.patch('magnum.objects.Bay.get_by_uuid')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager.'
+                '_get_num_of_removal')
+    @mock.patch('magnum.conductor.k8s_api.create_k8s_api')
+    def test_get_removal_nodes_three_pods(
+            self, mock_create_k8s_api, mock_get_num_of_removal,
+            mock_is_scale_down, mock_get_by_uuid):
+
+        is_scale_down = True
+        num_of_removal = 1
+        hosts = ['10.0.0.3', '10.0.0.4']
+        pods = ['10.0.0.3', '10.0.0.4', '10.0.0.5']
+        expected_removal_hosts = []
+        self._test_get_removal_nodes(
+            mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
+            mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
+            expected_removal_hosts)
+
+    @mock.patch('magnum.objects.Bay.get_by_uuid')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager.'
+                '_get_num_of_removal')
+    @mock.patch('magnum.conductor.k8s_api.create_k8s_api')
+    def test_get_removal_nodes_scale_up(
+            self, mock_create_k8s_api, mock_get_num_of_removal,
+            mock_is_scale_down, mock_get_by_uuid):
+
+        is_scale_down = False
+        num_of_removal = -1
+        hosts = ['10.0.0.3', '10.0.0.4']
+        pods = []
+        expected_removal_hosts = []
+        self._test_get_removal_nodes(
+            mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
+            mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
+            expected_removal_hosts)
+
+    @mock.patch('magnum.objects.Bay.get_by_uuid')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
+    @mock.patch('magnum.conductor.scale_manager.ScaleManager.'
+                '_get_num_of_removal')
+    @mock.patch('magnum.conductor.k8s_api.create_k8s_api')
+    def test_get_removal_nodes_with_none_hosts(
+            self, mock_create_k8s_api, mock_get_num_of_removal,
+            mock_is_scale_down, mock_get_by_uuid):
+
+        is_scale_down = True
+        num_of_removal = 1
+        hosts = None
+        pods = []
+        expected_removal_hosts = None
+        self._test_get_removal_nodes(
+            mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
+            mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
+            expected_removal_hosts)
--- a/magnum/tests/unit/conductor/test_template_definition.py
+++ b/magnum/tests/unit/conductor/test_template_definition.py
@ -98,6 +98,27 @@ class TemplateDefinitionTestCase(base.TestCase):
        self.assertRaises(exception.RequiredParameterNotProvided,
                          param.set_param, {}, mock_baymodel, None)

+    @mock.patch('magnum.conductor.template_definition.BaseTemplateDefinition'
+                '.get_params')
+    @mock.patch('magnum.conductor.template_definition.TemplateDefinition'
+                '.get_output')
+    def test_k8s_get_params(self, mock_get_output, mock_get_params):
+        mock_baymodel = mock.MagicMock()
+        mock_bay = mock.MagicMock()
+        mock_scale_manager = mock.MagicMock()
+
+        removal_nodes = ['node1', 'node2']
+        mock_scale_manager.get_removal_nodes.return_value = removal_nodes
+        k8s_def = tdef.AtomicK8sTemplateDefinition()
+
+        k8s_def.get_params(mock_baymodel, mock_bay,
+                           scale_manager=mock_scale_manager)
+
+        expected_kwargs = {'extra_params': {
+            'minions_to_remove': removal_nodes}}
+        mock_get_params.assert_called_once_with(mock_baymodel, mock_bay,
+                                                **expected_kwargs)
+
    @mock.patch('requests.post')
    def test_swarm_discovery_url_public_token(self, mock_post):

@ -155,3 +176,32 @@ class TemplateDefinitionTestCase(base.TestCase):
        actual_url = swarm_def.get_discovery_url(mock_bay)

        self.assertEqual(mock_bay.discovery_url, actual_url)
+
+    def test_output_mapping(self):
+        heat_outputs = [
+            {
+                "output_value": "value1",
+                "description": "No description given",
+                "output_key": "key1"
+            },
+            {
+                "output_value": ["value2", "value3"],
+                "description": "No description given",
+                "output_key": "key2"
+            }
+        ]
+
+        mock_stack = mock.MagicMock()
+        mock_stack.outputs = heat_outputs
+
+        output = tdef.OutputMapping('key1')
+        value = output.get_output_value(mock_stack)
+        self.assertEqual(value, 'value1')
+
+        output = tdef.OutputMapping('key2')
+        value = output.get_output_value(mock_stack)
+        self.assertEqual(value, ["value2", "value3"])
+
+        output = tdef.OutputMapping('key3')
+        value = output.get_output_value(mock_stack)
+        self.assertIsNone(value)