Implement bay smart scale down

We currently allow Magnum to scale down bays by removing nodes from
the Bay's ResourceGroup by updating the heat stack that created the bay.
The problem with this approach is that Heat decides which node to delete,
and all containers on that node will also be deleted. This patch make
the selection process smarter: we ask Heat to delete Bay node(s)
that have NO CONTAINERS on them.

Change-Id: Ic4d754dfe358dd8aad020bc3b80a4c9f3f1525fc
Implements: blueprint magnum-smart-bay-scale-down
This commit is contained in:
Hongbin Lu 2015-06-28 23:52:55 -04:00
parent 2d153fc83d
commit 3e6816a575
6 changed files with 368 additions and 29 deletions

View File

@ -21,6 +21,7 @@ from oslo_service import loopingcall
from magnum.common import clients
from magnum.common import exception
from magnum.common import short_id
from magnum.conductor import scale_manager
from magnum.conductor.template_definition import TemplateDefinition as TDef
from magnum.conductor import utils as conductor_utils
from magnum.i18n import _
@ -55,13 +56,14 @@ cfg.CONF.register_opts(bay_heat_opts, group='bay_heat')
LOG = logging.getLogger(__name__)
def _extract_template_definition(context, bay):
def _extract_template_definition(context, bay, scale_manager=None):
baymodel = conductor_utils.retrieve_baymodel(context, bay)
cluster_distro = baymodel.cluster_distro
cluster_coe = baymodel.coe
definition = TDef.get_template_definition('vm', cluster_distro,
cluster_coe)
return definition.extract_definition(baymodel, bay)
return definition.extract_definition(baymodel, bay,
scale_manager=scale_manager)
def _create_stack(context, osc, bay, bay_create_timeout):
@ -90,8 +92,9 @@ def _create_stack(context, osc, bay, bay_create_timeout):
return created_stack
def _update_stack(context, osc, bay):
template_path, heat_params = _extract_template_definition(context, bay)
def _update_stack(context, osc, bay, scale_manager=None):
template_path, heat_params = _extract_template_definition(
context, bay, scale_manager=scale_manager)
tpl_files, template = template_utils.get_template_contents(template_path)
fields = {
@ -152,8 +155,9 @@ class Handler(object):
delta = set(bay.obj_what_changed())
if 'node_count' in delta:
delta.remove('node_count')
manager = scale_manager.ScaleManager(context, osc, bay)
_update_stack(context, osc, bay)
_update_stack(context, osc, bay, manager)
self._poll_and_check(osc, bay)
if delta:

View File

@ -0,0 +1,78 @@
# Copyright 2015 Huawei Technologies Co.,LTD.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_log import log as logging
from magnum.common import exception
from magnum.conductor import k8s_api as k8s
from magnum.i18n import _
from magnum.i18n import _LI
from magnum.i18n import _LW
from magnum import objects
LOG = logging.getLogger(__name__)
class ScaleManager(object):
def __init__(self, context, osclient, bay):
self.context = context
self.osclient = osclient
self.old_bay = objects.Bay.get_by_uuid(context, bay.uuid)
self.new_bay = bay
def get_removal_nodes(self, hosts_output):
if not self._is_scale_down():
return list()
bay = self.new_bay
stack = self.osclient.heat().stacks.get(bay.stack_id)
hosts = hosts_output.get_output_value(stack)
if hosts is None:
raise exception.MagnumException(_(
"output key '%(output_key)s' is missing from stack "
"%(stack_id)s") % {'output_key': hosts_output.heat_output,
'stack_id': stack.id})
hosts_no_container = list(hosts)
k8s_api = k8s.create_k8s_api(self.context, bay)
for pod in k8s_api.listPod().items:
host = pod.spec.host
if host in hosts_no_container:
hosts_no_container.remove(host)
LOG.debug('List of hosts that has no container: %s' %
str(hosts_no_container))
num_of_removal = self._get_num_of_removal()
if len(hosts_no_container) < num_of_removal:
LOG.warning(_LW(
"About to remove %(num_removal)d nodes, which is larger than "
"the number of empty nodes (%(num_empty)d). %(num_non_empty)d "
"non-empty nodes will be removed."), {
'num_removal': num_of_removal,
'num_empty': len(hosts_no_container),
'num_non_empty': num_of_removal - len(hosts_no_container)})
hosts_to_remove = hosts_no_container[0:num_of_removal]
LOG.info(_LI('Require removal of hosts: %s') % hosts_to_remove)
return hosts_to_remove
def _is_scale_down(self):
return self.new_bay.node_count < self.old_bay.node_count
def _get_num_of_removal(self):
return self.old_bay.node_count - self.new_bay.node_count

View File

@ -15,14 +15,18 @@ import abc
import uuid
from oslo_config import cfg
from oslo_log import log as logging
from pkg_resources import iter_entry_points
import requests
import six
from magnum.common import exception
from magnum.i18n import _
from magnum.common import paths
from magnum.i18n import _
from magnum.i18n import _LW
LOG = logging.getLogger(__name__)
template_def_opts = [
@ -120,10 +124,20 @@ class OutputMapping(object):
if self.bay_attr is None:
return
output_value = self.get_output_value(stack)
if output_value is not None:
setattr(bay, self.bay_attr, output_value)
def matched(self, output_key):
return self.heat_output == output_key
def get_output_value(self, stack):
for output in stack.outputs:
if output['output_key'] == self.heat_output:
setattr(bay, self.bay_attr, output['output_value'])
break
return output['output_value']
LOG.warning(_LW('stack does not have output_key %s'), self.heat_output)
return None
@six.add_metaclass(abc.ABCMeta)
@ -246,7 +260,14 @@ class TemplateDefinition(object):
output = OutputMapping(*args, **kwargs)
self.output_mappings.append(output)
def get_params(self, baymodel, bay, extra_params=None):
def get_output(self, *args, **kwargs):
for output in self.output_mappings:
if output.matched(*args, **kwargs):
return output
return None
def get_params(self, baymodel, bay, **kwargs):
"""Pulls template parameters from Baymodel and Bay.
:param baymodel: Baymodel to pull template parameters from
@ -260,8 +281,8 @@ class TemplateDefinition(object):
for mapping in self.param_mappings:
mapping.set_param(template_params, baymodel, bay)
if extra_params:
template_params.update(extra_params)
if 'extra_params' in kwargs:
template_params.update(kwargs.get('extra_params'))
return template_params
@ -273,9 +294,8 @@ class TemplateDefinition(object):
def template_path(self):
pass
def extract_definition(self, baymodel, bay, extra_params=None):
return self.template_path, self.get_params(baymodel, bay,
extra_params=extra_params)
def extract_definition(self, baymodel, bay, **kwargs):
return self.template_path, self.get_params(baymodel, bay, **kwargs)
class BaseTemplateDefinition(TemplateDefinition):
@ -328,6 +348,18 @@ class AtomicK8sTemplateDefinition(BaseTemplateDefinition):
self.add_output('kube_minions_external',
bay_attr='node_addresses')
def get_params(self, baymodel, bay, **kwargs):
extra_params = kwargs.pop('extra_params', {})
scale_mgr = kwargs.pop('scale_manager', None)
if scale_mgr:
hosts = self.get_output('kube_minions')
extra_params['minions_to_remove'] = (
scale_mgr.get_removal_nodes(hosts))
return super(AtomicK8sTemplateDefinition,
self).get_params(baymodel, bay, extra_params=extra_params,
**kwargs)
@property
def template_path(self):
return cfg.CONF.bay.k8s_atomic_template_path
@ -353,14 +385,13 @@ class CoreOSK8sTemplateDefinition(AtomicK8sTemplateDefinition):
token = uuid.uuid4().hex
return token
def get_params(self, baymodel, bay, extra_params=None):
if not extra_params:
extra_params = dict()
def get_params(self, baymodel, bay, **kwargs):
extra_params = kwargs.pop('extra_params', {})
extra_params['token'] = self.get_token()
return super(CoreOSK8sTemplateDefinition,
self).get_params(baymodel, bay, extra_params=extra_params)
self).get_params(baymodel, bay, extra_params=extra_params,
**kwargs)
@property
def template_path(self):
@ -410,14 +441,13 @@ class AtomicSwarmTemplateDefinition(BaseTemplateDefinition):
return discovery_url
def get_params(self, baymodel, bay, extra_params=None):
if not extra_params:
extra_params = dict()
def get_params(self, baymodel, bay, **kwargs):
extra_params = kwargs.pop('extra_params', {})
extra_params['discovery_url'] = self.get_discovery_url(bay)
return super(AtomicSwarmTemplateDefinition,
self).get_params(baymodel, bay, extra_params=extra_params)
self).get_params(baymodel, bay, extra_params=extra_params,
**kwargs)
@property
def template_path(self):

View File

@ -603,12 +603,14 @@ class TestHandler(db_base.DbTestCase):
self.bay = objects.Bay(self.context, **bay_dict)
self.bay.create()
@patch('magnum.conductor.scale_manager.ScaleManager')
@patch('magnum.conductor.handlers.bay_conductor.Handler._poll_and_check')
@patch('magnum.conductor.handlers.bay_conductor._update_stack')
@patch('magnum.common.clients.OpenStackClients')
def test_update_node_count_success(
self, mock_openstack_client_class,
mock_update_stack, mock_poll_and_check):
mock_update_stack, mock_poll_and_check,
mock_scale_manager):
mock_heat_stack = mock.MagicMock()
mock_heat_stack.stack_status = bay_status.CREATE_COMPLETE
mock_heat_client = mock.MagicMock()
@ -619,9 +621,9 @@ class TestHandler(db_base.DbTestCase):
self.bay.node_count = 2
self.handler.bay_update(self.context, self.bay)
mock_update_stack.assert_called_once_with(self.context,
mock_openstack_client,
self.bay)
mock_update_stack.assert_called_once_with(
self.context, mock_openstack_client, self.bay,
mock_scale_manager.return_value)
bay = objects.Bay.get(self.context, self.bay.uuid)
self.assertEqual(bay.node_count, 2)

View File

@ -0,0 +1,175 @@
# Copyright 2015 Huawei Technologies Co.,LTD.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import mock
from magnum.common import exception
from magnum.conductor import scale_manager
from magnum.tests import base
class TestScaleManager(base.TestCase):
def _test_get_removal_nodes(
self, mock_create_k8s_api, mock_get_num_of_removal,
mock_is_scale_down, mock_get_by_uuid, is_scale_down,
num_of_removal, all_hosts, pod_hosts, expected_removal_hosts):
mock_is_scale_down.return_value = is_scale_down
mock_get_num_of_removal.return_value = num_of_removal
pods = list()
for h in pod_hosts:
pod = mock.MagicMock()
pod.spec.host = h
pods.append(pod)
mock_k8s_api = mock.MagicMock()
mock_k8s_api.listPod.return_value.items = pods
mock_create_k8s_api.return_value = mock_k8s_api
mock_heat_output = mock.MagicMock()
mock_heat_output.get_output_value.return_value = all_hosts
mock_stack = mock.MagicMock()
mock_heat_client = mock.MagicMock()
mock_osc = mock.MagicMock()
mock_heat_client.stacks.get.return_value = mock_stack
mock_osc.heat.return_value = mock_heat_client
mock_context = mock.MagicMock()
mock_bay = mock.MagicMock()
scale_mgr = scale_manager.ScaleManager(mock_context, mock_osc,
mock_bay)
if expected_removal_hosts is None:
self.assertRaises(exception.MagnumException,
scale_mgr.get_removal_nodes, mock_heat_output)
else:
removal_hosts = scale_mgr.get_removal_nodes(mock_heat_output)
self.assertEqual(removal_hosts, expected_removal_hosts)
@mock.patch('magnum.objects.Bay.get_by_uuid')
@mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
@mock.patch('magnum.conductor.scale_manager.ScaleManager.'
'_get_num_of_removal')
@mock.patch('magnum.conductor.k8s_api.create_k8s_api')
def test_get_removal_nodes_no_pod(
self, mock_create_k8s_api, mock_get_num_of_removal,
mock_is_scale_down, mock_get_by_uuid):
is_scale_down = True
num_of_removal = 1
hosts = ['10.0.0.3', '10.0.0.4']
pods = []
expected_removal_hosts = ['10.0.0.3']
self._test_get_removal_nodes(
mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
expected_removal_hosts)
@mock.patch('magnum.objects.Bay.get_by_uuid')
@mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
@mock.patch('magnum.conductor.scale_manager.ScaleManager.'
'_get_num_of_removal')
@mock.patch('magnum.conductor.k8s_api.create_k8s_api')
def test_get_removal_nodes_one_pod(
self, mock_create_k8s_api, mock_get_num_of_removal,
mock_is_scale_down, mock_get_by_uuid):
is_scale_down = True
num_of_removal = 1
hosts = ['10.0.0.3', '10.0.0.4']
pods = ['10.0.0.3']
expected_removal_hosts = ['10.0.0.4']
self._test_get_removal_nodes(
mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
expected_removal_hosts)
@mock.patch('magnum.objects.Bay.get_by_uuid')
@mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
@mock.patch('magnum.conductor.scale_manager.ScaleManager.'
'_get_num_of_removal')
@mock.patch('magnum.conductor.k8s_api.create_k8s_api')
def test_get_removal_nodes_two_pods(
self, mock_create_k8s_api, mock_get_num_of_removal,
mock_is_scale_down, mock_get_by_uuid):
is_scale_down = True
num_of_removal = 1
hosts = ['10.0.0.3', '10.0.0.4']
pods = ['10.0.0.3', '10.0.0.4']
expected_removal_hosts = []
self._test_get_removal_nodes(
mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
expected_removal_hosts)
@mock.patch('magnum.objects.Bay.get_by_uuid')
@mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
@mock.patch('magnum.conductor.scale_manager.ScaleManager.'
'_get_num_of_removal')
@mock.patch('magnum.conductor.k8s_api.create_k8s_api')
def test_get_removal_nodes_three_pods(
self, mock_create_k8s_api, mock_get_num_of_removal,
mock_is_scale_down, mock_get_by_uuid):
is_scale_down = True
num_of_removal = 1
hosts = ['10.0.0.3', '10.0.0.4']
pods = ['10.0.0.3', '10.0.0.4', '10.0.0.5']
expected_removal_hosts = []
self._test_get_removal_nodes(
mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
expected_removal_hosts)
@mock.patch('magnum.objects.Bay.get_by_uuid')
@mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
@mock.patch('magnum.conductor.scale_manager.ScaleManager.'
'_get_num_of_removal')
@mock.patch('magnum.conductor.k8s_api.create_k8s_api')
def test_get_removal_nodes_scale_up(
self, mock_create_k8s_api, mock_get_num_of_removal,
mock_is_scale_down, mock_get_by_uuid):
is_scale_down = False
num_of_removal = -1
hosts = ['10.0.0.3', '10.0.0.4']
pods = []
expected_removal_hosts = []
self._test_get_removal_nodes(
mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
expected_removal_hosts)
@mock.patch('magnum.objects.Bay.get_by_uuid')
@mock.patch('magnum.conductor.scale_manager.ScaleManager._is_scale_down')
@mock.patch('magnum.conductor.scale_manager.ScaleManager.'
'_get_num_of_removal')
@mock.patch('magnum.conductor.k8s_api.create_k8s_api')
def test_get_removal_nodes_with_none_hosts(
self, mock_create_k8s_api, mock_get_num_of_removal,
mock_is_scale_down, mock_get_by_uuid):
is_scale_down = True
num_of_removal = 1
hosts = None
pods = []
expected_removal_hosts = None
self._test_get_removal_nodes(
mock_create_k8s_api, mock_get_num_of_removal, mock_is_scale_down,
mock_get_by_uuid, is_scale_down, num_of_removal, hosts, pods,
expected_removal_hosts)

View File

@ -98,6 +98,27 @@ class TemplateDefinitionTestCase(base.TestCase):
self.assertRaises(exception.RequiredParameterNotProvided,
param.set_param, {}, mock_baymodel, None)
@mock.patch('magnum.conductor.template_definition.BaseTemplateDefinition'
'.get_params')
@mock.patch('magnum.conductor.template_definition.TemplateDefinition'
'.get_output')
def test_k8s_get_params(self, mock_get_output, mock_get_params):
mock_baymodel = mock.MagicMock()
mock_bay = mock.MagicMock()
mock_scale_manager = mock.MagicMock()
removal_nodes = ['node1', 'node2']
mock_scale_manager.get_removal_nodes.return_value = removal_nodes
k8s_def = tdef.AtomicK8sTemplateDefinition()
k8s_def.get_params(mock_baymodel, mock_bay,
scale_manager=mock_scale_manager)
expected_kwargs = {'extra_params': {
'minions_to_remove': removal_nodes}}
mock_get_params.assert_called_once_with(mock_baymodel, mock_bay,
**expected_kwargs)
@mock.patch('requests.post')
def test_swarm_discovery_url_public_token(self, mock_post):
@ -155,3 +176,32 @@ class TemplateDefinitionTestCase(base.TestCase):
actual_url = swarm_def.get_discovery_url(mock_bay)
self.assertEqual(mock_bay.discovery_url, actual_url)
def test_output_mapping(self):
heat_outputs = [
{
"output_value": "value1",
"description": "No description given",
"output_key": "key1"
},
{
"output_value": ["value2", "value3"],
"description": "No description given",
"output_key": "key2"
}
]
mock_stack = mock.MagicMock()
mock_stack.outputs = heat_outputs
output = tdef.OutputMapping('key1')
value = output.get_output_value(mock_stack)
self.assertEqual(value, 'value1')
output = tdef.OutputMapping('key2')
value = output.get_output_value(mock_stack)
self.assertEqual(value, ["value2", "value3"])
output = tdef.OutputMapping('key3')
value = output.get_output_value(mock_stack)
self.assertIsNone(value)