Improves anti-affinity behavior in sahara
Introduces a new cluster field, anti_affinity_ratio, with a default value of 1. When the user sets anti_affinity_ratio to a value greater than 1, additional server groups are created, and instances are placed into those server groups in a round-robin fashion.

Partially-Implements: blueprint improving-anti-affinity
Change-Id: I86bda256ea661fa0ba9d6d3819477edff080bcee
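As a rough, standalone illustration of the intended behaviour (not code from this patch; the cluster name, ratio, and instance count below are made-up examples): with anti_affinity_ratio set to N, server groups named <cluster>-aa-group-<index> are used and instances cycle through them.

    def generate_aa_group_name(cluster_name, server_group_index):
        # same naming scheme introduced by this change
        return ("%s-aa-group-%d" % (cluster_name, server_group_index)).lower()

    def assign_server_groups(cluster_name, anti_affinity_ratio, instance_count):
        # yield one server group name per instance, cycling 1..anti_affinity_ratio
        index = 1
        for _ in range(instance_count):
            yield generate_aa_group_name(cluster_name, index)
            index = index + 1 if index < anti_affinity_ratio else 1

    # 5 instances with anti_affinity_ratio=2:
    # ['cluster1-aa-group-1', 'cluster1-aa-group-2', 'cluster1-aa-group-1',
    #  'cluster1-aa-group-2', 'cluster1-aa-group-1']
    print(list(assign_server_groups("cluster1", 2, 5)))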
parent b900c5bf55
commit cd15698526
@@ -33,6 +33,7 @@ CLUSTER_DEFAULTS = {
     "cluster_configs": {},
     "status": "undefined",
     "anti_affinity": [],
+    "anti_affinity_ratio": 1,
     "status_description": "",
     "info": {},
     "rollback_info": {},

@@ -48,6 +48,7 @@ class Cluster(object):
         see the docs for details
         default_image_id
         anti_affinity
+        anti_affinity_ratio
         management_private_key
         management_public_key
         user_keypair_id

@@ -0,0 +1,34 @@
+# Copyright 2016 OpenStack Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""033_add anti_affinity_ratio field to cluster
+
+Revision ID: 033
+Revises: 032
+Create Date: 2016-01-05 09:40:25.941365
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '033'
+down_revision = '032'
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade():
+    op.add_column('clusters',
+                  sa.Column('anti_affinity_ratio', sa.Integer()))

@@ -58,6 +58,7 @@ class Cluster(mb.SaharaBase):
     default_image_id = sa.Column(sa.String(36))
     neutron_management_network = sa.Column(sa.String(36))
     anti_affinity = sa.Column(st.JsonListType())
+    anti_affinity_ratio = sa.Column(sa.Integer, default=1)
     management_private_key = sa.Column(sa.Text, nullable=False)
     management_public_key = sa.Column(sa.Text, nullable=False)
     user_keypair_id = sa.Column(sa.String(80))

@@ -45,6 +45,7 @@ conductor = c.API
 
 @six.add_metaclass(abc.ABCMeta)
 class Engine(object):
 
     @abc.abstractmethod
     def create_cluster(self, cluster):
         pass

@@ -251,15 +252,27 @@ sed '/^Defaults requiretty*/ s/^/#/' -i /etc/sudoers\n
             LOG.warning(_LW("Failed to delete security group {name}").format(
                 name=name))
 
-    def _delete_aa_server_group(self, cluster):
+    def _delete_aa_server_groups(self, cluster):
         if cluster.anti_affinity:
-            server_group_name = g.generate_aa_group_name(cluster.name)
-            client = nova.client().server_groups
-
-            server_groups = b.execute_with_retries(client.findall,
-                                                   name=server_group_name)
-            if len(server_groups) == 1:
-                b.execute_with_retries(client.delete, server_groups[0].id)
+            client = nova.client().server_groups
+            for i in range(1, cluster.anti_affinity_ratio):
+                server_group_name = g.generate_aa_group_name(cluster.name, i)
+
+                server_groups = b.execute_with_retries(client.findall,
+                                                       name=server_group_name)
+                if len(server_groups) == 1:
+                    b.execute_with_retries(client.delete, server_groups[0].id)
+
+            '''In case the server group is created
+            using mitaka or older version'''
+            old_server_group_name = server_group_name.rsplit('-', 1)[0]
+            server_groups_old = b.execute_with_retries(
+                client.findall,
+                name=old_server_group_name)
+            if len(server_groups_old) == 1:
+                b.execute_with_retries(client.delete,
+                                       server_groups_old[0].id)
 
     def _shutdown_instance(self, instance):
         # tmckay-fp perfect, already testing the right thing

@@ -202,7 +202,7 @@ class HeatEngine(e.Engine):
             # Stack not found. Trying to delete cluster like direct engine
             # do it
             self._shutdown_instances(cluster)
-            self._delete_aa_server_group(cluster)
+            self._delete_aa_server_groups(cluster)
 
             self._clean_job_executions(cluster)
             self._remove_db_objects(cluster)

@@ -34,7 +34,7 @@ LOG = logging.getLogger(__name__)
 
 SSH_PORT = 22
 INSTANCE_RESOURCE_NAME = "inst"
-SERVER_GROUP_PARAM_NAME = "servgroup"
+SERVER_GROUP_NAMES = "servgroups"
 AUTO_SECURITY_GROUP_PARAM_NAME = "autosecgroup"
 INTERNAL_DESIGNATE_REC = "internal_designate_record"
 INTERNAL_DESIGNATE_REV_REC = "internal_designate_reverse_record"

@@ -80,8 +80,8 @@ def _get_inst_domain_name(domain):
     }
 
 
-def _get_aa_group_name(cluster):
-    return g.generate_aa_group_name(cluster.name)
+def _get_aa_group_name(cluster, server_group_index):
+    return g.generate_aa_group_name(cluster.name, server_group_index)
 
 
 def _get_port_name(ng):

@@ -148,6 +148,7 @@ class ClusterStack(object):
                   "Sahara engine: {version}".format(
                       cluster=cluster.name, version=heat_common.HEAT_ENGINE_VERSION)
         )
+        self._current_sg_index = 1
 
     def _node_group_description(self, ng):
         return "{info}\nNode group {node_group}".format(

@@ -212,6 +213,15 @@ class ClusterStack(object):
                   "{args}".format(stack=stack, args=log_kwargs))
         b.execute_with_retries(stack.update, **kwargs)
 
+    def _get_server_group_name(self):
+        index = self._current_sg_index
+        # computing server group index in round robin fashion
+        if index < self.cluster.anti_affinity_ratio:
+            self._current_sg_index = (index + 1)
+        else:
+            self._current_sg_index = 1
+        return _get_aa_group_name(self.cluster, self._current_sg_index)
+
     def _need_aa_server_group(self, node_group):
         for node_process in node_group.node_processes:
             if node_process in self.cluster.anti_affinity:

@@ -225,7 +235,8 @@ class ClusterStack(object):
         return {
             "scheduler_hints": {
                 "group": {
-                    "get_param": SERVER_GROUP_PARAM_NAME,
+                    "get_param": [SERVER_GROUP_NAMES,
+                                  {"get_param": "instance_index"}]
                 }
             }
         }

@@ -234,7 +245,9 @@ class ClusterStack(object):
         resources = {}
 
         if self.cluster.anti_affinity:
-            resources.update(self._serialize_aa_server_group())
+            # Creating server groups equal to the anti_affinity_ratio
+            for i in range(1, self.cluster.anti_affinity_ratio):
+                resources.update(self._serialize_aa_server_group(i))
 
         for ng in self.cluster.node_groups:
             resources.update(self._serialize_ng_group(ng, outputs))

@@ -253,8 +266,14 @@ class ClusterStack(object):
         properties = {"instance_index": "%index%"}
 
         if ng.cluster.anti_affinity:
-            properties[SERVER_GROUP_PARAM_NAME] = {
-                'get_resource': _get_aa_group_name(ng.cluster)}
+            ng_count = ng.count
+            # assuming instance_index also start from index 0
+            for i in range(0, ng_count - 1):
+                server_group_name = self._get_server_group_name()
+                server_group_resource = {
+                    "get_resource": server_group_name
+                }
+                properties[SERVER_GROUP_NAMES].insert(i, server_group_resource)
 
         if ng.auto_security_group:
             properties[AUTO_SECURITY_GROUP_PARAM_NAME] = {

@@ -277,7 +296,8 @@ class ClusterStack(object):
         parameters = {"instance_index": {"type": "string"}}
 
         if ng.cluster.anti_affinity:
-            parameters[SERVER_GROUP_PARAM_NAME] = {'type': "string"}
+            parameters[SERVER_GROUP_NAMES] = {"type": "comma_delimited_list",
+                                              "default": []}
 
         if ng.auto_security_group:
             parameters[AUTO_SECURITY_GROUP_PARAM_NAME] = {'type': "string"}

@@ -609,8 +629,9 @@ class ClusterStack(object):
         ]
         return node_group_sg
 
-    def _serialize_aa_server_group(self):
-        server_group_name = _get_aa_group_name(self.cluster)
+    def _serialize_aa_server_group(self, server_group_index):
+        server_group_name = _get_aa_group_name(self.cluster,
+                                               server_group_index)
         return {
             server_group_name: {
                 "type": "OS::Nova::ServerGroup",

@@ -623,6 +623,9 @@ class SaharaMigrationsCheckers(object):
         self.assertColumnsExist(engine, 'plugin_data',
                                 plugins_data_columns)
 
+    def _check_033(self, engine, data):
+        self.assertColumnExists(engine, 'clusters', 'anti_affinity_ratio')
+
 
 class TestMigrationsMySQL(SaharaMigrationsCheckers,
                           base.BaseWalkMigrationTestCase,

@@ -53,7 +53,8 @@ class BaseTestClusterTemplate(base.SaharaWithDbTestCase):
                                    neutron_management_network=mng_network,
                                    default_image_id='1', image_id=None,
                                    anti_affinity=anti_affinity or [],
-                                   domain_name=domain_name)
+                                   domain_name=domain_name,
+                                   anti_affinity_ratio=1)
 
 
 class TestClusterTemplate(BaseTestClusterTemplate):

@@ -70,17 +71,18 @@ class TestClusterTemplate(BaseTestClusterTemplate):
         cluster = self._make_cluster('private_net', ng1, ng2,
                                      anti_affinity=["datanode"])
         heat_template = self._make_heat_template(cluster, ng1, ng2)
 
         ng1 = [ng for ng in cluster.node_groups if ng.name == "master"][0]
         ng2 = [ng for ng in cluster.node_groups if ng.name == "worker"][0]
 
         expected = {
             "scheduler_hints": {
                 "group": {
-                    "get_param": h.SERVER_GROUP_PARAM_NAME
+                    "get_param": [h.SERVER_GROUP_NAMES, {"get_param":
+                                  "instance_index"}]
                 }
             }
         }
 
         actual = heat_template._get_anti_affinity_scheduler_hints(ng2)
         self.assertEqual(expected, actual)

@@ -167,7 +167,7 @@ class TestDeletion(base.SaharaTestCase):
 
         self.assertEqual(0, client.security_groups.delete.call_count)
 
-    @mock.patch('sahara.service.engine.Engine._delete_aa_server_group')
+    @mock.patch('sahara.service.engine.Engine._delete_aa_server_groups')
     @mock.patch('sahara.service.engine.Engine._shutdown_instances')
     @mock.patch('sahara.service.engine.Engine._remove_db_objects')
     @mock.patch('sahara.service.engine.Engine._clean_job_executions')

@@ -192,15 +192,15 @@ class TestDeletion(base.SaharaTestCase):
                 self.order.append('shutdown_instances')
                 super(FakeHeatEngine, self)._shutdown_instances(cluster)
 
-            def _delete_aa_server_group(self, cluster):
-                self.order.append('delete_aa_server_group')
-                super(FakeHeatEngine, self)._delete_aa_server_group(cluster)
+            def _delete_aa_server_groups(self, cluster):
+                self.order.append('delete_aa_server_groups')
+                super(FakeHeatEngine, self)._delete_aa_server_groups(cluster)
 
         fake_cluster = mock.Mock()
         heat_client.side_effect = heat_exc.HTTPNotFound()
         engine = FakeHeatEngine()
         engine.shutdown_cluster(fake_cluster)
-        self.assertEqual(['shutdown_instances', 'delete_aa_server_group',
+        self.assertEqual(['shutdown_instances', 'delete_aa_server_groups',
                           'clean_job_executions', 'remove_db_objects'],
                          engine.order)
         self.assertEqual(

@@ -73,5 +73,5 @@ def generate_auto_security_group_name(node_group):
                       node_group.id[:8])).lower()
 
 
-def generate_aa_group_name(cluster_name):
-    return ("%s-aa-group" % cluster_name).lower()
+def generate_aa_group_name(cluster_name, server_group_index):
+    return ("%s-aa-group-%d" % (cluster_name, server_group_index)).lower()