deb-sahara/sahara/plugins/recommendations_utils.py
Vitaly Gridnev 22dbaa7248 Implement recommendations for vanilla 2.6.0
This patch implements base recommendation provider for vanilla 2.6.0. Also
added new fields in database for cluster and node groups which will allow
to switch off autoconfiguration.

Partial-implements blueprint: recommend-configuration
Change-Id: I9abb6b9c04494f4aed9b72479c06e45fe289c1ff
2015-07-21 14:19:31 +03:00

353 lines
14 KiB
Python

# Copyright (c) 2015 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
from oslo_log import log as logging
import six
from sahara import conductor as cond
from sahara import context
from sahara.utils.openstack import nova
conductor = cond.API
LOG = logging.getLogger(__name__)
@six.add_metaclass(abc.ABCMeta)
class AutoConfigsProvider(object):
def __init__(self, mapper, plugin_configs, cluster, extra_spec=None):
"""This meta class provides general recommendation utils for cluster
configuration.
:param mapper: dictionary, that describes which cluster configs and
node_configs to configure. It should maps to following dicts:
node_configs to configure and cluster_configs to configure. This
dicts should contains abstract names of configs as keys and
tuple (correct_applicable_target, correct_name) as values. Such
representation allows to use same AutoConfigsProvider for plugins
with almost same configs and configuring principles.
:param plugin_configs: all plugins_configs for specified plugin
:param cluster: cluster which is required to configure
:param extra_spec: extra helpful information about AutoConfigs
"""
self.plugin_configs = plugin_configs
self.cluster = cluster
self.node_configs_to_update = mapper.get('node_configs', {})
self.cluster_configs_to_update = mapper.get('cluster_configs', {})
self.extra_spec = {} if not extra_spec else extra_spec
@abc.abstractmethod
def _get_recommended_node_configs(self, node_group):
"""Method calculates and returns recommended configs for node_group.
It's not required to update node_configs of node_group using the
conductor api in this method, because it will be done in the method
apply_node_configs.
:param node_group: NodeGroup Sahara resource.
:return: dictionary with calculated recommended configs for
node_group.
"""
pass
@abc.abstractmethod
def _get_recommended_cluster_configs(self):
"""Method calculates and returns recommended configs for cluster.
It's not required to update cluster_configs of cluster using the
conductor api in this method, because it will be done in the method
apply_cluster_configs.
:return: dictionary with calculated recommended configs for
cluster.
"""
pass
def _can_be_recommended(self, configs_list, node_group=None):
"""Method calculates and returns True, when it's possible to
automatically configure provided list of configs configs_list.
Otherwise, method should return False.
:param configs_list: list of configs which we want to configure
:param node_group: optional argument, which should be provided if
some config can be used in node_configs of some node_group
:return: True if all configs can be configured and False otherwise
"""
# cluster configs is Frozen Dict, so let's call to_dict()
cl_configs = self.cluster.cluster_configs.to_dict()
for ncfg in configs_list:
section, name = self._get_correct_section_and_name(ncfg)
if section in cl_configs and name in cl_configs[section]:
return False
if not node_group:
return True
cl_configs = node_group.node_configs.to_dict()
for ncfg in configs_list:
section, name = self._get_correct_section_and_name(ncfg)
if section in cl_configs and name in cl_configs[section]:
return False
return True
def _get_correct_section_and_name(self, config_name):
"""Calculates and returns correct applicable target and name from
abstract name of config.
:param config_name: abstract name of config.
:return: correct applicable target and name for config_name
"""
section, name = None, None
if config_name in self.cluster_configs_to_update:
section = self.cluster_configs_to_update[config_name][0]
name = self.cluster_configs_to_update[config_name][1]
elif config_name in self.node_configs_to_update:
section = self.node_configs_to_update[config_name][0]
name = self.node_configs_to_update[config_name][1]
return section, name
def _get_default_config_value(self, config_name):
"""Calculates and return default value of config from
abstract name of config.
:param config_name: abstract name of config.
:return: default config value for config_name.
"""
section, name = self._get_correct_section_and_name(config_name)
for config in self.plugin_configs:
if config.applicable_target == section and config.name == name:
return config.default_value
def _merge_configs(self, current_configs, proposed_configs):
"""Correctly merges old configs and new extra configs"""
result = {}
for (section, configs) in six.iteritems(proposed_configs):
cfg_values = {}
if section in current_configs:
cfg_values = (current_configs[section] if
current_configs[section] else {})
cfg_values.update(configs)
result.update({section: cfg_values})
for (section, configs) in six.iteritems(current_configs):
if section not in result:
result.update({section: configs})
return result
def apply_node_configs(self, node_group):
"""Method applies configs for node_group using conductor api,
which were calculated with recommend_node_configs method.
:param node_group: NodeGroup Sahara resource.
:return: None.
"""
if not node_group.use_autoconfig or not self.cluster.use_autoconfig:
return
to_update = self.node_configs_to_update
recommended_node_configs = self._get_recommended_node_configs(
node_group)
if not recommended_node_configs:
# Nothing to configure
return
current_dict = node_group.node_configs.to_dict()
configuration = {}
for ncfg in six.iterkeys(to_update):
if ncfg not in recommended_node_configs:
continue
n_section = to_update[ncfg][0]
n_name = to_update[ncfg][1]
proposed_config_value = recommended_node_configs[ncfg]
if n_section not in configuration:
configuration.update({n_section: {}})
configuration[n_section].update({n_name: proposed_config_value})
current_dict = self._merge_configs(current_dict, configuration)
conductor.node_group_update(context.ctx(), node_group,
{'node_configs': current_dict})
def apply_cluster_configs(self):
"""Method applies configs for cluster using conductor api, which were
calculated with recommend_cluster_configs method.
:return: None.
"""
cluster = self.cluster
if not cluster.use_autoconfig:
return
to_update = self.cluster_configs_to_update
recommended_cluster_configs = self._get_recommended_cluster_configs()
if not recommended_cluster_configs:
# Nothing to configure
return
current_dict = cluster.cluster_configs.to_dict()
configuration = {}
for ncfg in six.iterkeys(to_update):
if ncfg not in recommended_cluster_configs:
continue
n_section = to_update[ncfg][0]
n_name = to_update[ncfg][1]
proposed_config_value = recommended_cluster_configs[ncfg]
if n_section not in configuration:
configuration.update({n_section: {}})
configuration[n_section].update({n_name: proposed_config_value})
current_dict = self._merge_configs(current_dict, configuration)
conductor.cluster_update(context.ctx(), cluster,
{'cluster_configs': current_dict})
def apply_recommended_configs(self):
"""Method applies recommended configs for cluster and for all
node_groups using conductor api.
:return: None.
"""
for ng in self.cluster.node_groups:
self.apply_node_configs(ng)
self.apply_cluster_configs()
configs = list(self.cluster_configs_to_update.keys())
configs.extend(list(self.node_configs_to_update.keys()))
LOG.debug("Following configs were auto-configured: {configs}".format(
configs=configs))
class HadoopAutoConfigsProvider(AutoConfigsProvider):
def __init__(self, mapper, plugin_configs, cluster, extra_spec=None,
hbase=False):
super(HadoopAutoConfigsProvider, self).__init__(
mapper, plugin_configs, cluster, extra_spec)
self.requested_flavors = {}
self.is_hbase_enabled = hbase
def _get_java_opts(self, value):
return "-Xmx%dm" % int(value)
def _transform_mb_to_gb(self, mb):
return mb / 1024.
def _transform_gb_to_mb(self, gb):
return gb * 1024.
def _get_min_size_of_container(self, ram):
if ram <= 4:
return 256
if ram <= 8:
return 512
if ram <= 24:
return 1024
return 2048
def _get_os_ram_recommendation(self, ram):
upper_bounds = [4, 8, 16, 24, 48, 64, 72, 96, 128, 256]
reserve_for_os = [1, 2, 2, 4, 6, 8, 8, 12, 24, 32]
for (upper, reserve) in zip(upper_bounds, reserve_for_os):
if ram <= upper:
return reserve
return 64
def _get_hbase_ram_recommendations(self, ram):
if not self.is_hbase_enabled:
return 0
upper_bounds = [4, 8, 16, 24, 48, 64, 72, 96, 128, 256]
reserve_for_hbase = [1, 1, 2, 4, 8, 8, 8, 16, 24, 32]
for (upper, reserve) in zip(upper_bounds, reserve_for_hbase):
if ram <= upper:
return reserve
return 64
def _get_node_group_data(self, node_group):
if node_group.flavor_id not in self.requested_flavors:
flavor = nova.get_flavor(id=node_group.flavor_id)
self.requested_flavors[node_group.flavor_id] = flavor
else:
flavor = self.requested_flavors[node_group.flavor_id]
cpu = flavor.vcpus
ram = flavor.ram
data = {}
# config recommendations was taken from Ambari code
os = self._get_os_ram_recommendation(self._transform_mb_to_gb(ram))
hbase = self._get_hbase_ram_recommendations(
self._transform_mb_to_gb(ram))
reserved_ram = self._transform_gb_to_mb(os + hbase)
min_container_size = self._get_min_size_of_container(
self._transform_mb_to_gb(ram))
# we use large amount of containers to allow users to run
# at least 4 jobs at same time on clusters based on small flavors
data["containers"] = int(max(
8, min(2 * cpu, ram / min_container_size)))
data["ramPerContainer"] = (ram - reserved_ram) / data["containers"]
data["ramPerContainer"] = max(data["ramPerContainer"],
min_container_size)
data["ramPerContainer"] = min(2048, int(data["ramPerContainer"]))
data["ramPerContainer"] = int(data["ramPerContainer"])
data["mapMemory"] = int(data["ramPerContainer"])
data["reduceMemory"] = int(2 * data["ramPerContainer"])
data["amMemory"] = int(min(data["mapMemory"], data["reduceMemory"]))
return data
def _get_recommended_node_configs(self, node_group):
"""Calculates recommended MapReduce and YARN configs for specified
node_group.
:param node_group: NodeGroup Sahara resource
:return: dictionary with recommended MapReduce and YARN configs
"""
configs_to_update = list(self.node_configs_to_update.keys())
if not self._can_be_recommended(configs_to_update, node_group):
return {}
data = self._get_node_group_data(node_group)
r = {}
r['yarn.nodemanager.resource.memory-mb'] = (data['containers'] *
data['ramPerContainer'])
r['yarn.scheduler.minimum-allocation-mb'] = data['ramPerContainer']
r['yarn.scheduler.maximum-allocation-mb'] = (data['containers'] *
data['ramPerContainer'])
r['yarn.nodemanager.vmem-check-enabled'] = "false"
r['yarn.app.mapreduce.am.resource.mb'] = data['amMemory']
r['yarn.app.mapreduce.am.command-opts'] = self._get_java_opts(
0.8 * data['amMemory'])
r['mapreduce.map.memory.mb'] = data['mapMemory']
r['mapreduce.reduce.memory.mb'] = data['reduceMemory']
r['mapreduce.map.java.opts'] = self._get_java_opts(
0.8 * data['mapMemory'])
r['mapreduce.reduce.java.opts'] = self._get_java_opts(
0.8 * data['reduceMemory'])
r['mapreduce.task.io.sort.mb'] = int(min(
0.4 * data['mapMemory'], 1024))
return r
def _get_recommended_cluster_configs(self):
"""Method recommends dfs_replication for cluster.
:return: recommended value of dfs_replication.
"""
if not self._can_be_recommended(['dfs.replication']):
return {}
datanode_count = 0
datanode_proc_name = "datanode"
if 'datanode_process_name' in self.extra_spec:
datanode_proc_name = self.extra_spec['datanode_process_name']
for ng in self.cluster.node_groups:
if datanode_proc_name in ng.node_processes:
datanode_count += ng.count
replica = 'dfs.replication'
recommended_value = self._get_default_config_value(replica)
if recommended_value:
return {replica: min(recommended_value, datanode_count)}
else:
return {}