deb-sahara/sahara/service/engine.py
Vitaly Gridnev a0c4d13e90 Migrate to oslo.log
oslo.log was added to global requirements, so we can migrate to this module.
We also need this migration to ensure that it works correctly with the
log-improvements changes.

Since openstack.common.log has not been dropped from oslo-incubator,
we shouldn't remove it.

Change-Id: I90468e4db812ae0b5d8a43a37206b236f8904661
Closes-bug: #1412673
2015-01-27 11:09:10 +03:00


# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import abc
import datetime
import string

from oslo_log import log as logging
import six

from sahara import conductor as c
from sahara import context
from sahara.i18n import _
from sahara.i18n import _LI
from sahara.i18n import _LW
from sahara.service import networks
from sahara.utils import cluster_progress_ops as cpo
from sahara.utils import edp
from sahara.utils import general as g
from sahara.utils.openstack import nova
from sahara.utils import remote


LOG = logging.getLogger(__name__)

conductor = c.API


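# Engine is the abstract interface that concrete provisioning engines
# (for example the direct and Heat-based engines) implement to create,
# scale, shut down and roll back clusters.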
@six.add_metaclass(abc.ABCMeta)
class Engine(object):
    @abc.abstractmethod
    def create_cluster(self, cluster):
        pass

    @abc.abstractmethod
    def scale_cluster(self, cluster, node_group_id_map):
        pass

    @abc.abstractmethod
    def shutdown_cluster(self, cluster):
        pass

    @abc.abstractmethod
    def rollback_cluster(self, cluster, reason):
        pass

    @abc.abstractmethod
    def get_type_and_version(self):
        """Returns engine type and version

        Result should be in the form 'type.major.minor'.
        """
    def get_node_group_image_username(self, node_group):
        image_id = node_group.get_image_id()
        return nova.client().images.get(image_id).username

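    # _await_networks blocks until every instance has an IP assigned and is
    # reachable over SSH; both phases are reported as provisioning steps.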
    def _await_networks(self, cluster, instances):
        if not instances:
            return

        cpo.add_provisioning_step(cluster.id, _("Assign IPs"), len(instances))

        ips_assigned = set()
        while len(ips_assigned) != len(instances):
            if not g.check_cluster_exists(cluster):
                return
            for instance in instances:
                if instance.id not in ips_assigned:
                    if networks.init_instances_ips(instance):
                        ips_assigned.add(instance.id)
                        cpo.add_successful_event(instance)

            context.sleep(1)

        LOG.info(
            _LI("Cluster '%s': all instances have IPs assigned"), cluster.id)

        cluster = conductor.cluster_get(context.ctx(), cluster)
        instances = g.get_instances(cluster, ips_assigned)

        cpo.add_provisioning_step(
            cluster.id, _("Wait for instance accessibility"), len(instances))

        with context.ThreadGroup() as tg:
            for instance in instances:
                tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                         self._wait_until_accessible, instance)

        LOG.info(_LI("Cluster '%s': all instances are accessible"), cluster.id)

    @cpo.event_wrapper(mark_successful_on_exit=True)
    def _wait_until_accessible(self, instance):
        while True:
            try:
                # check if ssh is accessible and cloud-init
                # script is finished generating authorized_keys
                exit_code, stdout = instance.remote().execute_command(
                    "ls .ssh/authorized_keys", raise_when_error=False)

                if exit_code == 0:
                    LOG.debug(
                        'Instance %s is accessible' % instance.instance_name)
                    return
            except Exception as ex:
                LOG.debug("Can't login to node %s (%s), reason %s",
                          instance.instance_name, instance.management_ip, ex)

            context.sleep(5)

            if not g.check_cluster_exists(instance.node_group.cluster):
                return

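    # _configure_instances fans the per-instance work out over a ThreadGroup,
    # one thread per instance, so large clusters are configured in parallel.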
    def _configure_instances(self, cluster):
        """Configure active instances.

        * generate /etc/hosts
        * setup passwordless login
        * etc.
        """
        hosts_file = g.generate_etc_hosts(cluster)

        cpo.add_provisioning_step(
            cluster.id, _("Configure instances"), g.count_instances(cluster))

        with context.ThreadGroup() as tg:
            for node_group in cluster.node_groups:
                for instance in node_group.instances:
                    tg.spawn("configure-instance-%s" % instance.instance_name,
                             self._configure_instance, instance, hosts_file)

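    # _configure_instance installs the generated /etc/hosts, sets the
    # hostname to the instance FQDN and gives the image user a bash login
    # shell.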
    @cpo.event_wrapper(mark_successful_on_exit=True)
    def _configure_instance(self, instance, hosts_file):
        LOG.debug('Configuring instance %s' % instance.instance_name)

        with instance.remote() as r:
            r.write_file_to('etc-hosts', hosts_file)
            r.execute_command('sudo hostname %s' % instance.fqdn())
            r.execute_command('sudo mv etc-hosts /etc/hosts')

            r.execute_command('sudo usermod -s /bin/bash $USER')

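    # _generate_user_data_script builds the cloud-init user-data: it appends
    # the cluster's management public key to authorized_keys and comments out
    # 'Defaults requiretty' in /etc/sudoers so sudo keeps working over
    # non-interactive SSH sessions.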
    def _generate_user_data_script(self, node_group, instance_name):
        script = """#!/bin/bash
echo "${public_key}" >> ${user_home}/.ssh/authorized_keys\n
# ====== COMMENT OUT Defaults requiretty in /etc/sudoers ========
sed '/^Defaults requiretty*/ s/^/#/' -i /etc/sudoers\n
"""
        script += remote.get_userdata_template()

        username = node_group.image_username

        if username == "root":
            user_home = "/root/"
        else:
            user_home = "/home/%s/" % username

        script_template = string.Template(script)

        return script_template.safe_substitute(
            public_key=node_group.cluster.management_public_key,
            user_home=user_home,
            instance_name=instance_name)

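    # _clean_job_executions detaches all EDP job executions from the cluster
    # being deleted and marks any that never finished as KILLED.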
    def _clean_job_executions(self, cluster):
        ctx = context.ctx()
        for je in conductor.job_execution_get_all(ctx, cluster_id=cluster.id):
            update = {"cluster_id": None}
            if not je.end_time:
                info = je.info.copy() if je.info else {}
                info['status'] = edp.JOB_STATUS_KILLED
                update.update({"info": info,
                               "end_time": datetime.datetime.now()})
            conductor.job_execution_update(ctx, je, update)

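    # _log_operation_exception expects 'message' to contain %(cluster)s and
    # %(reason)s placeholders, which are filled from the mapping passed to
    # LOG.warn.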
    def _log_operation_exception(self, message, cluster, ex):
        # we want to log the initial exception even if cluster was deleted
        cluster_name = cluster.name if cluster is not None else '_unknown_'
        LOG.warn(message, {'cluster': cluster_name, 'reason': ex})
        if cluster is None:
            LOG.warn(
                _LW("Presumably the operation failed because the cluster was "
                    "deleted by a user during the process."))