250 lines
9.4 KiB
Python
250 lines
9.4 KiB
Python
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
|
|
|
# Copyright (c) 2010 Openstack, LLC.
|
|
# Copyright 2010 United States Government as represented by the
|
|
# Administrator of the National Aeronautics and Space Administration.
|
|
# All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
"""
|
|
Scheduler base class that all Schedulers should inherit from
|
|
"""
|
|
|
|
import datetime
|
|
|
|
from nova import db
|
|
from nova import exception
|
|
from nova import flags
|
|
from nova import log as logging
|
|
from nova import rpc
|
|
from nova.compute import power_state
|
|
|
|
# Shared flag registry; the scheduler reads its liveness window from here.
FLAGS = flags.FLAGS

# Number of seconds a service may go without a check-in before the
# scheduler stops treating it as up.
flags.DEFINE_integer(
    'service_down_time',
    60,
    'maximum time since last checkin for up service')
|
|
|
|
|
|
class NoValidHost(exception.Error):
    """Error signalling that no valid host exists for the command."""
|
|
|
|
|
|
class WillNotSchedule(exception.Error):
    """Error signalling that the specified host is down or unknown."""
|
|
|
|
|
|
class Scheduler(object):
    """The base class that all Scheduler classes should inherit from.

    Subclasses must override at least :meth:`schedule`. The live-migration
    entry point and its pre-checks are implemented here and rely on the
    ``db`` and ``rpc`` modules.
    """

    @staticmethod
    def service_is_up(service):
        """Check whether a service is up based on its last heartbeat.

        :param service: service record readable by key; ``updated_at`` and
                        ``created_at`` are consulted.
        :returns: True when the time elapsed since the last heartbeat is
                  strictly less than ``FLAGS.service_down_time`` seconds.
        """
        # A record that was never updated falls back to its creation time.
        last_heartbeat = service['updated_at'] or service['created_at']
        # Timestamps in DB are UTC.
        elapsed = datetime.datetime.utcnow() - last_heartbeat
        return elapsed < datetime.timedelta(seconds=FLAGS.service_down_time)

    @staticmethod
    def _first_service_on_host(context, host, not_found_msg):
        """Return the first service record registered for ``host``.

        :param context: request context handed to the db layer.
        :param host: host name to look up.
        :param not_found_msg: already-translated format string with a single
                              ``%s`` placeholder for the host name.
        :raises exception.Invalid: when no service record exists for host.
        """
        service_refs = db.service_get_all_by_host(context, host)
        if not service_refs:
            raise exception.Invalid(not_found_msg % host)
        # NOTE(review): only the first record is used; if a host can carry
        # several service records their ordering matters -- confirm.
        return service_refs[0]

    def hosts_up(self, context, topic):
        """Return the list of hosts that have a running service for topic.

        :param context: request context handed to the db layer.
        :param topic: service topic to look up.
        :returns: ``host`` attribute of every service for ``topic`` that
                  :meth:`service_is_up` reports as up.
        """
        services = db.service_get_all_by_topic(context, topic)
        return [service.host
                for service in services
                if self.service_is_up(service)]

    def schedule(self, context, topic, *_args, **_kwargs):
        """Must override at least this method for scheduler to work.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(_("Must implement a fallback schedule"))

    def schedule_live_migration(self, context, instance_id, dest):
        """Validate a live migration request and mark it as in progress.

        Runs the source, destination and common pre-checks, then sets the
        instance state and the status of each attached volume to
        'migrating'.

        :param context: request context handed to the db layer.
        :param instance_id: id of the instance to migrate.
        :param dest: name of the destination host.
        :returns: the host the instance currently runs on (the source).
        :raises exception.Invalid: when any pre-check fails (other
                                   exceptions from the checks propagate).
        """
        # Look the instance up; the checks below verify it is running.
        instance_ref = db.instance_get(context, instance_id)

        # Checking instance.
        self._live_migration_src_check(context, instance_ref)

        # Checking destination host.
        self._live_migration_dest_check(context, instance_ref, dest)

        # Common checking.
        self._live_migration_common_check(context, instance_ref, dest)

        # Changing instance_state.
        db.instance_set_state(context,
                              instance_id,
                              power_state.PAUSED,
                              'migrating')

        # Changing volume state.
        for volume_ref in instance_ref['volumes']:
            db.volume_update(context,
                             volume_ref['id'],
                             {'status': 'migrating'})

        # Return value is necessary to send request to src.
        # Check _schedule() in detail.
        return instance_ref['host']

    def _live_migration_src_check(self, context, instance_ref):
        """Live migration check routine (for src host).

        :param context: request context handed to the db layer.
        :param instance_ref: instance record readable by key.
        :raises exception.Invalid: when the instance is not running, when
            volumes are attached but the first 'volume' service is missing
            or down, or when the compute service on the source host is
            missing or down.
        """
        # Checking instance is running: both the numeric state and its
        # textual description must agree.
        if (power_state.RUNNING != instance_ref['state'] or
                'running' != instance_ref['state_description']):
            msg = _('Instance(%s) is not running')
            ec2_id = instance_ref['hostname']
            raise exception.Invalid(msg % ec2_id)

        # Checking volume node is running when any volumes are mounted
        # to the instance.
        if instance_ref['volumes']:
            services = db.service_get_all_by_topic(context, 'volume')
            if not services or not self.service_is_up(services[0]):
                msg = _('volume node is not alive(time synchronize problem?)')
                raise exception.Invalid(msg)

        # Checking src host is alive.
        src = instance_ref['host']
        services = db.service_get_all_by_topic(context, 'compute')
        services = [service for service in services if service.host == src]
        if not services or not self.service_is_up(services[0]):
            msg = _('%s is not alive(time synchronize problem?)')
            raise exception.Invalid(msg % src)

    def _live_migration_dest_check(self, context, instance_ref, dest):
        """Live migration check routine (for destination host).

        :param context: request context handed to the db layer.
        :param instance_ref: instance record readable by key.
        :param dest: name of the destination host.
        :raises exception.Invalid: when dest has no service record, its
            first service is not a 'compute' service, that service is down,
            or dest equals the host the instance currently runs on.
        :raises exception.NotEmpty: propagated from
            :meth:`has_enough_resource`.
        """
        # Checking dest exists and is a compute node.
        dservice_ref = self._first_service_on_host(
            context, dest, _('%s does not exists.'))
        if dservice_ref['topic'] != 'compute':
            msg = _('%s must be compute node')
            raise exception.Invalid(msg % dest)

        # Checking dest host is alive.
        if not self.service_is_up(dservice_ref):
            msg = _('%s is not alive(time synchronize problem?)')
            raise exception.Invalid(msg % dest)

        # Checking that the host where the instance is running
        # and dest are not the same.
        src = instance_ref['host']
        if dest == src:
            ec2_id = instance_ref['hostname']
            msg = _('%s is where %s is running now. choose other host.')
            raise exception.Invalid(msg % (dest, ec2_id))

        # Checking dest host still has enough capacity.
        self.has_enough_resource(context, instance_ref, dest)

    def _live_migration_common_check(self, context, instance_ref, dest):
        """Live migration check routine shared by source and destination.

        Below pre-checkings are followed by
        http://wiki.libvirt.org/page/TodoPreMigrationChecks

        :param context: request context handed to the db and rpc layers.
        :param instance_ref: instance record readable by key.
        :param dest: name of the destination host.
        :raises exception.Invalid: when dest or the launch host has no
            service record, the hypervisor types differ, or the launch
            host's hypervisor version is greater than the destination's.
        :raises rpc.RemoteError: re-raised when the remote ``compare_cpu``
            call fails.
        """
        # Checking dest exists.
        dservice_ref = self._first_service_on_host(
            context, dest, _('%s does not exists.'))

        # Checking original host (where instance was launched at) exists.
        orighost = instance_ref['launched_on']
        oservice_ref = self._first_service_on_host(
            context, orighost,
            _('%s(where instance was launched at) does not exists.'))

        # Checking hypervisor is same.
        otype = oservice_ref['hypervisor_type']
        dtype = dservice_ref['hypervisor_type']
        if otype != dtype:
            msg = _('Different hypervisor type(%s->%s)')
            raise exception.Invalid(msg % (otype, dtype))

        # Checking hypervisor version: the destination must not be older
        # than the host the instance was launched on.
        oversion = oservice_ref['hypervisor_version']
        dversion = dservice_ref['hypervisor_version']
        if oversion > dversion:
            msg = _('Older hypervisor version(%s->%s)')
            raise exception.Invalid(msg % (oversion, dversion))

        # Checking cpuinfo: ask the destination compute service to compare
        # its CPU against the launch host's recorded cpu_info.
        cpu_info = oservice_ref['cpu_info']
        try:
            rpc.call(context,
                     db.queue_get_for(context, FLAGS.compute_topic, dest),
                     {"method": 'compare_cpu',
                      "args": {'cpu_info': cpu_info}})
        except rpc.RemoteError as e:
            msg = _("%s doesnt have compatibility to %s"
                    "(where %s was launched at)")
            ec2_id = instance_ref['hostname']
            src = instance_ref['host']
            logging.error(msg % (dest, src, ec2_id))
            # Log for the operator, then let the caller see the failure.
            raise e

    def has_enough_resource(self, context, instance_ref, dest):
        """Check if destination host has enough resource for live migration.

        Subtracts the vcpus, memory_mb and local_gb of every instance
        already on ``dest`` from the totals on dest's first service record
        and compares what remains with the migrating instance's needs.

        :param context: request context handed to the db layer.
        :param instance_ref: instance record readable by key.
        :param dest: name of the destination host.
        :raises exception.Invalid: when dest has no service record.
        :raises exception.NotEmpty: when any remaining resource is less
            than or equal to what the instance requires.
        """
        # Getting instance information.
        ec2_id = instance_ref['hostname']
        vcpus = instance_ref['vcpus']
        mem = instance_ref['memory_mb']
        hdd = instance_ref['local_gb']

        # Getting host information.
        service_ref = self._first_service_on_host(
            context, dest, _('%s does not exists.'))

        total_cpu = int(service_ref['vcpus'])
        total_mem = int(service_ref['memory_mb'])
        total_hdd = int(service_ref['local_gb'])

        # Deduct what the instances already on dest consume.
        for i_ref in db.instance_get_all_by_host(context, dest):
            total_cpu -= int(i_ref['vcpus'])
            total_mem -= int(i_ref['memory_mb'])
            total_hdd -= int(i_ref['local_gb'])

        # Checking host has enough resources.
        logging.debug('host(%s) remains vcpu:%s mem:%s hdd:%s,' %
                      (dest, total_cpu, total_mem, total_hdd))
        logging.debug('instance(%s) has vcpu:%s mem:%s hdd:%s,' %
                      (ec2_id, vcpus, mem, hdd))

        # NOTE(review): `<=` also rejects a host whose remaining capacity
        # exactly equals the request -- confirm this headroom is intended.
        if total_cpu <= vcpus or total_mem <= mem or total_hdd <= hdd:
            msg = _('%s doesnt have enough resource for %s') % (dest, ec2_id)
            raise exception.NotEmpty(msg)

        logging.debug(_('%s has_enough_resource() for %s') % (dest, ec2_id))
|