Files
gantt/nova/scheduler/driver.py
Kei Masumoto 28bbe0e8fb Fixed based on reviewer's comment.
1. DB schema change
   vcpu/memory/hdd info was stored in the Service table,
   but the reviewer pointed out that creating a new table is better
   since the Service table already has too many columns.

2. Querying service table method
   Several different methods were used to query the compute-node
   record from the DB for the same purpose. Changed to use a single method.

3. Removing unnecessary operation.
   FixedIP no longer has a host column.
   I had not noticed that; removed the unnecessary operation from post_live_migration.

4. Test code
   Modified the test code to match the above changes.
2011-02-22 13:16:52 +09:00

260 lines
9.9 KiB
Python

# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright (c) 2010 Openstack, LLC.
# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Scheduler base class that all Schedulers should inherit from
"""
import datetime
from nova import db
from nova import exception
from nova import flags
from nova import log as logging
from nova import rpc
from nova.compute import power_state
# Shorthand for the flag container exposed by nova.flags.
FLAGS = flags.FLAGS
# Number of seconds since a service's last check-in within which it is
# still considered up (consumed by Scheduler.service_is_up).
flags.DEFINE_integer('service_down_time', 60,
                     'maximum time since last checkin for up service')
# NOTE(review): presumably makes the 'instances_path' flag defined in
# nova.compute.manager available through FLAGS in this module -- confirm.
flags.DECLARE('instances_path', 'nova.compute.manager')
class NoValidHost(exception.Error):
    """Error indicating that there is no valid host for the command."""
class WillNotSchedule(exception.Error):
    """Error indicating the specified host is not up or doesn't exist."""
class Scheduler(object):
    """The base class that all Scheduler classes should inherit from.

    Provides service liveness helpers and the pre-checks that are run
    before a live migration is scheduled. Subclasses must override
    ``schedule``.
    """

    @staticmethod
    def service_is_up(service):
        """Check whether a service is up based on last heartbeat.

        :param service: mapping providing 'updated_at' and 'created_at'.
        :returns: True if the last heartbeat ('updated_at', falling back
                  to 'created_at' when that is unset) is less than
                  FLAGS.service_down_time seconds old.
        """
        last_heartbeat = service['updated_at'] or service['created_at']
        # Timestamps in DB are UTC.
        elapsed = datetime.datetime.utcnow() - last_heartbeat
        return elapsed < datetime.timedelta(seconds=FLAGS.service_down_time)

    def hosts_up(self, context, topic):
        """Return the list of hosts that have a running service for topic."""
        services = db.service_get_all_by_topic(context, topic)
        return [service.host
                for service in services
                if self.service_is_up(service)]

    def schedule(self, context, topic, *_args, **_kwargs):
        """Must override at least this method for scheduler to work."""
        raise NotImplementedError(_("Must implement a fallback schedule"))

    def schedule_live_migration(self, context, instance_id, dest):
        """Run the live migration pre-checks and mark the instance migrating.

        :param context: request context passed through to db/rpc calls.
        :param instance_id: id of the instance to migrate.
        :param dest: destination host name.
        :returns: the host the instance currently runs on (the source).
        :raises: whatever the individual check routines raise when a
                 pre-check fails.
        """
        # Whether instance exists and running
        instance_ref = db.instance_get(context, instance_id)

        # Checking instance.
        self._live_migration_src_check(context, instance_ref)

        # Checking destination host.
        self._live_migration_dest_check(context, instance_ref, dest)

        # Common checking.
        self._live_migration_common_check(context, instance_ref, dest)

        # Changing instance_state.
        db.instance_set_state(context,
                              instance_id,
                              power_state.PAUSED,
                              'migrating')

        # Changing volume state
        for volume in instance_ref['volumes']:
            db.volume_update(context,
                             volume['id'],
                             {'status': 'migrating'})

        # Return value is necessary to send request to src
        # Check _schedule() in detail.
        return instance_ref['host']

    def _live_migration_src_check(self, context, instance_ref):
        """Live migration check routine (for src host).

        :param context: request context passed through to db calls.
        :param instance_ref: instance record being migrated.
        :raises exception.Invalid: if the instance is not running, if it
            has volumes and the first 'volume' service is missing or
            down, or if the source compute service is down.
        """
        # Checking instance is running.
        if (power_state.RUNNING != instance_ref['state'] or
                'running' != instance_ref['state_description']):
            msg = _('Instance(%s) is not running')
            ec2_id = instance_ref['hostname']
            raise exception.Invalid(msg % ec2_id)

        # Checking volume node is running when any volumes are mounted
        # to the instance.
        if instance_ref['volumes']:
            services = db.service_get_all_by_topic(context, 'volume')
            if not services or not self.service_is_up(services[0]):
                msg = _('volume node is not alive(time synchronize problem?)')
                raise exception.Invalid(msg)

        # Checking src host exists and compute node
        src = instance_ref['host']
        services = db.service_get_all_compute_by_host(context, src)

        # Checking src host is alive.
        if not self.service_is_up(services[0]):
            msg = _('%s is not alive(time synchronize problem?)')
            raise exception.Invalid(msg % src)

    def _live_migration_dest_check(self, context, instance_ref, dest):
        """Live migration check routine (for destination host).

        :param context: request context passed through to db calls.
        :param instance_ref: instance record being migrated.
        :param dest: destination host name.
        :raises exception.Invalid: if the destination compute service is
            down or if dest is the host the instance already runs on.
        """
        # Checking dest exists and compute node.
        dservice_refs = db.service_get_all_compute_by_host(context, dest)
        dservice_ref = dservice_refs[0]

        # Checking dest host is alive.
        if not self.service_is_up(dservice_ref):
            msg = _('%s is not alive(time synchronize problem?)')
            raise exception.Invalid(msg % dest)

        # Checking whether The host where instance is running
        # and dest is not same.
        src = instance_ref['host']
        if dest == src:
            ec2_id = instance_ref['hostname']
            msg = _("""%(dest)s is where %(ec2_id)s is """
                    """running now. choose other host.""") % {
                        'dest': dest, 'ec2_id': ec2_id}
            raise exception.Invalid(msg)

        # Checking dst host still has enough capacities.
        self.has_enough_resource(context, instance_ref, dest)

    def _live_migration_common_check(self, context, instance_ref, dest):
        """Live migration check routine common to src and dest.

        Below pre-checkings are followed by
        http://wiki.libvirt.org/page/TodoPreMigrationChecks

        :param context: request context passed through to db/rpc calls.
        :param instance_ref: instance record being migrated.
        :param dest: destination host name.
        :raises exception.Invalid: if the host the instance was launched
            on cannot be found, the hypervisor types differ, or the
            destination hypervisor version is older than the original.
        :raises rpc.RemoteError: if the remote 'compare_cpu' call fails.
        """
        # Checking shared storage connectivity
        self.mounted_on_same_shared_storage(context, instance_ref, dest)

        # Checking dest exists.
        dservice_refs = db.service_get_all_compute_by_host(context, dest)
        dservice_ref = dservice_refs[0]['compute_service'][0]

        # Checking original host( where instance was launched at) exists.
        try:
            oservice_refs = db.service_get_all_compute_by_host(
                context, instance_ref['launched_on'])
        except exception.NotFound:
            msg = _('%s(where instance was launched at) does not exists.')
            raise exception.Invalid(msg % instance_ref['launched_on'])
        oservice_ref = oservice_refs[0]['compute_service'][0]

        # Checking hypervisor is same.
        o = oservice_ref['hypervisor_type']
        d = dservice_ref['hypervisor_type']
        if o != d:
            msg = _('Different hypervisor type(%(o)s->%(d)s)') % {
                'o': o, 'd': d}
            raise exception.Invalid(msg)

        # Checking hypervisor version.
        o = oservice_ref['hypervisor_version']
        d = dservice_ref['hypervisor_version']
        if o > d:
            msg = _('Older hypervisor version(%(o)s->%(d)s)') % {
                'o': o, 'd': d}
            raise exception.Invalid(msg)

        # Checking cpuinfo.
        try:
            rpc.call(context,
                     db.queue_get_for(context, FLAGS.compute_topic, dest),
                     {"method": 'compare_cpu',
                      "args": {'cpu_info': oservice_ref['cpu_info']}})
        except rpc.RemoteError as e:
            ec2_id = instance_ref['hostname']
            src = instance_ref['host']
            msg = _("""%(dest)s doesnt have compatibility to %(src)s"""
                    """(where %(ec2_id)s was launched at)""")
            logging.exception(msg % {'dest': dest,
                                     'src': src,
                                     'ec2_id': ec2_id})
            raise e

    def has_enough_resource(self, context, instance_ref, dest):
        """Check if destination host has enough resource for live migration.

        Currently, only memory checking has been done.
        If storage migration(block migration, meaning live-migration
        without any shared storage) will be available, local storage
        checking is also necessary.

        :param context: request context passed through to db calls.
        :param instance_ref: instance record being migrated.
        :param dest: destination host name.
        :raises exception.NotEmpty: if the memory available on dest
            (memory_mb - memory_mb_used) is less than or equal to the
            instance's memory_mb.
        """
        # Getting instance information
        ec2_id = instance_ref['hostname']

        # Getting host information
        service_refs = db.service_get_all_compute_by_host(context, dest)
        compute_service_ref = service_refs[0]['compute_service'][0]

        mem_total = int(compute_service_ref['memory_mb'])
        mem_used = int(compute_service_ref['memory_mb_used'])
        mem_avail = mem_total - mem_used
        mem_inst = instance_ref['memory_mb']
        if mem_avail <= mem_inst:
            msg = _("""%(ec2_id)s is not capable to migrate %(dest)s"""
                    """(host:%(mem_avail)s <= instance:%(mem_inst)s)""")
            raise exception.NotEmpty(msg % {'ec2_id': ec2_id,
                                            'dest': dest,
                                            'mem_avail': mem_avail,
                                            'mem_inst': mem_inst})

    def mounted_on_same_shared_storage(self, context, instance_ref, dest):
        """Check that src and dest hosts see the same shared storage.

        Asks the destination compute host to create a temporary file
        ('mktmpfile') and then asks the source compute host to confirm
        the returned path ('confirm_tmpfile').

        :param context: request context passed through to db/rpc calls.
        :param instance_ref: instance record being migrated.
        :param dest: destination host name.
        :raises rpc.RemoteError: if either remote call fails.
        :raises exception.NotFound: if raised while confirming the file
            at the source host.
        """
        src = instance_ref['host']
        dst_t = db.queue_get_for(context, FLAGS.compute_topic, dest)
        src_t = db.queue_get_for(context, FLAGS.compute_topic, src)

        # create tmpfile at dest host
        try:
            filename = rpc.call(context, dst_t, {"method": 'mktmpfile'})
        except rpc.RemoteError as e:
            msg = _("Cannot create tmpfile at %s to confirm shared storage.")
            # The flag declared for this module is 'instances_path'.
            logging.error(msg % FLAGS.instances_path)
            raise e

        # make sure existence at src host.
        try:
            rpc.call(context, src_t,
                     {"method": 'confirm_tmpfile',
                      "args": {'path': filename}})
        except (rpc.RemoteError, exception.NotFound) as e:
            ipath = FLAGS.instances_path
            msg = _("""Cannot comfirm %(ipath)s at %(dest)s is located at"""
                    """ same shared storage.""") % {
                        'ipath': ipath, 'dest': dest}
            logging.error(msg)
            raise e