Add validation of instance location to the compute manager's init_host

While the compute host was down, the instances running on it could have
been evacuated to another host. The new method checks that each local
instance's host is identical to the current host; otherwise the local
instance is destroyed.

part of blueprint rebuild-for-ha

DocImpact

Change-Id: Ic90df2b2887ee203e6d8261084e3f97773c5d81c
Co-authored-by: Oshrit Feder <oshritf@il.ibm.com>
This commit is contained in:
Kravchenko Pavel 2013-01-08 22:54:14 +02:00
parent 889116d490
commit 6fc00d3465
10 changed files with 396 additions and 1 deletions

View File

@ -386,6 +386,71 @@ class ComputeManager(manager.SchedulerDependentManager):
return self.conductor_api.instance_get_all_by_host(context, self.host)
def _destroy_evacuated_instances(self, context):
    """Destroy local instances that were evacuated to another host.

    While this compute host was down, the instances running on it could
    have been evacuated to another host. Every instance the hypervisor
    still reports is looked up in the database; when the host recorded
    there differs from this host, the local instance is destroyed through
    the driver.

    :param context: request context used for the database lookups
    """
    # Collect the database records of all instances known to the hypervisor.
    local_instances = []
    try:
        # Preferred path: find all local instances by uuid.
        for uuid in self.driver.list_instance_uuids():
            try:
                local_instances.append(
                    self.conductor_api.instance_get_by_uuid(context, uuid))
            except exception.InstanceNotFound:
                LOG.error(_('Instance %(uuid)s found in the '
                            'hypervisor, but not in the database'),
                          {'uuid': uuid})
    except NotImplementedError:
        # The driver doesn't support listing uuids; fall back to parsing
        # the database id out of each hypervisor instance name.
        for instance_name in self.driver.list_instances():
            try:
                # parse_decimal_id returns -1 when the name doesn't match
                # the template, which lets us ignore instances that were
                # created outside of OpenStack.
                instance_id = compute_utils.parse_decimal_id(
                    CONF.instance_name_template, instance_name)
                if instance_id == -1:
                    continue
                local_instances.append(
                    self.conductor_api.instance_get(context, instance_id))
            except exception.InstanceNotFound:
                LOG.error(_('Instance %(instance_name)s found in the '
                            'hypervisor, but not in the database'),
                          {'instance_name': instance_name})

    for instance in local_instances:
        instance_host = instance['host']
        if instance_host == self.host:
            # still owned by this host, nothing to clean up
            continue

        LOG.info(_('instance host %(instance_host)s is not equal to '
                   'current host %(host)s. '
                   'Deleting zombie instance %(instance_name)s'),
                 {'instance_host': instance_host,
                  'host': self.host,
                  'instance_name': instance['name']})

        network_info = self._get_instance_nw_info(context, instance)
        bdi = self._get_instance_volume_block_device_info(context,
                                                          instance['uuid'])
        # NOTE(review): the trailing False is presumably the driver's
        # "destroy disks" flag, i.e. the disks are kept -- confirm against
        # the driver API.
        self.driver.destroy(instance,
                            self._legacy_nw_info(network_info),
                            bdi,
                            False)
        LOG.info(_('zombie vm destroyed'))
def _init_instance(self, context, instance):
'''Initialize this instance during service init.'''
db_state = instance['power_state']
@ -450,6 +515,8 @@ class ComputeManager(manager.SchedulerDependentManager):
self.driver.filter_defer_apply_on()
try:
# checking that instance was not already evacuated to other host
self._destroy_evacuated_instances(context)
for instance in instances:
self._init_instance(context, instance)
finally:

View File

@ -253,3 +253,63 @@ def usage_volume_info(vol_usage):
vol_usage['curr_write_bytes'])
return usage_info
def parse_decimal_id(template, instance_name):
    """Find the decimal instance id encoded in an instance name.

    :param template: template e.g. instance-%03x-james
    :param instance_name: instance name like instance-007-james
    :returns: parsed decimal id, e.g. 7 from the input above; -1 when the
              name does not match the template or the template contains
              no integer format specifier
    """
    # find a pattern like %05x, %d, etc.
    match = re.search(r'(%\d*)([ioxds])', template)
    if match is None:
        # without a placeholder no name can carry an id
        return -1
    fmt = match.group(0)

    # split the template around the first placeholder to get the literal
    # prefix and suffix, and strip both from the name
    prefix, _sep, suffix = template.partition(fmt)
    if prefix:
        if not instance_name.startswith(prefix):
            # template prefix does not match
            return -1
        instance_name = instance_name[len(prefix):]
    if suffix:
        if not instance_name.endswith(suffix):
            # template suffix does not match
            return -1
        instance_name = instance_name[:-len(suffix)]

    if not instance_name:
        # nothing is left between prefix and suffix, so there is no id
        return -1

    # validate that the id length matches the minimal width of the format
    width = fmt[1:-1]
    if width:
        min_length = int(width)
        if len(instance_name) < min_length:
            return -1
        # an id with leading zeroes must be exactly of the minimal width
        if instance_name.startswith('0') and len(instance_name) != min_length:
            return -1
    elif instance_name.startswith('0'):
        # no minimal width means there can be no leading zeroes
        return -1

    # pick the numeric base of the template to convert to decimal
    base = {'x': 16, 'o': 8}.get(fmt[-1], 10)
    try:
        return int(instance_name, base)
    except ValueError:
        return -1

View File

@ -75,6 +75,9 @@ class LocalAPI(object):
"""Perform an instance update in the database"""
return self._manager.instance_update(context, instance_uuid, updates)
def instance_get(self, context, instance_id):
    """Return the manager's result for looking up an instance by id."""
    manager = self._manager
    return manager.instance_get(context, instance_id)
def instance_get_by_uuid(self, context, instance_uuid):
    """Return the manager's result for looking up an instance by uuid."""
    manager = self._manager
    return manager.instance_get_by_uuid(context, instance_uuid)
@ -252,6 +255,9 @@ class API(object):
def instance_destroy(self, context, instance):
    """Forward the instance destroy request to the conductor RPC API."""
    rpcapi = self.conductor_rpcapi
    return rpcapi.instance_destroy(context, instance)
def instance_get(self, context, instance_id):
    """Fetch an instance by id through the conductor RPC API."""
    rpcapi = self.conductor_rpcapi
    return rpcapi.instance_get(context, instance_id)
def instance_get_by_uuid(self, context, instance_uuid):
    """Fetch an instance by uuid through the conductor RPC API."""
    rpcapi = self.conductor_rpcapi
    return rpcapi.instance_get_by_uuid(context, instance_uuid)

View File

@ -43,7 +43,7 @@ datetime_fields = ['launched_at', 'terminated_at']
class ConductorManager(manager.SchedulerDependentManager):
"""Mission: TBD"""
RPC_API_VERSION = '1.23'
RPC_API_VERSION = '1.24'
def __init__(self, *args, **kwargs):
super(ConductorManager, self).__init__(service_name='conductor',
@ -70,6 +70,11 @@ class ConductorManager(manager.SchedulerDependentManager):
notifications.send_update(context, old_ref, instance_ref)
return jsonutils.to_primitive(instance_ref)
@rpc_common.client_exceptions(exception.InstanceNotFound)
def instance_get(self, context, instance_id):
    """Look up an instance by id in the db and return it as a primitive."""
    instance_ref = self.db.instance_get(context, instance_id)
    return jsonutils.to_primitive(instance_ref)
@rpc_common.client_exceptions(exception.InstanceNotFound)
def instance_get_by_uuid(self, context, instance_uuid):
return jsonutils.to_primitive(

View File

@ -56,6 +56,7 @@ class ConductorAPI(nova.openstack.common.rpc.proxy.RpcProxy):
1.22 - Added ping
1.23 - Added instance_get_all
Un-Deprecate instance_get_all_by_host
1.24 - Added instance_get
"""
BASE_RPC_API_VERSION = '1.0'
@ -77,6 +78,11 @@ class ConductorAPI(nova.openstack.common.rpc.proxy.RpcProxy):
instance_uuid=instance_uuid,
updates=updates_p))
def instance_get(self, context, instance_id):
    """Call the 'instance_get' RPC method (requires API version 1.24)."""
    return self.call(
        context,
        self.make_msg('instance_get', instance_id=instance_id),
        version='1.24')
def instance_get_by_uuid(self, context, instance_uuid):
msg = self.make_msg('instance_get_by_uuid',
instance_uuid=instance_uuid)

View File

@ -3110,6 +3110,149 @@ class ComputeTestCase(BaseTestCase):
self.assertEqual(timeouts.count(10), 10)
self.assertTrue(None in timeouts)
def test_init_host_with_evacuated_instances_uuid_list(self):
    """init_host destroys only the evacuated instance (uuid listing)."""
    ctxt = context.get_admin_context()

    # three instances in the central db that are still related to this host
    instances = [
        jsonutils.to_primitive(
            self._create_fake_instance({'host': self.compute.host}))
        for _unused in range(3)]
    # one instance that has already been evacuated to another host
    evacuated_instance = self._create_fake_instance({'host': 'otherhost'})

    # record the expected driver.init_host call
    self.mox.StubOutWithMock(self.compute.driver, 'init_host')
    self.compute.driver.init_host(host=self.compute.host)

    def fake_get_admin_context():
        return ctxt

    def fake_all(*args, **kwargs):
        pass

    def fake_list_instance_uuids():
        # instances of this host first, then the evacuated one
        local_uuids = [inst['uuid'] for inst in instances]
        return local_uuids + [evacuated_instance['uuid']]

    def fake_destroy(instance, nw, bdi, destroyDisks):
        self.assertFalse(destroyDisks)
        self.assertEqual(instance['uuid'], evacuated_instance['uuid'])

    self.stubs.Set(nova.context, 'get_admin_context',
                   fake_get_admin_context)

    driver = self.compute.driver
    self.stubs.Set(driver, 'list_instance_uuids', fake_list_instance_uuids)
    self.stubs.Set(driver, 'destroy', fake_destroy)
    for name in ('filter_defer_apply_on', 'filter_defer_apply_off'):
        self.stubs.Set(driver, name, fake_all)

    for name in ('_get_instance_nw_info',
                 '_get_instance_volume_block_device_info',
                 '_legacy_nw_info',
                 '_init_instance',
                 '_report_driver_status',
                 'publish_service_capabilities'):
        self.stubs.Set(self.compute, name, fake_all)

    # start test
    self.mox.ReplayAll()
    self.compute.init_host()

    # remove the test instances from the db again
    for created in [evacuated_instance] + instances:
        db.instance_destroy(ctxt, created['uuid'])
def test_init_host_with_evacuated_instances_names_list(self):
    """init_host destroys only the evacuated instance (name listing)."""
    ctxt = context.get_admin_context()

    # three instances in the central db that are still related to this host
    instances = [
        jsonutils.to_primitive(
            self._create_fake_instance({'host': self.compute.host}))
        for _unused in range(3)]
    # one instance that has already been evacuated to another host
    evacuated_instance = self._create_fake_instance({'host': 'otherhost'})

    # record the expected driver.init_host call
    self.mox.StubOutWithMock(self.compute.driver, 'init_host')
    self.compute.driver.init_host(host=self.compute.host)

    def fake_get_admin_context():
        return ctxt

    def fake_all(*args, **kwargs):
        pass

    def fake_list_instances():
        # instances of this host first, then the evacuated one
        local_names = [CONF.instance_name_template % inst['id']
                       for inst in instances]
        evacuated_name = (CONF.instance_name_template %
                          evacuated_instance['id'])
        return local_names + [evacuated_name]

    def fake_list_instance_uuids():
        # force the manager onto the name-based fallback
        raise NotImplementedError()

    def fake_destroy(instance, nw, bdi, destroyDisks):
        self.assertFalse(destroyDisks)
        self.assertEqual(instance['uuid'], evacuated_instance['uuid'])

    self.stubs.Set(nova.context, 'get_admin_context',
                   fake_get_admin_context)

    driver = self.compute.driver
    self.stubs.Set(driver, 'list_instances', fake_list_instances)
    self.stubs.Set(driver, 'list_instance_uuids', fake_list_instance_uuids)
    self.stubs.Set(driver, 'destroy', fake_destroy)
    for name in ('filter_defer_apply_on', 'filter_defer_apply_off'):
        self.stubs.Set(driver, name, fake_all)

    for name in ('_get_instance_nw_info',
                 '_get_instance_volume_block_device_info',
                 '_legacy_nw_info',
                 '_init_instance',
                 '_report_driver_status',
                 'publish_service_capabilities'):
        self.stubs.Set(self.compute, name, fake_all)

    # start test
    self.mox.ReplayAll()
    self.compute.init_host()

    # remove the test instances from the db again
    for created in [evacuated_instance] + instances:
        db.instance_destroy(ctxt, created['uuid'])
class ComputeAPITestCase(BaseTestCase):

View File

@ -390,3 +390,97 @@ class MetadataToDictTestCase(test.TestCase):
def test_metadata_to_dict_empty(self):
    """An empty metadata list converts to an empty dict."""
    converted = compute_utils.metadata_to_dict([])
    self.assertEqual(converted, {})
class ParseDecimalIDTestCase(test.TestCase):
    """Checks compute_utils.parse_decimal_id against many name templates."""

    def setUp(self):
        super(ParseDecimalIDTestCase, self).setUp()
        self.context = context.RequestContext('fake', 'fake')

        # hex, octal and decimal placeholders, with and without a width,
        # combined with various literal prefixes and suffixes
        self.templates = [
            CONF.instance_name_template,
            'instance-%08x', 'instance-%08o', 'instance-%08d',
            'instance-%04x', 'instance-%04o', 'instance-%04d',
            'instance-%x', 'instance-%o', 'instance-%d',
            'james-%07x-bond', 'james-%07o-bond', 'james-%07d-bond',
            'xxxx%xxxx', 'oooo%oooo', 'dddd%dddd',
            '%02x', '%02o', '%02d',
            '%x', '%o', '%d',
            '%07x-bond', '%07o-bond', '%07d-bond',
            '123%xxxx', '123%oooo', '123%dddd',
            '007%02x', '007%02o', '007%02d',
            '42%x', '42%o', '42%d',
            '700%07x007', '700%07o007', '700%07d007',
        ]

        self.ids = [1, 5, 10, 42, 90, 100, 256, 500, 1000, 2500,
                    19294, 100500, 21093404]

    def _validate_id(self, template, name):
        """Run parse_decimal_id on the given template and name."""
        return compute_utils.parse_decimal_id(template, name)

    def test_name_template_based(self):
        # a name rendered from the template must parse back to its id
        for template in self.templates:
            for instance_id in self.ids:
                name = template % instance_id
                self.assertEqual(instance_id,
                                 self._validate_id(template, name))

    def test_name_not_template_based(self):
        # names that deviate from the template must be rejected with -1
        for template in self.templates:
            for instance_id in self.ids:
                name = template % instance_id
                bad_names = (
                    'n%s' % name,
                    '%sw' % name,
                    'reg%s' % name,
                    '%sex' % name,
                    '%s%s%s' % (name[:1], 'abr', name[-1:]),
                    '%s%s%s' % (name[:1], 'qwer23456ert', name[-1:]),
                )
                for bad_name in bad_names:
                    self.assertEqual(-1,
                                     self._validate_id(template, bad_name))

View File

@ -167,6 +167,13 @@ class ComputeDriver(object):
# TODO(Vek): Need to pass context in for access to auth_token
raise NotImplementedError()
def list_instance_uuids(self):
    """
    Return a list with the UUIDs of all the instances known to the
    virtualization layer.

    This base implementation always raises NotImplementedError.
    """
    raise NotImplementedError()
def spawn(self, context, instance, image_meta, injected_files,
admin_password, network_info=None, block_device_info=None):
"""

View File

@ -399,6 +399,9 @@ class FakeDriver(driver.ComputeDriver):
def instance_on_disk(self, instance):
    """Always report False for the given instance in this fake driver."""
    on_disk = False
    return on_disk
def list_instance_uuids(self):
    """Return an empty list: this fake driver reports no instance uuids."""
    uuids = []
    return uuids
class FakeVirtAPI(virtapi.VirtAPI):
def instance_update(self, context, instance_uuid, updates):

View File

@ -456,6 +456,10 @@ class LibvirtDriver(driver.ComputeDriver):
return names
def list_instance_uuids(self):
    """Return the UUID string of every domain named by list_instances()."""
    uuids = []
    for name in self.list_instances():
        # look each domain up by name on the connection to read its UUID
        domain = self._conn.lookupByName(name)
        uuids.append(domain.UUIDString())
    return uuids
def plug_vifs(self, instance, network_info):
"""Plug VIFs into networks."""
for (network, mapping) in network_info: