Merge "Sanity check that new hosts have no instances"
commit 1e75373beb
@@ -1570,6 +1570,18 @@ class ComputeManager(manager.Manager):
         LOG.debug('Verified node %s matches my host %s',
                   node.uuid, self.host)
 
+    def _sanity_check_new_host(self):
+        instances_on_hv = self.driver.list_instance_uuids()
+        if len(instances_on_hv) > 0:
+            # This means we have instances on our hypervisor, but we think
+            # we are a new host (i.e. we created a new service record). That
+            # likely means we're pointed at an empty database or the wrong
+            # cell.
+            raise exception.InvalidConfiguration(
+                'My hypervisor has existing instances, but I appear to be '
+                'a new service in this database. Possible database '
+                'configuration error, refusing to start!')
+
     def init_host(self, service_ref):
         """Initialization for a standalone compute service."""
 
@@ -1578,6 +1590,10 @@ class ComputeManager(manager.Manager):
             # to record a locally-persistent node identity because
             # we have upgraded from a previous version.
             self._ensure_existing_node_identity(service_ref)
+        else:
+            # If we are a new service (in the database), make sure we have no
+            # instances on our hypervisor as we would expect.
+            self._sanity_check_new_host()
 
         if CONF.pci.device_spec:
             # Simply loading the PCI passthrough spec will do a bunch of
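
Taken together, the two hunks above make init_host() branch on whether a service record already existed: an existing service re-asserts its node identity, while a brand-new one must find an empty hypervisor. A condensed sketch of the resulting flow, with nova's internals stubbed out (the class and exception below are illustrative stand-ins, not nova code):

# Condensed sketch of the startup decision above; not nova's real code.
class InvalidConfiguration(Exception):
    """Stand-in for nova.exception.InvalidConfiguration."""


class ComputeStartupSketch:
    def __init__(self, driver, host):
        self.driver = driver  # virt driver exposing list_instance_uuids()
        self.host = host

    def _sanity_check_new_host(self):
        # A brand-new service record must not coexist with instances
        # already running on this hypervisor.
        if self.driver.list_instance_uuids():
            raise InvalidConfiguration(
                'My hypervisor has existing instances, but I appear to be '
                'a new service in this database.')

    def init_host(self, service_ref):
        if service_ref:
            # Existing service: re-assert our node identity (elided here).
            pass
        else:
            # New service: refuse to start if the hypervisor is not empty.
            self._sanity_check_new_host()

The practical effect, per the inline comments, is that a compute pointed at an empty or wrong cell database now fails fast at startup instead of registering itself as a new duplicate host.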

@@ -120,7 +120,8 @@ class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
         """Tests a scenario where a server is created on a host, the host
         goes down, the server is evacuated to another host, and then the
         source host compute service is deleted. After that the deleted
-        compute service is restarted and starts successfully.
+        compute service is restarted and refuses to run because it finds its
+        service record deleted even though it has instances.
         """
         # Create our source host that we will evacuate *from* later.
         host1 = self._start_compute('host1')
@@ -154,12 +155,16 @@ class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
         # Then the resource provider is also deleted.
         resp = self.placement.get('/resource_providers/%s' % rp_uuid)
         self.assertEqual(404, resp.status)
-        # Try to restart the host1 compute service to create a new service
-        # and a new resource provider.
-        self.restart_compute_service(host1)
-        # Make sure the compute service record for host1 is recreated.
-        service = self.admin_api.get_services(
-            binary='nova-compute', host='host1')[0]
+        # Try to restart the host1 compute service and make sure it recognizes
+        # that its service record has been deleted even though it still has
+        # instances running.
+        self.assertRaises(exception.InvalidConfiguration,
+                          self.restart_compute_service, host1)
+        # Make sure the compute service record for host1 is not recreated
+        # since we aborted startup.
+        services = self.admin_api.get_services(
+            binary='nova-compute', host='host1')
+        self.assertEqual([], services)
 
     def test_migrate_confirm_after_deleted_source_compute(self):
         """Tests a scenario where a server is cold migrated and while in

@@ -916,6 +916,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         return instance_obj._make_instance_list(
             self.context, objects.InstanceList(), db_list, None)
 
+    @mock.patch.object(manager.ComputeManager,
+                       '_sanity_check_new_host')
     @mock.patch.object(manager.ComputeManager,
                        '_ensure_existing_node_identity')
     @mock.patch.object(manager.ComputeManager, '_get_nodes')

@@ -937,7 +939,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
             mock_destroy, mock_admin_ctxt, mock_host_get,
             mock_init_host,
             mock_error_interrupted, mock_get_nodes,
-            mock_existing_node):
+            mock_existing_node, mock_check_new):
         mock_admin_ctxt.return_value = self.context
         inst_list = _make_instance_list(startup_instances)
         mock_host_get.return_value = inst_list
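
A note on the signature change here: stacked mock.patch decorators hand their mocks to the test bottom-up, so the new _sanity_check_new_host patch, added at the top of the stack, arrives as the last extra parameter (mock_check_new after mock_existing_node). A minimal, self-contained demonstration of that ordering rule:

import unittest
from unittest import mock


class Thing:
    def first(self):
        pass

    def second(self):
        pass


class OrderingTest(unittest.TestCase):
    # Stacked patches inject bottom-up: the decorator nearest the test
    # function produces the first mock argument, the topmost the last.
    @mock.patch.object(Thing, 'second')  # topmost -> last argument
    @mock.patch.object(Thing, 'first')   # nearest -> first argument
    def test_order(self, mock_first, mock_second):
        Thing().first()
        mock_first.assert_called_once_with()
        mock_second.assert_not_called()


if __name__ == '__main__':
    unittest.main()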

@@ -948,6 +950,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
 
         self.compute.init_host(None)
 
+        mock_check_new.assert_called_once_with()
         mock_existing_node.assert_not_called()
         mock_validate_pinning.assert_called_once_with(inst_list)
         mock_validate_vtpm.assert_called_once_with(inst_list)

@@ -1001,6 +1004,18 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         mock_get_nodes.assert_called_once_with(
             test.MatchType(nova.context.RequestContext))
 
+    def test_init_host_new_with_instances(self):
+        """Tests the case where we start up without an existing service_ref,
+        indicating that we are a new service, but our hypervisor reports
+        existing instances. This indicates we were moved to another cell,
+        our database got wiped, etc.
+        """
+        with mock.patch.object(self.compute.driver,
+                               'list_instance_uuids') as mock_insts:
+            mock_insts.return_value = ['foo']
+            self.assertRaises(exception.InvalidConfiguration,
+                              self.compute.init_host, None)
+
     @mock.patch('nova.objects.InstanceList')
     @mock.patch('nova.objects.MigrationList.get_by_filters')
     @mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids')
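
Unlike the decorated test above, test_init_host_new_with_instances uses mock.patch.object as a context manager, which keeps the fake list_instance_uuids() scoped to the with-block. A standalone sketch of that pattern, with a hypothetical Driver class standing in for the virt driver:

from unittest import mock


class Driver:
    """Illustrative stand-in for a nova virt driver."""

    def list_instance_uuids(self):
        return []


driver = Driver()
# The patch is active only inside the with-block and is undone on exit,
# even if the block raises.
with mock.patch.object(driver, 'list_instance_uuids') as mock_insts:
    mock_insts.return_value = ['foo']
    assert driver.list_instance_uuids() == ['foo']
assert driver.list_instance_uuids() == []  # original behavior restored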