Merge "Detect host renames and abort startup"
This commit is contained in:
commit
e9d716f555
nova
releasenotes/notes
@@ -1536,6 +1536,22 @@ class ComputeManager(manager.Manager):
 
         nova.virt.node.write_local_node_uuid(db_nodes[0].uuid)
 
+    def _check_for_host_rename(self, nodes_by_uuid):
+        if 'ironic' in CONF.compute_driver.lower():
+            # Ironic (currently) rebalances nodes at various times, and as
+            # such, nodes being discovered as assigned to this host with a
+            # different hostname is not surprising. Skip this check for
+            # ironic.
+            return
+        for node in nodes_by_uuid.values():
+            if node.host != self.host:
+                raise exception.InvalidConfiguration(
+                    'My node %s has host %r but my host is %r; '
+                    'Possible rename detected, refusing to start!' % (
+                        node.uuid, node.host, self.host))
+            LOG.debug('Verified node %s matches my host %s',
+                      node.uuid, self.host)
+
     def init_host(self, service_ref):
         """Initialization for a standalone compute service."""
 
@@ -1574,15 +1590,6 @@ class ComputeManager(manager.Manager):
             raise exception.InvalidConfiguration(msg)
 
         self.driver.init_host(host=self.host)
-        context = nova.context.get_admin_context()
-        instances = objects.InstanceList.get_by_host(
-            context, self.host,
-            expected_attrs=['info_cache', 'metadata', 'numa_topology'])
-
-        self.init_virt_events()
-
-        self._validate_pinning_configuration(instances)
-        self._validate_vtpm_configuration(instances)
 
         # NOTE(gibi): At this point the compute_nodes of the resource tracker
         # has not been populated yet so we cannot rely on the resource tracker
@@ -1593,8 +1600,23 @@ class ComputeManager(manager.Manager):
         # _destroy_evacuated_instances and
         # _error_out_instances_whose_build_was_interrupted out in the
         # background on startup
+        context = nova.context.get_admin_context()
         nodes_by_uuid = self._get_nodes(context)
+
+        # NOTE(danms): Check for a possible host rename and abort
+        # startup before we start mucking with instances we think are
+        # ours.
+        self._check_for_host_rename(nodes_by_uuid)
+
+        instances = objects.InstanceList.get_by_host(
+            context, self.host,
+            expected_attrs=['info_cache', 'metadata', 'numa_topology'])
+
+        self.init_virt_events()
+
+        self._validate_pinning_configuration(instances)
+        self._validate_vtpm_configuration(instances)
 
         try:
             # checking that instance was not already evacuated to other host
             evacuated_instances = self._destroy_evacuated_instances(
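
Taken together, the init_host() hunks above reorder startup so that the node
records are fetched and the rename guard runs before any instance is loaded or
acted on. Below is a small, self-contained simulation of that fail-fast
ordering; every class and name in it is an invented stand-in for illustration,
not nova code.

# Simulated startup ordering: the rename guard runs before any instance
# handling, so a renamed host aborts without side effects. All names here
# are invented for the example.
class InvalidConfiguration(Exception):
    pass


class FakeNode:
    def __init__(self, uuid, host):
        self.uuid = uuid
        self.host = host


class MiniComputeManager:
    def __init__(self, host, nodes_by_uuid):
        self.host = host
        self.nodes_by_uuid = nodes_by_uuid
        self.instances_processed = False

    def _check_for_host_rename(self, nodes_by_uuid):
        for node in nodes_by_uuid.values():
            if node.host != self.host:
                raise InvalidConfiguration(
                    'node %s has host %r but my host is %r' %
                    (node.uuid, node.host, self.host))

    def init_host(self):
        # 1. Look up this host's node records (stubbed as a plain dict).
        nodes_by_uuid = self.nodes_by_uuid
        # 2. Abort on a suspected rename *before* touching instances.
        self._check_for_host_rename(nodes_by_uuid)
        # 3. Only then load and act on instances.
        self.instances_processed = True


mgr = MiniComputeManager('new-name', {'n1': FakeNode('n1', 'old-name')})
try:
    mgr.init_host()
except InvalidConfiguration as exc:
    print('startup refused:', exc)
print('instances processed?', mgr.instances_processed)  # -> False
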
@@ -934,7 +934,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         inst_list = _make_instance_list(startup_instances)
         mock_host_get.return_value = inst_list
         our_node = objects.ComputeNode(
-            host='fake-host', uuid=uuids.our_node_uuid,
+            host=self.compute.host, uuid=uuids.our_node_uuid,
             hypervisor_hostname='fake-node')
         mock_get_nodes.return_value = {uuids.our_node_uuid: our_node}
 
@@ -983,7 +983,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         """
         mock_get_nodes.return_value = {
             uuids.cn_uuid1: objects.ComputeNode(
-                uuid=uuids.cn_uuid1, hypervisor_hostname='node1')}
+                uuid=uuids.cn_uuid1, hypervisor_hostname='node1',
+                host=self.compute.host)}
         self.compute.init_host(None)
 
         mock_error_interrupted.assert_called_once_with(
@@ -1148,7 +1149,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
             uuids.evac_instance: evacuating_instance
         }
         our_node = objects.ComputeNode(
-            host='fake-host', uuid=uuids.our_node_uuid,
+            host=self.compute.host, uuid=uuids.our_node_uuid,
             hypervisor_hostname='fake-node')
         mock_get_nodes.return_value = {uuids.our_node_uuid: our_node}
 
@@ -1227,6 +1228,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
 
     @mock.patch.object(objects.InstanceList, 'get_by_host',
                        new=mock.Mock())
+    @mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids',
+                new=mock.Mock(return_value=[mock.MagicMock()]))
     @mock.patch('nova.compute.manager.ComputeManager.'
                 '_validate_pinning_configuration')
     def test_init_host_pinning_configuration_validation_failure(self,
@@ -1244,6 +1247,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
     @mock.patch('nova.compute.manager.ComputeManager.'
                 '_validate_pinning_configuration',
                 new=mock.Mock())
+    @mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids',
+                new=mock.Mock(return_value=[mock.MagicMock()]))
     @mock.patch('nova.compute.manager.ComputeManager.'
                 '_validate_vtpm_configuration')
     def test_init_host_vtpm_configuration_validation_failure(self,
@@ -6434,6 +6439,49 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
             mock.sentinel.service_ref)
         m.assert_called_once_with(mock.sentinel.service_ref)
 
+    def test_check_for_host_rename_ironic(self):
+        self.flags(compute_driver='ironic')
+        # Passing None here makes sure we take the early exit because of our
+        # virt driver
+        nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+                                             host='not-this-host')}
+        self.compute._check_for_host_rename(nodes)
+
+    def test_check_for_host_rename_renamed_only(self):
+        nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+                                             host='not-this-host')}
+        self.assertRaises(exception.InvalidConfiguration,
+                          self.compute._check_for_host_rename, nodes)
+
+    def test_check_for_host_rename_renamed_one(self):
+        nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+                                             host=self.compute.host),
+                 uuids.node2: mock.MagicMock(uuid=uuids.node2,
+                                             host='not-this-host')}
+        self.assertRaises(exception.InvalidConfiguration,
+                          self.compute._check_for_host_rename, nodes)
+
+    def test_check_for_host_rename_not_renamed(self):
+        nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+                                             host=self.compute.host)}
+        with mock.patch.object(manager.LOG, 'debug') as mock_debug:
+            self.compute._check_for_host_rename(nodes)
+        mock_debug.assert_called_once_with(
+            'Verified node %s matches my host %s',
+            uuids.node1, self.compute.host)
+
+    @mock.patch('nova.compute.manager.ComputeManager._get_nodes')
+    def test_check_for_host_rename_called_by_init_host(self, mock_nodes):
+        # Since testing init_host() requires a billion mocks, this
+        # tests that we do call it when expected, but make it raise
+        # to avoid running the rest of init_host().
+        with mock.patch.object(self.compute,
+                               '_check_for_host_rename') as m:
+            m.side_effect = test.TestingException
+            self.assertRaises(test.TestingException,
+                              self.compute.init_host, None)
+        m.assert_called_once_with(mock_nodes.return_value)
+
 
 class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
     def setUp(self):
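
A note on the testing pattern above: mock.MagicMock(uuid=..., host=...) just
builds an object whose attributes the guard reads, so no database or real
ComputeNode is needed, and self.flags(compute_driver='ironic') flips the
early-return branch. Here is a tiny self-contained example of the same
attribute-mock pattern outside nova's test base classes; the check() function
and all names are invented for illustration.

# Minimal illustration of the MagicMock-attribute pattern used in the tests
# above, outside of nova's test framework. check() is a stand-in guard.
import unittest
from unittest import mock


def check(my_host, nodes_by_uuid):
    for node in nodes_by_uuid.values():
        if node.host != my_host:
            raise ValueError('%s is on %s, not %s' %
                             (node.uuid, node.host, my_host))


class CheckTests(unittest.TestCase):
    def test_mismatch_raises(self):
        nodes = {'n1': mock.MagicMock(uuid='n1', host='other-host')}
        self.assertRaises(ValueError, check, 'my-host', nodes)

    def test_match_passes(self):
        nodes = {'n1': mock.MagicMock(uuid='n1', host='my-host')}
        check('my-host', nodes)  # should not raise


if __name__ == '__main__':
    unittest.main()
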
releasenotes/notes/stable-compute-uuid-08663a0955616728.yaml (new file, 19 lines)

@@ -0,0 +1,19 @@
+---
+features:
+  - |
+    The compute manager now uses a local file to provide node uuid persistence
+    to guard against problems with renamed services, among other things.
+    Deployers wishing to ensure that *new* compute services get a predictable
+    uuid before initial startup may provision that file and nova will use it,
+    otherwise nova will generate and write one to a `compute_id` file in
+    `CONF.state_path` the first time it starts up. Accidental renames of a
+    compute node's hostname will be detected and the manager will exit to avoid
+    database corruption. Note that none of this applies to Ironic computes, as
+    they manage nodes and uuids differently.
+upgrade:
+  - |
+    Existing compute nodes will, upon upgrade, persist the uuid of the compute
+    node assigned to their hostname at first startup. Since this must match
+    what is currently in the database, it is important to let nova provision
+    this file from its database. Nova will only persist to a `compute_id` file
+    in the `CONF.state_path` directory, which should already be writable.
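
As a concrete illustration of the provisioning option described in the feature
note, a deployer could pre-create the file before a brand-new compute
service's first start (existing computes should let nova write it from the
database, per the upgrade note). This is a hedged sketch, not nova code: it
assumes a state_path of /var/lib/nova (a common default) and the compute_id
filename named above; adjust both to your deployment.

# Sketch: pre-provision a stable node uuid for a *new* compute service.
# Assumes CONF.state_path is /var/lib/nova and the 'compute_id' filename
# described in the release note; both are assumptions to adjust locally.
import uuid
from pathlib import Path

STATE_PATH = Path('/var/lib/nova')          # adjust to CONF.state_path
COMPUTE_ID_FILE = STATE_PATH / 'compute_id'


def provision_compute_id(node_uuid=None):
    """Write a node uuid to the compute_id file unless one already exists."""
    if COMPUTE_ID_FILE.exists():
        return COMPUTE_ID_FILE.read_text().strip()
    node_uuid = node_uuid or str(uuid.uuid4())
    COMPUTE_ID_FILE.write_text(node_uuid + '\n')
    return node_uuid


if __name__ == '__main__':
    print('compute node uuid:', provision_compute_id())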