Consider startup scenario in _get_compute_nodes_in_db
Before this change, on the first start of the nova-compute service on any host, _get_compute_nodes_in_db logs the "No compute node record for host" error, which is confusing for people debugging issues. For example, if the compute service starts up before the Placement service is running, the compute service checks in and fails to connect to the placement endpoint, which might take the compute node out of consideration for scheduling. When debugging that kind of issue, people can get hung up on this error message, which is actually an expected case on the first start of nova-compute on a new host.

This change simply plumbs through a boolean to tell the method whether we're starting up or not. This doesn't tell us whether it's the first time we're starting this service or a restart, but the thinking is that if it's a restart we don't get the NotFound error, and if we do, then we do want the error-level message.

Change-Id: Id7a05b579ead6ac5445ca5b1eeab6d223d545a6c
This commit is contained in:
parent
d48aeb5783
commit
50d402821b
@ -1165,7 +1165,8 @@ class ComputeManager(manager.Manager):
|
|||||||
the service up by listening on RPC queues, make sure to update
|
the service up by listening on RPC queues, make sure to update
|
||||||
our available resources (and indirectly our available nodes).
|
our available resources (and indirectly our available nodes).
|
||||||
"""
|
"""
|
||||||
self.update_available_resource(nova.context.get_admin_context())
|
self.update_available_resource(nova.context.get_admin_context(),
|
||||||
|
startup=True)
|
||||||
|
|
||||||
def _get_power_state(self, context, instance):
|
def _get_power_state(self, context, instance):
|
||||||
"""Retrieve the power state for the given instance."""
|
"""Retrieve the power state for the given instance."""
|
||||||
@ -6559,17 +6560,20 @@ class ComputeManager(manager.Manager):
|
|||||||
"%(node)s."), {'node': nodename})
|
"%(node)s."), {'node': nodename})
|
||||||
|
|
||||||
@periodic_task.periodic_task(spacing=CONF.update_resources_interval)
|
@periodic_task.periodic_task(spacing=CONF.update_resources_interval)
|
||||||
def update_available_resource(self, context):
|
def update_available_resource(self, context, startup=False):
|
||||||
"""See driver.get_available_resource()
|
"""See driver.get_available_resource()
|
||||||
|
|
||||||
Periodic process that keeps that the compute host's understanding of
|
Periodic process that keeps that the compute host's understanding of
|
||||||
resource availability and usage in sync with the underlying hypervisor.
|
resource availability and usage in sync with the underlying hypervisor.
|
||||||
|
|
||||||
:param context: security context
|
:param context: security context
|
||||||
|
:param startup: True if this is being called when the nova-compute
|
||||||
|
service is starting, False otherwise.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
compute_nodes_in_db = self._get_compute_nodes_in_db(context,
|
compute_nodes_in_db = self._get_compute_nodes_in_db(context,
|
||||||
use_slave=True)
|
use_slave=True,
|
||||||
|
startup=startup)
|
||||||
nodenames = set(self.driver.get_available_nodes())
|
nodenames = set(self.driver.get_available_nodes())
|
||||||
for nodename in nodenames:
|
for nodename in nodenames:
|
||||||
self.update_available_resource_for_node(context, nodename)
|
self.update_available_resource_for_node(context, nodename)
|
||||||
@ -6589,12 +6593,19 @@ class ComputeManager(manager.Manager):
|
|||||||
self.scheduler_client.reportclient.delete_resource_provider(
|
self.scheduler_client.reportclient.delete_resource_provider(
|
||||||
context, cn, cascade=True)
|
context, cn, cascade=True)
|
||||||
|
|
||||||
def _get_compute_nodes_in_db(self, context, use_slave=False):
|
def _get_compute_nodes_in_db(self, context, use_slave=False,
|
||||||
|
startup=False):
|
||||||
try:
|
try:
|
||||||
return objects.ComputeNodeList.get_all_by_host(context, self.host,
|
return objects.ComputeNodeList.get_all_by_host(context, self.host,
|
||||||
use_slave=use_slave)
|
use_slave=use_slave)
|
||||||
except exception.NotFound:
|
except exception.NotFound:
|
||||||
LOG.error(_LE("No compute node record for host %s"), self.host)
|
if startup:
|
||||||
|
LOG.warning(
|
||||||
|
_LW("No compute node record found for host %s. If this is "
|
||||||
|
"the first time this service is starting on this "
|
||||||
|
"host, then you can ignore this warning."), self.host)
|
||||||
|
else:
|
||||||
|
LOG.error(_LE("No compute node record for host %s"), self.host)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@periodic_task.periodic_task(
|
@periodic_task.periodic_task(
|
||||||
|
@ -160,7 +160,7 @@ class BaseTestCase(test.TestCase):
|
|||||||
self.compute.driver)
|
self.compute.driver)
|
||||||
self.compute._resource_tracker = fake_rt
|
self.compute._resource_tracker = fake_rt
|
||||||
|
|
||||||
def fake_get_compute_nodes_in_db(self, context, use_slave=False):
|
def fake_get_compute_nodes_in_db(self, context, **kwargs):
|
||||||
fake_compute_nodes = [{'local_gb': 259,
|
fake_compute_nodes = [{'local_gb': 259,
|
||||||
'uuid': uuids.fake_compute_node,
|
'uuid': uuids.fake_compute_node,
|
||||||
'vcpus_used': 0,
|
'vcpus_used': 0,
|
||||||
|
@ -226,7 +226,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||||||
get_db_nodes.return_value = db_nodes
|
get_db_nodes.return_value = db_nodes
|
||||||
get_avail_nodes.return_value = avail_nodes
|
get_avail_nodes.return_value = avail_nodes
|
||||||
self.compute.update_available_resource(self.context)
|
self.compute.update_available_resource(self.context)
|
||||||
get_db_nodes.assert_called_once_with(self.context, use_slave=True)
|
get_db_nodes.assert_called_once_with(self.context, use_slave=True,
|
||||||
|
startup=False)
|
||||||
update_mock.has_calls(
|
update_mock.has_calls(
|
||||||
[mock.call(self.context, node) for node in avail_nodes_l]
|
[mock.call(self.context, node) for node in avail_nodes_l]
|
||||||
)
|
)
|
||||||
@ -240,6 +241,32 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||||||
else:
|
else:
|
||||||
self.assertFalse(db_node.destroy.called)
|
self.assertFalse(db_node.destroy.called)
|
||||||
|
|
||||||
|
@mock.patch('nova.context.get_admin_context')
|
||||||
|
def test_pre_start_hook(self, get_admin_context):
|
||||||
|
"""Very simple test just to make sure update_available_resource is
|
||||||
|
called as expected.
|
||||||
|
"""
|
||||||
|
with mock.patch.object(
|
||||||
|
self.compute, 'update_available_resource') as update_res:
|
||||||
|
self.compute.pre_start_hook()
|
||||||
|
update_res.assert_called_once_with(
|
||||||
|
get_admin_context.return_value, startup=True)
|
||||||
|
|
||||||
|
@mock.patch.object(objects.ComputeNodeList, 'get_all_by_host',
|
||||||
|
side_effect=exception.NotFound)
|
||||||
|
@mock.patch('nova.compute.manager.LOG')
|
||||||
|
def test_get_compute_nodes_in_db_on_startup(self, mock_log,
|
||||||
|
get_all_by_host):
|
||||||
|
"""Tests to make sure we only log a warning when we do not find a
|
||||||
|
compute node on startup since this may be expected.
|
||||||
|
"""
|
||||||
|
self.assertEqual([], self.compute._get_compute_nodes_in_db(
|
||||||
|
self.context, startup=True))
|
||||||
|
get_all_by_host.assert_called_once_with(
|
||||||
|
self.context, self.compute.host, use_slave=False)
|
||||||
|
self.assertTrue(mock_log.warning.called)
|
||||||
|
self.assertFalse(mock_log.error.called)
|
||||||
|
|
||||||
@mock.patch('nova.compute.utils.notify_about_instance_action')
|
@mock.patch('nova.compute.utils.notify_about_instance_action')
|
||||||
def test_delete_instance_without_info_cache(self, mock_notify):
|
def test_delete_instance_without_info_cache(self, mock_notify):
|
||||||
instance = fake_instance.fake_instance_obj(
|
instance = fake_instance.fake_instance_obj(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user