Delete a compute node's resource provider when node is deleted
Currently when a compute node is deleted, its record in the cell DB is deleted, but its representation as a resource provider in the placement service remains, along with any inventory and allocations. This could cause the placement engine to return that provider record, even though the compute node no longer exists. And since the periodic "healing" by the resource tracker only updates compute node resources for records in the compute_nodes table, these old records are never removed. This patch adds a call to delete the resource provider when the compute node is deleted. It also adds a method to the scheduler report client to make these calls to the placement API. Partial-Bug: #1661258 Closes-Bug: #1661014 Change-Id: I6098d186d05ff8b9a568e23f860295a7bc2e6447
This commit is contained in:
parent
6f3338612d
commit
965fffc09d
@ -6583,6 +6583,11 @@ class ComputeManager(manager.Manager):
|
||||
{'id': cn.id, 'hh': cn.hypervisor_hostname,
|
||||
'nodes': nodenames})
|
||||
cn.destroy()
|
||||
# Delete the corresponding resource provider in placement,
|
||||
# along with any associated allocations and inventory.
|
||||
# TODO(cdent): Move use of reportclient into resource tracker.
|
||||
self.scheduler_client.reportclient.delete_resource_provider(
|
||||
context, cn, cascade=True)
|
||||
|
||||
def _get_compute_nodes_in_db(self, context, use_slave=False):
|
||||
try:
|
||||
|
@ -749,3 +749,40 @@ class SchedulerReportClient(object):
|
||||
LOG.warning(_LW('Deleting stale allocation for instance %s'),
|
||||
uuid)
|
||||
self._delete_allocation_for_instance(uuid)
|
||||
|
||||
@safe_connect
def delete_resource_provider(self, context, compute_node, cascade=False):
    """Delete the placement ResourceProvider record for a compute node.

    :param context: The security context
    :param compute_node: The nova.objects.ComputeNode object that is the
                         resource provider being deleted.
    :param cascade: Boolean value that, when True, will first delete any
                    associated Allocation and Inventory records for the
                    compute node
    """
    provider_uuid = compute_node.uuid
    if cascade:
        # Allocations are keyed by consumer, not by provider, so to clear
        # them we look up the instances on this host/node (the consumers)
        # and remove each one's allocation individually.
        host_instances = objects.InstanceList.get_by_host_and_node(
            context, compute_node.host, compute_node.hypervisor_hostname)
        for inst in host_instances:
            self._delete_allocation_for_instance(inst.uuid)
    resp = self.delete("/resource_providers/%s" % provider_uuid)
    if resp:
        LOG.info(_LI("Deleted resource provider %s"), provider_uuid)
    elif resp.status_code != 404:
        # A 404 just means the provider doesn't actually exist (already
        # deleted), which needs no warning; anything else is unexpected.
        LOG.warning(
            _LW("Unable to delete resource provider "
                "%(uuid)s: (%(code)i %(text)s)"),
            {"uuid": provider_uuid,
             "code": resp.status_code,
             "text": resp.text})
|
||||
|
@ -210,12 +210,14 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
||||
self.assertTrue(log_mock.info.called)
|
||||
self.assertIsNone(self.compute._resource_tracker)
|
||||
|
||||
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
|
||||
'delete_resource_provider')
|
||||
@mock.patch.object(manager.ComputeManager,
|
||||
'update_available_resource_for_node')
|
||||
@mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes')
|
||||
@mock.patch.object(manager.ComputeManager, '_get_compute_nodes_in_db')
|
||||
def test_update_available_resource(self, get_db_nodes, get_avail_nodes,
|
||||
update_mock):
|
||||
update_mock, del_rp_mock):
|
||||
db_nodes = [self._make_compute_node('node%s' % i, i)
|
||||
for i in range(1, 5)]
|
||||
avail_nodes = set(['node2', 'node3', 'node4', 'node5'])
|
||||
@ -233,6 +235,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
||||
for db_node in db_nodes:
|
||||
if db_node.hypervisor_hostname == 'node1':
|
||||
db_node.destroy.assert_called_once_with()
|
||||
del_rp_mock.assert_called_once_with(self.context, db_node,
|
||||
cascade=True)
|
||||
else:
|
||||
self.assertFalse(db_node.destroy.called)
|
||||
|
||||
|
@ -1301,3 +1301,83 @@ class TestAllocations(SchedulerReportClientTestCase):
|
||||
mock_log.info.assert_not_called()
|
||||
# make sure warning wasn't called for the 404
|
||||
mock_log.warning.assert_not_called()
|
||||
|
||||
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."
            "delete")
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."
            "_delete_allocation_for_instance")
@mock.patch("nova.objects.InstanceList.get_by_host_and_node")
def test_delete_resource_provider_cascade(self, mock_by_host,
                                          mock_del_alloc, mock_delete):
    """With cascade=True, each instance's allocation is deleted before
    the resource provider itself is removed.
    """
    cn = objects.ComputeNode(uuid=uuids.cn, host="fake_host",
                             hypervisor_hostname="fake_hostname")
    instances = [objects.Instance(uuid=uuids.inst1),
                 objects.Instance(uuid=uuids.inst2)]
    mock_by_host.return_value = objects.InstanceList(objects=instances)
    mock_delete.return_value = mock.Mock(status_code=204)
    self.client.delete_resource_provider(self.context, cn, cascade=True)
    # One allocation delete per instance on the node.
    self.assertEqual(2, mock_del_alloc.call_count)
    mock_delete.assert_called_once_with(
        "/resource_providers/%s" % uuids.cn)
||||
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."
            "delete")
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."
            "_delete_allocation_for_instance")
@mock.patch("nova.objects.InstanceList.get_by_host_and_node")
def test_delete_resource_provider_no_cascade(self, mock_by_host,
                                             mock_del_alloc, mock_delete):
    """Without cascade, the provider is deleted but instance allocations
    are left untouched.
    """
    cn = objects.ComputeNode(uuid=uuids.cn, host="fake_host",
                             hypervisor_hostname="fake_hostname")
    instances = [objects.Instance(uuid=uuids.inst1),
                 objects.Instance(uuid=uuids.inst2)]
    mock_by_host.return_value = objects.InstanceList(objects=instances)
    mock_delete.return_value = mock.Mock(status_code=204)
    self.client.delete_resource_provider(self.context, cn)
    # No per-instance allocation deletes when cascade is off.
    mock_del_alloc.assert_not_called()
    mock_delete.assert_called_once_with(
        "/resource_providers/%s" % uuids.cn)
||||
@mock.patch("nova.scheduler.client.report.SchedulerReportClient."
            "delete")
@mock.patch('nova.scheduler.client.report.LOG')
def test_delete_resource_provider_log_calls(self, mock_log, mock_delete):
    """Verify which log method fires for 204, 404 and 409 responses."""
    # First, check a successful call.
    cn = objects.ComputeNode(uuid=uuids.cn, host="fake_host",
                             hypervisor_hostname="fake_hostname")
    fake_resp = mock.MagicMock(status_code=204)
    try:
        fake_resp.__nonzero__.return_value = True
    except AttributeError:
        # py3 uses __bool__
        fake_resp.__bool__.return_value = True
    mock_delete.return_value = fake_resp
    self.client.delete_resource_provider(self.context, cn)
    # A successful (204) delete logs info only.
    self.assertEqual(1, mock_log.info.call_count)
    self.assertEqual(0, mock_log.warning.call_count)

    # Now check a 404 response: the provider was already gone, so
    # neither log message should fire.
    mock_log.reset_mock()
    fake_resp.status_code = 404
    try:
        fake_resp.__nonzero__.return_value = False
    except AttributeError:
        # py3 uses __bool__
        fake_resp.__bool__.return_value = False
    self.client.delete_resource_provider(self.context, cn)
    self.assertEqual(0, mock_log.info.call_count)
    self.assertEqual(0, mock_log.warning.call_count)

    # Finally, check a 409 response: an unexpected failure warns.
    mock_log.reset_mock()
    fake_resp.status_code = 409
    self.client.delete_resource_provider(self.context, cn)
    self.assertEqual(0, mock_log.info.call_count)
    self.assertEqual(1, mock_log.warning.call_count)
|
9
releasenotes/notes/bug-1661258-ee202843157f6a27.yaml
Normal file
9
releasenotes/notes/bug-1661258-ee202843157f6a27.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
---
|
||||
issues:
|
||||
- |
|
||||
Ironic nodes that were deleted from ironic's database during Newton
|
||||
may result in orphaned resource providers causing incorrect scheduling
|
||||
decisions, leading to a reschedule. If this happens, the orphaned
|
||||
resource providers will need to be identified and removed.
|
||||
|
||||
See also https://bugs.launchpad.net/nova/+bug/1661258
|
Loading…
Reference in New Issue
Block a user