From 66e79e1bbca66a6f0f7be14006a87426c4dda402 Mon Sep 17 00:00:00 2001 From: Jay Pipes Date: Fri, 11 Mar 2016 13:16:40 -0500 Subject: [PATCH] pci - Claim devices outside of Claim constructor During the nova.compute.Claim.__init__() call, there are a bunch of _test_XXX() methods that are called. These methods should test to see whether the requested resources of various types can be satisfied by the inventory on the ComputeNode. However, that inventory *should not* be claimed for a particular request during the Claim object's constructor. The Claim._test_pci() method was *actually* claiming the PCI device for the requested instance. Unfortunately, this meant that if an instance launch request's demand for a resource like RAM was not able to be satisfied by the compute node but the launch request's demand for PCI devices *was* able to be satisfied by the compute node, those PCI devices were actually claimed for the instance even though the claim itself would end up being aborted. This resulted in a data corruption/inconsistency where a PCI device would be claimed for an instance that actually was not running on the node. This patch moves the claim of PCI devices out of the _test_pci() method and into the ResourceTracker.instance_claim() method. In the process of fixing this bug, it was discovered that the unit tests for the Claim object with regards to PCI devices were just plain broken. They were testing for nothing at all because of the way the Claim constructor works. These unit tests were reworked completely, along with the MoveClaim unit tests which similarly were not testing the PCI code paths at all. An additional unit test was added on the resource tracker to verify that nova.pci.manager.PciDevTracker.claim_instance() is called when PCI requests are included and satisfied by the Claim. Change-Id: Icf75439a552de84ec31c1a47faeee3caf8a5b0a7 Closes-bug: #1549984 --- nova/compute/claims.py | 5 +- nova/compute/resource_tracker.py | 5 + nova/tests/unit/compute/test_claims.py | 204 +++++++++++------------- nova/tests/unit/compute/test_tracker.py | 44 +++++ 4 files changed, 141 insertions(+), 117 deletions(-) diff --git a/nova/compute/claims.py b/nova/compute/claims.py index 60ef76e7397b..f0634de850de 100644 --- a/nova/compute/claims.py +++ b/nova/compute/claims.py @@ -190,9 +190,8 @@ class Claim(NopClaim): self.context, self.instance.uuid) if pci_requests.requests: - devs = self.tracker.pci_tracker.claim_instance(self.context, - self.instance) - if not devs: + stats = self.tracker.pci_tracker.stats + if not stats.support_requests(pci_requests.requests): return _('Claim pci failed.') def _test_ext_resources(self, limits): diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index c2650fde9d1c..cf15c3d75381 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -201,6 +201,11 @@ class ResourceTracker(object): claim = claims.Claim(context, instance_ref, self, self.compute_node, overhead=overhead, limits=limits) + if self.pci_tracker: + # NOTE(jaypipes): ComputeNode.pci_device_pools is set below + # in _update_usage_from_instance(). + self.pci_tracker.claim_instance(context, instance_ref) + # self._set_instance_host_and_node() will save instance_ref to the DB # so set instance_ref['numa_topology'] first. We need to make sure # that numa_topology is saved while under COMPUTE_RESOURCE_SEMAPHORE diff --git a/nova/tests/unit/compute/test_claims.py b/nova/tests/unit/compute/test_claims.py index 2a22e95bdaeb..455667a0cf11 100644 --- a/nova/tests/unit/compute/test_claims.py +++ b/nova/tests/unit/compute/test_claims.py @@ -18,11 +18,9 @@ import uuid import mock -from oslo_serialization import jsonutils from nova.compute import claims from nova import context -from nova import db from nova import exception from nova import objects from nova.pci import manager as pci_manager @@ -60,8 +58,6 @@ class DummyTracker(object): self.pci_tracker = pci_manager.PciDevTracker(ctxt) -@mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid', - return_value=objects.InstancePCIRequests(requests=[])) class ClaimTestCase(test.NoDBTestCase): def setUp(self): @@ -69,8 +65,11 @@ class ClaimTestCase(test.NoDBTestCase): self.context = context.RequestContext('fake-user', 'fake-project') self.resources = self._fake_resources() self.tracker = DummyTracker() + self.empty_requests = objects.InstancePCIRequests( + requests=[] + ) - def _claim(self, limits=None, overhead=None, **kwargs): + def _claim(self, limits=None, overhead=None, requests=None, **kwargs): numa_topology = kwargs.pop('numa_topology', None) instance = self._fake_instance(**kwargs) if numa_topology: @@ -78,18 +77,23 @@ class ClaimTestCase(test.NoDBTestCase): 'id': 1, 'created_at': None, 'updated_at': None, 'deleted_at': None, 'deleted': None, 'instance_uuid': instance.uuid, - 'numa_topology': numa_topology._to_json() + 'numa_topology': numa_topology._to_json(), + 'pci_requests': (requests or self.empty_requests).to_json() } else: db_numa_topology = None if overhead is None: overhead = {'memory_mb': 0} - with mock.patch.object( - db, 'instance_extra_get_by_instance_uuid', - return_value=db_numa_topology): + + @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid', + return_value=requests or self.empty_requests) + @mock.patch('nova.db.instance_extra_get_by_instance_uuid', + return_value=db_numa_topology) + def get_claim(mock_extra_get, mock_pci_get): return claims.Claim(self.context, instance, self.tracker, self.resources, overhead=overhead, limits=limits) + return get_claim() def _fake_instance(self, **kwargs): instance = { @@ -141,22 +145,22 @@ class ClaimTestCase(test.NoDBTestCase): resources.update(values) return resources - def test_memory_unlimited(self, mock_get): + def test_memory_unlimited(self): self._claim(memory_mb=99999999) - def test_disk_unlimited_root(self, mock_get): + def test_disk_unlimited_root(self): self._claim(root_gb=999999) - def test_disk_unlimited_ephemeral(self, mock_get): + def test_disk_unlimited_ephemeral(self): self._claim(ephemeral_gb=999999) - def test_memory_with_overhead(self, mock_get): + def test_memory_with_overhead(self): overhead = {'memory_mb': 8} limits = {'memory_mb': 2048} self._claim(memory_mb=2040, limits=limits, overhead=overhead) - def test_memory_with_overhead_insufficient(self, mock_get): + def test_memory_with_overhead_insufficient(self): overhead = {'memory_mb': 9} limits = {'memory_mb': 2048} @@ -164,27 +168,27 @@ class ClaimTestCase(test.NoDBTestCase): self._claim, limits=limits, overhead=overhead, memory_mb=2040) - def test_memory_oversubscription(self, mock_get): + def test_memory_oversubscription(self): self._claim(memory_mb=4096) - def test_memory_insufficient(self, mock_get): + def test_memory_insufficient(self): limits = {'memory_mb': 8192} self.assertRaises(exception.ComputeResourcesUnavailable, self._claim, limits=limits, memory_mb=16384) - def test_disk_oversubscription(self, mock_get): + def test_disk_oversubscription(self): limits = {'disk_gb': 60} self._claim(root_gb=10, ephemeral_gb=40, limits=limits) - def test_disk_insufficient(self, mock_get): + def test_disk_insufficient(self): limits = {'disk_gb': 45} self.assertRaisesRegex( exception.ComputeResourcesUnavailable, "disk", self._claim, limits=limits, root_gb=10, ephemeral_gb=40) - def test_disk_and_memory_insufficient(self, mock_get): + def test_disk_and_memory_insufficient(self): limits = {'disk_gb': 45, 'memory_mb': 8192} self.assertRaisesRegex( exception.ComputeResourcesUnavailable, @@ -192,78 +196,49 @@ class ClaimTestCase(test.NoDBTestCase): self._claim, limits=limits, root_gb=10, ephemeral_gb=40, memory_mb=16384) - @pci_fakes.patch_pci_whitelist - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance') - def test_pci_pass(self, mock_get_by_instance, mock_get): - dev_dict = { - 'compute_node_id': 1, - 'address': 'a', - 'product_id': 'p', - 'vendor_id': 'v', - 'numa_node': 0, - 'dev_type': 'type-PCI', - 'parent_addr': 'a1', - 'status': 'available'} - self.tracker.new_pci_tracker() - self.tracker.pci_tracker._set_hvdevs([dev_dict]) - claim = self._claim() + @mock.patch('nova.pci.stats.PciDeviceStats.support_requests', + return_value=True) + def test_pci_pass(self, mock_supports): request = objects.InstancePCIRequest(count=1, spec=[{'vendor_id': 'v', 'product_id': 'p'}]) requests = objects.InstancePCIRequests(requests=[request]) - mock_get.return_value = requests - mock_get_by_instance.return_value = requests - self.assertIsNone(claim._test_pci()) - @pci_fakes.patch_pci_whitelist - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance') - def test_pci_fail(self, mock_get_by_instance, mock_get): - dev_dict = { - 'compute_node_id': 1, - 'address': 'a', - 'product_id': 'p', - 'vendor_id': 'v1', - 'numa_node': 1, - 'dev_type': 'type-PCI', - 'parent_addr': 'a1', - 'status': 'available'} - self.tracker.new_pci_tracker() - self.tracker.pci_tracker._set_hvdevs([dev_dict]) - claim = self._claim() + # Claim.__init__() would raise ComputeResourcesUnavailable + # if Claim._test_pci() did not return None. + self._claim(requests=requests) + mock_supports.assert_called_once_with(requests.requests) + + @mock.patch('nova.pci.stats.PciDeviceStats.support_requests', + return_value=False) + def test_pci_fail(self, mock_supports): request = objects.InstancePCIRequest(count=1, spec=[{'vendor_id': 'v', 'product_id': 'p'}]) requests = objects.InstancePCIRequests(requests=[request]) - mock_get.return_value = requests - mock_get_by_instance.return_value = requests - claim._test_pci() - @pci_fakes.patch_pci_whitelist - def test_pci_pass_no_requests(self, mock_get): - dev_dict = { - 'compute_node_id': 1, - 'address': 'a', - 'product_id': 'p', - 'vendor_id': 'v', - 'numa_node': 0, - 'dev_type': 'type-PCI', - 'parent_addr': 'a1', - 'status': 'available'} - self.tracker.new_pci_tracker() - self.tracker.pci_tracker._set_hvdevs([dev_dict]) - claim = self._claim() - self.assertIsNone(claim._test_pci()) + self.assertRaises(exception.ComputeResourcesUnavailable, + self._claim, requests=requests) + mock_supports.assert_called_once_with(requests.requests) - def test_ext_resources(self, mock_get): + @mock.patch('nova.pci.stats.PciDeviceStats.support_requests', + return_value=True) + def test_pci_pass_no_requests(self, mock_supports): + # Claim.__init__() would raise ComputeResourcesUnavailable + # if Claim._test_pci() did not return None. + self._claim() + self.assertFalse(mock_supports.called) + + def test_ext_resources(self): self._claim() self.assertTrue(self.tracker.ext_resources_handler.test_called) self.assertFalse(self.tracker.ext_resources_handler.usage_is_itype) - def test_numa_topology_no_limit(self, mock_get): + def test_numa_topology_no_limit(self): huge_instance = objects.InstanceNUMATopology( cells=[objects.InstanceNUMACell( id=1, cpuset=set([1, 2]), memory=512)]) self._claim(numa_topology=huge_instance) - def test_numa_topology_fails(self, mock_get): + def test_numa_topology_fails(self): huge_instance = objects.InstanceNUMATopology( cells=[objects.InstanceNUMACell( id=1, cpuset=set([1, 2, 3, 4, 5]), memory=2048)]) @@ -274,7 +249,7 @@ class ClaimTestCase(test.NoDBTestCase): limits={'numa_topology': limit_topo}, numa_topology=huge_instance) - def test_numa_topology_passes(self, mock_get): + def test_numa_topology_passes(self): huge_instance = objects.InstanceNUMATopology( cells=[objects.InstanceNUMACell( id=1, cpuset=set([1, 2]), memory=512)]) @@ -285,7 +260,7 @@ class ClaimTestCase(test.NoDBTestCase): @pci_fakes.patch_pci_whitelist @mock.patch('nova.objects.InstancePCIRequests.get_by_instance') - def test_numa_topology_with_pci(self, mock_get_by_instance, mock_get): + def test_numa_topology_with_pci(self, mock_get_by_instance): dev_dict = { 'compute_node_id': 1, 'address': 'a', @@ -300,18 +275,17 @@ class ClaimTestCase(test.NoDBTestCase): request = objects.InstancePCIRequest(count=1, spec=[{'vendor_id': 'v', 'product_id': 'p'}]) requests = objects.InstancePCIRequests(requests=[request]) - mock_get.return_value = requests mock_get_by_instance.return_value = requests huge_instance = objects.InstanceNUMATopology( cells=[objects.InstanceNUMACell( id=1, cpuset=set([1, 2]), memory=512)]) - self._claim(numa_topology=huge_instance) + self._claim(requests=requests, numa_topology=huge_instance) @pci_fakes.patch_pci_whitelist @mock.patch('nova.objects.InstancePCIRequests.get_by_instance') - def test_numa_topology_with_pci_fail(self, mock_get_by_instance, mock_get): + def test_numa_topology_with_pci_fail(self, mock_get_by_instance): dev_dict = { 'compute_node_id': 1, 'address': 'a', @@ -336,7 +310,6 @@ class ClaimTestCase(test.NoDBTestCase): request = objects.InstancePCIRequest(count=2, spec=[{'vendor_id': 'v', 'product_id': 'p'}]) requests = objects.InstancePCIRequests(requests=[request]) - mock_get.return_value = requests mock_get_by_instance.return_value = requests huge_instance = objects.InstanceNUMATopology( @@ -345,13 +318,12 @@ class ClaimTestCase(test.NoDBTestCase): self.assertRaises(exception.ComputeResourcesUnavailable, self._claim, + requests=requests, numa_topology=huge_instance) @pci_fakes.patch_pci_whitelist @mock.patch('nova.objects.InstancePCIRequests.get_by_instance') - def test_numa_topology_with_pci_no_numa_info(self, - mock_get_by_instance, - mock_get): + def test_numa_topology_with_pci_no_numa_info(self, mock_get_by_instance): dev_dict = { 'compute_node_id': 1, 'address': 'a', @@ -367,16 +339,15 @@ class ClaimTestCase(test.NoDBTestCase): request = objects.InstancePCIRequest(count=1, spec=[{'vendor_id': 'v', 'product_id': 'p'}]) requests = objects.InstancePCIRequests(requests=[request]) - mock_get.return_value = requests mock_get_by_instance.return_value = requests huge_instance = objects.InstanceNUMATopology( cells=[objects.InstanceNUMACell( id=1, cpuset=set([1, 2]), memory=512)]) - self._claim(numa_topology= huge_instance) + self._claim(requests=requests, numa_topology=huge_instance) - def test_abort(self, mock_get): + def test_abort(self): claim = self._abort() self.assertTrue(claim.tracker.icalled) @@ -393,56 +364,60 @@ class ClaimTestCase(test.NoDBTestCase): class MoveClaimTestCase(ClaimTestCase): - def setUp(self): - super(MoveClaimTestCase, self).setUp() - self.instance = self._fake_instance() - self.get_numa_constraint_patch = None - - def _claim(self, limits=None, overhead=None, **kwargs): - numa_constraint = kwargs.pop('numa_topology', None) + def _claim(self, limits=None, overhead=None, requests=None, **kwargs): instance_type = self._fake_instance_type(**kwargs) + numa_topology = kwargs.pop('numa_topology', None) + self.instance = self._fake_instance(**kwargs) + self.instance.numa_topology = None + if numa_topology: + self.db_numa_topology = { + 'id': 1, 'created_at': None, 'updated_at': None, + 'deleted_at': None, 'deleted': None, + 'instance_uuid': self.instance.uuid, + 'numa_topology': numa_topology._to_json(), + 'pci_requests': (requests or self.empty_requests).to_json() + } + else: + self.db_numa_topology = None if overhead is None: overhead = {'memory_mb': 0} - with mock.patch( - 'nova.virt.hardware.numa_get_constraints', - return_value=numa_constraint): + + @mock.patch('nova.objects.InstancePCIRequests.' + 'get_by_instance_uuid_and_newness', + return_value=requests or self.empty_requests) + @mock.patch('nova.virt.hardware.numa_get_constraints', + return_value=numa_topology) + @mock.patch('nova.db.instance_extra_get_by_instance_uuid', + return_value=self.db_numa_topology) + def get_claim(mock_extra_get, mock_numa_get, mock_pci_get): return claims.MoveClaim(self.context, self.instance, instance_type, {}, self.tracker, self.resources, overhead=overhead, limits=limits) + return get_claim() - def _set_pci_request(self, claim): - request = [{'count': 1, - 'spec': [{'vendor_id': 'v', 'product_id': 'p'}], - }] - claim.instance.update( - system_metadata={'new_pci_requests': jsonutils.dumps(request)}) - - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid', - return_value=objects.InstancePCIRequests(requests=[])) - def test_ext_resources(self, mock_get): + def test_ext_resources(self): self._claim() self.assertTrue(self.tracker.ext_resources_handler.test_called) self.assertTrue(self.tracker.ext_resources_handler.usage_is_itype) - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid', - return_value=objects.InstancePCIRequests(requests=[])) - def test_abort(self, mock_get): + def test_abort(self): claim = self._abort() self.assertTrue(claim.tracker.rcalled) - @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid', - return_value=objects.InstancePCIRequests(requests=[])) - def test_create_migration_context(self, mock_get): + def test_create_migration_context(self): numa_topology = objects.InstanceNUMATopology( cells=[objects.InstanceNUMACell( id=1, cpuset=set([1, 2]), memory=512)]) - self.instance.numa_topology = None claim = self._claim(numa_topology=numa_topology) migration = objects.Migration(context=self.context, id=42) claim.migration = migration fake_mig_context = mock.Mock(spec=objects.MigrationContext) - with mock.patch('nova.objects.MigrationContext', - return_value=fake_mig_context) as ctxt_mock: + + @mock.patch('nova.db.instance_extra_get_by_instance_uuid', + return_value=None) + @mock.patch('nova.objects.MigrationContext', + return_value=fake_mig_context) + def _test(ctxt_mock, mock_get_extra): claim.create_migration_context() ctxt_mock.assert_called_once_with( context=self.context, instance_uuid=self.instance.uuid, @@ -451,3 +426,4 @@ class MoveClaimTestCase(ClaimTestCase): self.assertIsInstance(ctxt_mock.call_args[1]['new_numa_topology'], objects.InstanceNUMATopology) self.assertEqual(migration, claim.migration) + _test() diff --git a/nova/tests/unit/compute/test_tracker.py b/nova/tests/unit/compute/test_tracker.py index eab96ed7fc4d..6a834578684b 100644 --- a/nova/tests/unit/compute/test_tracker.py +++ b/nova/tests/unit/compute/test_tracker.py @@ -26,6 +26,7 @@ from nova.compute import vm_states from nova import exception as exc from nova import objects from nova.objects import base as obj_base +from nova.pci import manager as pci_manager from nova import test _VIRT_DRIVER_AVAIL_RESOURCES = { @@ -1285,6 +1286,49 @@ class TestInstanceClaim(BaseTestCase): self.assertTrue(obj_base.obj_equal_prims(expected, self.rt.compute_node)) + @mock.patch('nova.pci.stats.PciDeviceStats.support_requests', + return_value=True) + @mock.patch('nova.pci.manager.PciDevTracker.claim_instance') + @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') + @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node') + def test_claim_with_pci(self, migr_mock, pci_mock, + pci_manager_mock, pci_stats_mock): + # Test that a claim involving PCI requests correctly claims + # PCI devices on the host and sends an updated pci_device_pools + # attribute of the ComputeNode object. + self.assertFalse(self.rt.disabled) + + # TODO(jaypipes): Remove once the PCI tracker is always created + # upon the resource tracker being initialized... + self.rt.pci_tracker = pci_manager.PciDevTracker(mock.sentinel.ctx) + + pci_pools = objects.PciDevicePoolList() + pci_manager_mock.return_value = pci_pools + + request = objects.InstancePCIRequest(count=1, + spec=[{'vendor_id': 'v', 'product_id': 'p'}]) + pci_mock.return_value = objects.InstancePCIRequests(requests=[request]) + + disk_used = self.instance.root_gb + self.instance.ephemeral_gb + expected = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0]) + expected.update({ + 'local_gb_used': disk_used, + 'memory_mb_used': self.instance.memory_mb, + 'free_disk_gb': expected['local_gb'] - disk_used, + "free_ram_mb": expected['memory_mb'] - self.instance.memory_mb, + 'running_vms': 1, + 'vcpus_used': 1, + 'pci_device_pools': pci_pools + }) + with mock.patch.object(self.rt, '_update') as update_mock: + with mock.patch.object(self.instance, 'save'): + self.rt.instance_claim(self.ctx, self.instance, None) + update_mock.assert_called_once_with(self.elevated) + pci_manager_mock.assert_called_once_with(mock.ANY, # context... + self.instance) + self.assertTrue(obj_base.obj_equal_prims(expected, + self.rt.compute_node)) + @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid') @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node') def test_claim_abort_context_manager(self, migr_mock, pci_mock):