diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 3da3940ef9b5..0b4166b1f70d 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -507,7 +507,7 @@ class ComputeVirtAPI(virtapi.VirtAPI): if new_traits is not None: self.reportclient.set_traits_for_provider( - context, rp_uuid, new_traits) + context, rp_uuid, new_traits, generation=trait_info.generation) class ComputeManager(manager.Manager): diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index 8e301fffbb89..d09f13348778 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -19,6 +19,7 @@ import copy import functools import random import time +import typing as ty from keystoneauth1 import exceptions as ks_exc import os_resource_classes as orc @@ -32,6 +33,7 @@ import six from nova.compute import provider_tree import nova.conf +from nova import context as nova_context from nova import exception from nova.i18n import _ from nova import objects @@ -998,7 +1000,13 @@ class SchedulerReportClient(object): raise exception.TraitRetrievalFailed(error=resp.text) @safe_connect - def set_traits_for_provider(self, context, rp_uuid, traits): + def set_traits_for_provider( + self, + context: nova_context.RequestContext, + rp_uuid: str, + traits: ty.Iterable[str], + generation: int = None + ): """Replace a provider's traits with those specified. The provider must exist - this method does not attempt to create it. @@ -1006,6 +1014,9 @@ class SchedulerReportClient(object): :param context: The security context :param rp_uuid: The UUID of the provider whose traits are to be updated :param traits: Iterable of traits to set on the provider + :param generation: The resource provider generation if known. If not + provided then the value from the provider tree cache + will be used. :raises: ResourceProviderUpdateConflict if the provider's generation doesn't match the generation in the cache. Callers may choose to retrieve the provider and its associations afresh and @@ -1028,7 +1039,8 @@ class SchedulerReportClient(object): # that method doesn't return content, and we need to update the cached # provider tree with the new generation. traits = list(traits) if traits else [] - generation = self._provider_tree.data(rp_uuid).generation + if generation is None: + generation = self._provider_tree.data(rp_uuid).generation payload = { 'resource_provider_generation': generation, 'traits': traits, diff --git a/nova/tests/functional/notification_sample_tests/test_compute_task.py b/nova/tests/functional/notification_sample_tests/test_compute_task.py index c5852dc9bff9..e7e7b13f9b41 100644 --- a/nova/tests/functional/notification_sample_tests/test_compute_task.py +++ b/nova/tests/functional/notification_sample_tests/test_compute_task.py @@ -109,9 +109,7 @@ class TestComputeTaskNotificationSample( # NoValidHost asynchronously. self.admin_api.post_server_action(server['id'], {'migrate': None}) self._wait_for_notification('compute_task.migrate_server.error') - # 0. scheduler.select_destinations.start - # 1. compute_task.migrate_server.error - self.assertEqual(2, len(fake_notifier.VERSIONED_NOTIFICATIONS), + self.assertEqual(1, len(fake_notifier.VERSIONED_NOTIFICATIONS), fake_notifier.VERSIONED_NOTIFICATIONS) self._verify_notification( 'compute_task-migrate_server-error', @@ -121,8 +119,9 @@ class TestComputeTaskNotificationSample( 'request_spec.security_groups': [], 'request_spec.numa_topology.instance_uuid': server['id'], 'request_spec.pci_requests.instance_uuid': server['id'], + 'reason.exception_message': 'No valid host was found. ', 'reason.function_name': self.ANY, 'reason.module_name': self.ANY, 'reason.traceback': self.ANY }, - actual=fake_notifier.VERSIONED_NOTIFICATIONS[1]) + actual=fake_notifier.VERSIONED_NOTIFICATIONS[0]) diff --git a/nova/tests/functional/regressions/test_bug_1886418.py b/nova/tests/functional/regressions/test_bug_1886418.py index 5ac06a9770d2..dac343b66e9a 100644 --- a/nova/tests/functional/regressions/test_bug_1886418.py +++ b/nova/tests/functional/regressions/test_bug_1886418.py @@ -58,16 +58,9 @@ class TestServices(integrated_helpers._IntegratedTestBase): self._create_server(networks=[]) self._disable_compute() - # FIXME(gibi): Check that COMPUTE_STATUS_DISABLED is now on the - # compute. Unfortunately it is not true as the compute manager failed - # to update the traits in placement due to a stale provide tree cache. - # It is stale because a server is booted on the compute since the last - # update_available_resource periodic was run. - self.assertIn( - 'An error occurred while updating COMPUTE_STATUS_DISABLED trait ' - 'on compute node resource provider', - self.stdlog.logger.output) - self.assertFalse(self._has_disabled_trait()) + + # Check that COMPUTE_STATUS_DISABLED is now on the compute. + self.assertTrue(self._has_disabled_trait()) # This would be the expected behavior # @@ -82,3 +75,8 @@ class TestServices(integrated_helpers._IntegratedTestBase): self._enable_compute() # Check that COMPUTE_STATUS_DISABLED is removed from the compute self.assertFalse(self._has_disabled_trait()) + self.assertNotIn( + 'An error occurred while updating COMPUTE_STATUS_DISABLED trait ' + 'on compute node resource provider', + self.stdlog.logger.output, + "This is probably bug 1886418.") diff --git a/nova/tests/unit/compute/test_virtapi.py b/nova/tests/unit/compute/test_virtapi.py index b1993d7114fc..7455efdf8e4d 100644 --- a/nova/tests/unit/compute/test_virtapi.py +++ b/nova/tests/unit/compute/test_virtapi.py @@ -98,7 +98,8 @@ class FakeCompute(object): def _get_provider_traits(self, context, rp_uuid): return mock.Mock(traits=self.provider_traits[rp_uuid]) - def _set_traits_for_provider(self, context, rp_uuid, traits): + def _set_traits_for_provider( + self, context, rp_uuid, traits, generation=None): self.provider_traits[rp_uuid] = traits def _event_waiter(self): diff --git a/nova/tests/unit/scheduler/client/test_report.py b/nova/tests/unit/scheduler/client/test_report.py index 1cb9f74d9b4d..c52bd754dd8f 100644 --- a/nova/tests/unit/scheduler/client/test_report.py +++ b/nova/tests/unit/scheduler/client/test_report.py @@ -2938,6 +2938,45 @@ class TestTraits(SchedulerReportClientTestCase): self.assertEqual( 1, self.client._provider_tree.data(uuids.rp).generation) + def test_set_traits_for_provider_with_generation(self): + traits = ['HW_NIC_OFFLOAD_UCS', 'HW_NIC_OFFLOAD_RDMA'] + + # Make _ensure_traits succeed without PUTting + get_mock = mock.Mock(status_code=200) + get_mock.json.return_value = {'traits': traits} + self.ks_adap_mock.get.return_value = get_mock + + # Prime the provider tree cache + self.client._provider_tree.new_root('rp', uuids.rp, generation=0) + + # Mock the /rp/{u}/traits PUT to succeed + put_mock = mock.Mock(status_code=200) + put_mock.json.return_value = {'traits': traits, + 'resource_provider_generation': 2} + self.ks_adap_mock.put.return_value = put_mock + + # Invoke + self.client.set_traits_for_provider( + self.context, uuids.rp, traits, generation=1) + + # Verify API calls + self.ks_adap_mock.get.assert_called_once_with( + '/traits?name=in:' + ','.join(traits), + global_request_id=self.context.global_id, + **self.trait_api_kwargs) + self.ks_adap_mock.put.assert_called_once_with( + '/resource_providers/%s/traits' % uuids.rp, + json={'traits': traits, 'resource_provider_generation': 1}, + global_request_id=self.context.global_id, + **self.trait_api_kwargs) + + # And ensure the provider tree cache was updated appropriately + self.assertFalse( + self.client._provider_tree.have_traits_changed(uuids.rp, traits)) + # Validate the generation + self.assertEqual( + 2, self.client._provider_tree.data(uuids.rp).generation) + def test_set_traits_for_provider_fail(self): traits = ['HW_NIC_OFFLOAD_UCS', 'HW_NIC_OFFLOAD_RDMA'] get_mock = mock.Mock()