Merge "Reduce gen conflict in COMPUTE_STATUS_DISABLED handling"
This commit is contained in:
commit
b1e34c594b
@ -507,7 +507,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
|
||||
|
||||
if new_traits is not None:
|
||||
self.reportclient.set_traits_for_provider(
|
||||
context, rp_uuid, new_traits)
|
||||
context, rp_uuid, new_traits, generation=trait_info.generation)
|
||||
|
||||
|
||||
class ComputeManager(manager.Manager):
|
||||
|
@ -19,6 +19,7 @@ import copy
|
||||
import functools
|
||||
import random
|
||||
import time
|
||||
import typing as ty
|
||||
|
||||
from keystoneauth1 import exceptions as ks_exc
|
||||
import os_resource_classes as orc
|
||||
@ -32,6 +33,7 @@ import six
|
||||
|
||||
from nova.compute import provider_tree
|
||||
import nova.conf
|
||||
from nova import context as nova_context
|
||||
from nova import exception
|
||||
from nova.i18n import _
|
||||
from nova import objects
|
||||
@ -998,7 +1000,13 @@ class SchedulerReportClient(object):
|
||||
raise exception.TraitRetrievalFailed(error=resp.text)
|
||||
|
||||
@safe_connect
|
||||
def set_traits_for_provider(self, context, rp_uuid, traits):
|
||||
def set_traits_for_provider(
|
||||
self,
|
||||
context: nova_context.RequestContext,
|
||||
rp_uuid: str,
|
||||
traits: ty.Iterable[str],
|
||||
generation: int = None
|
||||
):
|
||||
"""Replace a provider's traits with those specified.
|
||||
|
||||
The provider must exist - this method does not attempt to create it.
|
||||
@ -1006,6 +1014,9 @@ class SchedulerReportClient(object):
|
||||
:param context: The security context
|
||||
:param rp_uuid: The UUID of the provider whose traits are to be updated
|
||||
:param traits: Iterable of traits to set on the provider
|
||||
:param generation: The resource provider generation if known. If not
|
||||
provided then the value from the provider tree cache
|
||||
will be used.
|
||||
:raises: ResourceProviderUpdateConflict if the provider's generation
|
||||
doesn't match the generation in the cache. Callers may choose
|
||||
to retrieve the provider and its associations afresh and
|
||||
@ -1028,7 +1039,8 @@ class SchedulerReportClient(object):
|
||||
# that method doesn't return content, and we need to update the cached
|
||||
# provider tree with the new generation.
|
||||
traits = list(traits) if traits else []
|
||||
generation = self._provider_tree.data(rp_uuid).generation
|
||||
if generation is None:
|
||||
generation = self._provider_tree.data(rp_uuid).generation
|
||||
payload = {
|
||||
'resource_provider_generation': generation,
|
||||
'traits': traits,
|
||||
|
@ -109,9 +109,7 @@ class TestComputeTaskNotificationSample(
|
||||
# NoValidHost asynchronously.
|
||||
self.admin_api.post_server_action(server['id'], {'migrate': None})
|
||||
self._wait_for_notification('compute_task.migrate_server.error')
|
||||
# 0. scheduler.select_destinations.start
|
||||
# 1. compute_task.migrate_server.error
|
||||
self.assertEqual(2, len(fake_notifier.VERSIONED_NOTIFICATIONS),
|
||||
self.assertEqual(1, len(fake_notifier.VERSIONED_NOTIFICATIONS),
|
||||
fake_notifier.VERSIONED_NOTIFICATIONS)
|
||||
self._verify_notification(
|
||||
'compute_task-migrate_server-error',
|
||||
@ -121,8 +119,9 @@ class TestComputeTaskNotificationSample(
|
||||
'request_spec.security_groups': [],
|
||||
'request_spec.numa_topology.instance_uuid': server['id'],
|
||||
'request_spec.pci_requests.instance_uuid': server['id'],
|
||||
'reason.exception_message': 'No valid host was found. ',
|
||||
'reason.function_name': self.ANY,
|
||||
'reason.module_name': self.ANY,
|
||||
'reason.traceback': self.ANY
|
||||
},
|
||||
actual=fake_notifier.VERSIONED_NOTIFICATIONS[1])
|
||||
actual=fake_notifier.VERSIONED_NOTIFICATIONS[0])
|
||||
|
@ -58,16 +58,9 @@ class TestServices(integrated_helpers._IntegratedTestBase):
|
||||
self._create_server(networks=[])
|
||||
|
||||
self._disable_compute()
|
||||
# FIXME(gibi): Check that COMPUTE_STATUS_DISABLED is now on the
|
||||
# compute. Unfortunately it is not true as the compute manager failed
|
||||
# to update the traits in placement due to a stale provide tree cache.
|
||||
# It is stale because a server is booted on the compute since the last
|
||||
# update_available_resource periodic was run.
|
||||
self.assertIn(
|
||||
'An error occurred while updating COMPUTE_STATUS_DISABLED trait '
|
||||
'on compute node resource provider',
|
||||
self.stdlog.logger.output)
|
||||
self.assertFalse(self._has_disabled_trait())
|
||||
|
||||
# Check that COMPUTE_STATUS_DISABLED is now on the compute.
|
||||
self.assertTrue(self._has_disabled_trait())
|
||||
|
||||
# This would be the expected behavior
|
||||
#
|
||||
@ -82,3 +75,8 @@ class TestServices(integrated_helpers._IntegratedTestBase):
|
||||
self._enable_compute()
|
||||
# Check that COMPUTE_STATUS_DISABLED is removed from the compute
|
||||
self.assertFalse(self._has_disabled_trait())
|
||||
self.assertNotIn(
|
||||
'An error occurred while updating COMPUTE_STATUS_DISABLED trait '
|
||||
'on compute node resource provider',
|
||||
self.stdlog.logger.output,
|
||||
"This is probably bug 1886418.")
|
||||
|
@ -98,7 +98,8 @@ class FakeCompute(object):
|
||||
def _get_provider_traits(self, context, rp_uuid):
|
||||
return mock.Mock(traits=self.provider_traits[rp_uuid])
|
||||
|
||||
def _set_traits_for_provider(self, context, rp_uuid, traits):
|
||||
def _set_traits_for_provider(
|
||||
self, context, rp_uuid, traits, generation=None):
|
||||
self.provider_traits[rp_uuid] = traits
|
||||
|
||||
def _event_waiter(self):
|
||||
|
@ -2938,6 +2938,45 @@ class TestTraits(SchedulerReportClientTestCase):
|
||||
self.assertEqual(
|
||||
1, self.client._provider_tree.data(uuids.rp).generation)
|
||||
|
||||
def test_set_traits_for_provider_with_generation(self):
|
||||
traits = ['HW_NIC_OFFLOAD_UCS', 'HW_NIC_OFFLOAD_RDMA']
|
||||
|
||||
# Make _ensure_traits succeed without PUTting
|
||||
get_mock = mock.Mock(status_code=200)
|
||||
get_mock.json.return_value = {'traits': traits}
|
||||
self.ks_adap_mock.get.return_value = get_mock
|
||||
|
||||
# Prime the provider tree cache
|
||||
self.client._provider_tree.new_root('rp', uuids.rp, generation=0)
|
||||
|
||||
# Mock the /rp/{u}/traits PUT to succeed
|
||||
put_mock = mock.Mock(status_code=200)
|
||||
put_mock.json.return_value = {'traits': traits,
|
||||
'resource_provider_generation': 2}
|
||||
self.ks_adap_mock.put.return_value = put_mock
|
||||
|
||||
# Invoke
|
||||
self.client.set_traits_for_provider(
|
||||
self.context, uuids.rp, traits, generation=1)
|
||||
|
||||
# Verify API calls
|
||||
self.ks_adap_mock.get.assert_called_once_with(
|
||||
'/traits?name=in:' + ','.join(traits),
|
||||
global_request_id=self.context.global_id,
|
||||
**self.trait_api_kwargs)
|
||||
self.ks_adap_mock.put.assert_called_once_with(
|
||||
'/resource_providers/%s/traits' % uuids.rp,
|
||||
json={'traits': traits, 'resource_provider_generation': 1},
|
||||
global_request_id=self.context.global_id,
|
||||
**self.trait_api_kwargs)
|
||||
|
||||
# And ensure the provider tree cache was updated appropriately
|
||||
self.assertFalse(
|
||||
self.client._provider_tree.have_traits_changed(uuids.rp, traits))
|
||||
# Validate the generation
|
||||
self.assertEqual(
|
||||
2, self.client._provider_tree.data(uuids.rp).generation)
|
||||
|
||||
def test_set_traits_for_provider_fail(self):
|
||||
traits = ['HW_NIC_OFFLOAD_UCS', 'HW_NIC_OFFLOAD_RDMA']
|
||||
get_mock = mock.Mock()
|
||||
|
Loading…
Reference in New Issue
Block a user