Set COMPUTE_STATUS_DISABLED trait from update_provider_tree flow

This adds code which hooks in the update_provider_tree flow in the
ResourceTracker, specifically when the RT is generically modifying
the compute-owned traits for the given compute node resource provider.

A future change will add the scheduler request pre-filter and
API code to sync the trait when enabling/disabling a compute service.
This is necessary for two cases specifically:

1. After upgrading an older disabled compute we will sync the trait.

2. If enabling/disabling a compute and the service is down, the API
   will not call the compute service to sync the trait. When the
   compute service is restarted we will sync the trait on startup
   with this code.

The COMPUTE_STATUS_DISABLED trait was added to os-traits in change
Ia8e4487bfb59f764a6817ec8650785ffa902eab5 which is in the 0.15.0
release of os-traits so the requirements are bumped here as well.

Part of blueprint pre-filter-disabled-computes

Change-Id: I3005b46221ac3c0e559e1072131a7e4846c9867c
This commit is contained in:
Matt Riedemann 2019-07-01 19:06:34 -04:00
parent a8d2b24f1f
commit 5e78122458
4 changed files with 122 additions and 8 deletions

View File

@ -68,7 +68,7 @@ os-brick==2.6.1
os-client-config==1.29.0
os-resource-classes==0.1.0
os-service-types==1.2.0
os-traits==0.12.0
os-traits==0.15.0
os-vif==1.14.0
os-win==3.0.0
os-xenapi==0.3.3

View File

@ -23,6 +23,7 @@ import copy
from keystoneauth1 import exceptions as ks_exc
import os_resource_classes as orc
import os_traits
from oslo_log import log as logging
from oslo_serialization import jsonutils
import retrying
@ -909,7 +910,46 @@ class ResourceTracker(object):
return True
return False
def _get_traits(self, nodename, provider_tree):
def _sync_compute_service_disabled_trait(self, context, traits):
"""Synchronize the COMPUTE_STATUS_DISABLED trait on the node provider.
Determines if the COMPUTE_STATUS_DISABLED trait should be added to
or removed from the provider's set of traits based on the related
nova-compute service disabled status.
:param context: RequestContext for cell database access
:param traits: set of traits for the compute node resource provider;
this is modified by reference
"""
trait = os_traits.COMPUTE_STATUS_DISABLED
try:
service = objects.Service.get_by_compute_host(context, self.host)
if service.disabled:
# The service is disabled so make sure the trait is reported.
traits.add(trait)
else:
# The service is not disabled so do not report the trait.
traits.discard(trait)
except exception.NotFound:
# This should not happen but handle it gracefully. The scheduler
# should ignore this node if the compute service record is gone.
LOG.error('Unable to find services table record for nova-compute '
'host %s', self.host)
def _get_traits(self, context, nodename, provider_tree):
"""Synchronizes internal and external traits for the node provider.
This works in conjunction with the ComptueDriver.update_provider_tree
flow and is used to synchronize traits reported by the compute driver,
traits based on information in the ComputeNode record, and traits set
externally using the placement REST API.
:param context: RequestContext for cell database access
:param nodename: ComputeNode.hypervisor_hostname for the compute node
resource provider whose traits are being synchronized; the node
must be in the ProviderTree.
:param provider_tree: ProviderTree being updated
"""
# Get the traits from the ProviderTree which will be the set
# of virt-owned traits plus any externally defined traits set
# on the provider that aren't owned by the virt driver.
@ -924,6 +964,8 @@ class ResourceTracker(object):
elif trait in traits:
traits.remove(trait)
self._sync_compute_service_disabled_trait(context, traits)
return list(traits)
@retrying.retry(stop_max_attempt_number=4,
@ -979,7 +1021,10 @@ class ResourceTracker(object):
# remove it. But at both t1 and t2 there is a
# CUSTOM_VENDOR_TRAIT_X which we can't touch because it
# was set externally on the provider.
traits = self._get_traits(nodename, provider_tree=prov_tree)
# We also want to sync the COMPUTE_STATUS_DISABLED trait based
# on the related nova-compute service's disabled status.
traits = self._get_traits(
context, nodename, provider_tree=prov_tree)
prov_tree.update_traits(nodename, traits)
except NotImplementedError:
# update_provider_tree isn't implemented yet - try get_inventory

View File

@ -16,6 +16,7 @@ import datetime
from keystoneauth1 import exceptions as ks_exc
import mock
import os_resource_classes as orc
import os_traits
from oslo_config import cfg
from oslo_utils.fixture import uuidsentinel as uuids
from oslo_utils import timeutils
@ -1414,7 +1415,9 @@ class TestUpdateComputeNode(BaseTestCase):
save_mock.assert_called_once_with()
norm_mock.assert_called_once_with(mock.sentinel.inv_data, new_compute)
def test_existing_node_capabilities_as_traits(self):
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_sync_compute_service_disabled_trait')
def test_existing_node_capabilities_as_traits(self, mock_sync_disabled):
"""The capabilities_as_traits() driver method returns traits
information for a node/provider.
"""
@ -1441,9 +1444,14 @@ class TestUpdateComputeNode(BaseTestCase):
new_compute.hypervisor_hostname,
[mock.sentinel.trait]
)
mock_sync_disabled.assert_called_once_with(
mock.sentinel.ctx, {mock.sentinel.trait})
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_sync_compute_service_disabled_trait')
@mock.patch('nova.objects.ComputeNode.save')
def test_existing_node_update_provider_tree_implemented(self, save_mock):
def test_existing_node_update_provider_tree_implemented(self, save_mock,
mock_sync_disable):
"""The update_provider_tree() virt driver method is only implemented
for some virt drivers. This method returns inventory, trait, and
aggregate information for resource providers in a tree associated with
@ -1526,10 +1534,14 @@ class TestUpdateComputeNode(BaseTestCase):
# 1024MB in GB
exp_inv[orc.DISK_GB]['reserved'] = 1
self.assertEqual(exp_inv, ptree.data(new_compute.uuid).inventory)
mock_sync_disable.assert_called_once()
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_sync_compute_service_disabled_trait')
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_resource_change', return_value=False)
def test_update_retry_success(self, mock_resource_change):
def test_update_retry_success(self, mock_resource_change,
mock_sync_disabled):
self._setup_rt()
orig_compute = _COMPUTE_NODE_FIXTURES[0].obj_clone()
self.rt.compute_nodes[_NODENAME] = orig_compute
@ -1550,12 +1562,16 @@ class TestUpdateComputeNode(BaseTestCase):
self.rt._update(mock.sentinel.ctx, new_compute)
self.assertEqual(2, ufpt_mock.call_count)
self.assertEqual(2, mock_sync_disabled.call_count)
# The retry is restricted to _update_to_placement
self.assertEqual(1, mock_resource_change.call_count)
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_sync_compute_service_disabled_trait')
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_resource_change', return_value=False)
def test_update_retry_raises(self, mock_resource_change):
def test_update_retry_raises(self, mock_resource_change,
mock_sync_disabled):
self._setup_rt()
orig_compute = _COMPUTE_NODE_FIXTURES[0].obj_clone()
self.rt.compute_nodes[_NODENAME] = orig_compute
@ -1577,9 +1593,62 @@ class TestUpdateComputeNode(BaseTestCase):
self.rt._update, mock.sentinel.ctx, new_compute)
self.assertEqual(4, ufpt_mock.call_count)
self.assertEqual(4, mock_sync_disabled.call_count)
# The retry is restricted to _update_to_placement
self.assertEqual(1, mock_resource_change.call_count)
@mock.patch('nova.objects.Service.get_by_compute_host',
return_value=objects.Service(disabled=True))
def test_sync_compute_service_disabled_trait_add(self, mock_get_by_host):
"""Tests the scenario that the compute service is disabled so the
COMPUTE_STATUS_DISABLED trait is added to the traits set.
"""
self._setup_rt()
ctxt = context.get_admin_context()
traits = set()
self.rt._sync_compute_service_disabled_trait(ctxt, traits)
self.assertEqual({os_traits.COMPUTE_STATUS_DISABLED}, traits)
mock_get_by_host.assert_called_once_with(ctxt, self.rt.host)
@mock.patch('nova.objects.Service.get_by_compute_host',
return_value=objects.Service(disabled=False))
def test_sync_compute_service_disabled_trait_remove(
self, mock_get_by_host):
"""Tests the scenario that the compute service is enabled so the
COMPUTE_STATUS_DISABLED trait is removed from the traits set.
"""
self._setup_rt()
ctxt = context.get_admin_context()
# First test with the trait actually in the set.
traits = {os_traits.COMPUTE_STATUS_DISABLED}
self.rt._sync_compute_service_disabled_trait(ctxt, traits)
self.assertEqual(0, len(traits))
mock_get_by_host.assert_called_once_with(ctxt, self.rt.host)
# Now run it again with the empty set to make sure the method handles
# the trait not already being in the set (idempotency).
self.rt._sync_compute_service_disabled_trait(ctxt, traits)
self.assertEqual(0, len(traits))
@mock.patch('nova.objects.Service.get_by_compute_host',
# One might think Service.get_by_compute_host would raise
# ServiceNotFound but the DB API raises ComputeHostNotFound.
side_effect=exc.ComputeHostNotFound(host=_HOSTNAME))
@mock.patch('nova.compute.resource_tracker.LOG.error')
def test_sync_compute_service_disabled_trait_service_not_found(
self, mock_log_error, mock_get_by_host):
"""Tests the scenario that the compute service is not found so the
traits set is unmodified and an error is logged.
"""
self._setup_rt()
ctxt = context.get_admin_context()
traits = set()
self.rt._sync_compute_service_disabled_trait(ctxt, traits)
self.assertEqual(0, len(traits))
mock_get_by_host.assert_called_once_with(ctxt, self.rt.host)
mock_log_error.assert_called_once()
self.assertIn('Unable to find services table record for nova-compute',
mock_log_error.call_args[0][0])
def test_copy_resources_no_update_allocation_ratios(self):
"""Tests that a ComputeNode object's allocation ratio fields are
not set if the configured allocation ratio values are default None.

View File

@ -56,7 +56,7 @@ psutil>=3.2.2 # BSD
oslo.versionedobjects>=1.35.0 # Apache-2.0
os-brick>=2.6.1 # Apache-2.0
os-resource-classes>=0.1.0 # Apache-2.0
os-traits>=0.12.0 # Apache-2.0
os-traits>=0.15.0 # Apache-2.0
os-vif>=1.14.0 # Apache-2.0
os-win>=3.0.0 # Apache-2.0
castellan>=0.16.0 # Apache-2.0