Merge "Convert driver supported capabilities to compute node provider traits"

This commit is contained in:
Zuul 2019-03-05 09:50:12 +00:00 committed by Gerrit Code Review
commit 9692f16b35
11 changed files with 401 additions and 19 deletions

View File

@ -70,7 +70,7 @@ os-brick==2.6.1
os-client-config==1.29.0
os-resource-classes==0.1.0
os-service-types==1.2.0
os-traits==0.4.0
os-traits==0.8.0
os-vif==1.14.0
os-win==3.0.0
os-xenapi==0.3.3

View File

@ -917,6 +917,23 @@ class ResourceTracker(object):
return True
return False
def _get_traits(self, nodename, provider_tree=None):
    """Return the traits the compute node provider should currently have.

    Starts from the traits recorded for ``nodename`` in ``provider_tree``
    and then applies the driver's capabilities: traits for supported
    capabilities are added, and traits for unsupported capabilities are
    dropped. Traits that do not correspond to any driver capability are
    left untouched.

    :param nodename: name of the provider to look up in ``provider_tree``
    :param provider_tree: tree holding the provider; despite the ``None``
        default it is required
    :returns: list of trait names (order is not significant)
    :raises ValueError: if ``provider_tree`` is None
    """
    if provider_tree is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("provider_tree is required to compute traits")

    # Work on a private copy so the set handed back by the provider tree
    # is never modified behind its back.
    traits = set(provider_tree.data(nodename).traits)

    # Now get the driver's capabilities and add any supported
    # traits that are missing, and remove any existing set traits
    # that are not currently supported.
    for trait, supported in self.driver.capabilities_as_traits().items():
        if supported:
            traits.add(trait)
        else:
            traits.discard(trait)

    return list(traits)
@retrying.retry(stop_max_attempt_number=4,
retry_on_exception=lambda e: isinstance(
e, exception.ResourceProviderUpdateConflict))
@ -957,6 +974,21 @@ class ResourceTracker(object):
self.driver.update_provider_tree(prov_tree, nodename,
allocations=allocs)
# Inject driver capabilities traits into the provider
# tree. The virt driver owns two groups of traits: those it
# put in the tree itself and the compute capabilities
# traits. Traits set externally on the provider that the
# driver does not own must be preserved, while traits the
# driver owns that are no longer in the current list of
# supported traits must be removed, even if they were set
# on the provider externally. For example, let's say we
# reported multiattach support as a trait at t1 and then at
# t2 it's not supported, so we need to remove it. But at
# both t1 and t2 there is a CUSTOM_VENDOR_TRAIT_X which we
# can't touch because it was set externally on the provider.
traits = self._get_traits(nodename, provider_tree=prov_tree)
prov_tree.update_traits(nodename, traits)
except NotImplementedError:
# update_provider_tree isn't implemented yet - try get_inventory
try:

View File

@ -27,7 +27,8 @@ class PlacementApiClient(object):
self.fixture = placement_fixture
def get(self, url, **kwargs):
return client.APIResponse(self.fixture._fake_get(None, url, **kwargs))
return client.APIResponse(
self.fixture._fake_get(None, url, **kwargs))
def put(self, url, body, **kwargs):
return client.APIResponse(
@ -37,6 +38,10 @@ class PlacementApiClient(object):
return client.APIResponse(
self.fixture._fake_post(None, url, body, **kwargs))
def delete(self, url, **kwargs):
    """Send a DELETE for ``url`` through the fixture and wrap the result.

    Extra keyword arguments are forwarded unchanged to the fixture's
    ``_fake_delete``.
    """
    raw_response = self.fixture._fake_delete(None, url, **kwargs)
    return client.APIResponse(raw_response)
class PlacementFixture(placement_fixtures.PlacementFixture):
"""A fixture to placement operations.

View File

@ -19,9 +19,11 @@ Provides common functionality for integrated unit tests
import collections
import random
import six
import string
import time
import os_traits
from oslo_log import log as logging
from oslo_utils.fixture import uuidsentinel as uuids
@ -375,6 +377,32 @@ class ProviderUsageBaseTestCase(test.TestCase, InstanceHelperMixin):
microversion = 'latest'
# These must match the capabilities in
# nova.virt.libvirt.driver.LibvirtDriver.capabilities
# Built as a set of text strings (via six.u) so that it can be compared
# directly against, or unioned with, other collections of trait names.
expected_libvirt_driver_capability_traits = {
    six.u(trait) for trait in (
        os_traits.COMPUTE_DEVICE_TAGGING,
        os_traits.COMPUTE_NET_ATTACH_INTERFACE,
        os_traits.COMPUTE_NET_ATTACH_INTERFACE_WITH_TAG,
        os_traits.COMPUTE_VOLUME_ATTACH_WITH_TAG,
        os_traits.COMPUTE_VOLUME_EXTEND,
        os_traits.COMPUTE_TRUSTED_CERTS,
    )
}
# These must match the capabilities in
# nova.virt.fake.FakeDriver.capabilities
# Built as a set of text strings (via six.u) so that it can be compared
# directly against, or unioned with, other collections of trait names.
expected_fake_driver_capability_traits = {
    six.u(trait) for trait in (
        os_traits.COMPUTE_NET_ATTACH_INTERFACE,
        os_traits.COMPUTE_NET_ATTACH_INTERFACE_WITH_TAG,
        os_traits.COMPUTE_VOLUME_ATTACH_WITH_TAG,
        os_traits.COMPUTE_VOLUME_EXTEND,
        os_traits.COMPUTE_VOLUME_MULTI_ATTACH,
        os_traits.COMPUTE_TRUSTED_CERTS,
    )
}
def setUp(self):
self.flags(compute_driver=self.compute_driver)
super(ProviderUsageBaseTestCase, self).setUp()
@ -446,6 +474,9 @@ class ProviderUsageBaseTestCase(test.TestCase, InstanceHelperMixin):
def _create_trait(self, trait):
return self.placement_api.put('/traits/%s' % trait, {}, version='1.6')
def _delete_trait(self, trait):
    """Delete ``trait`` via the placement API (microversion 1.6).

    :returns: whatever the placement API client's ``delete`` returns
    """
    trait_url = '/traits/%s' % trait
    return self.placement_api.delete(trait_url, version='1.6')
def _get_provider_traits(self, provider_uuid):
return self.placement_api.get(
'/resource_providers/%s/traits' % provider_uuid,

View File

@ -39,8 +39,9 @@ class LibvirtReportTraitsTests(integrated_helpers.ProviderUsageBaseTestCase):
def test_report_cpu_traits(self):
# Test CPU traits reported on initial node startup, these specific
# trait values are coming from fakelibvirt's baselineCPU result.
self.assertItemsEqual(['HW_CPU_X86_VMX', 'HW_CPU_X86_AESNI'],
self._get_provider_traits(self.host_uuid))
traits = self._get_provider_traits(self.host_uuid)
for trait in ('HW_CPU_X86_VMX', 'HW_CPU_X86_AESNI'):
self.assertIn(trait, traits)
self._create_trait('CUSTOM_TRAITS')
new_traits = ['CUSTOM_TRAITS', 'HW_CPU_X86_AVX']
@ -52,7 +53,8 @@ class LibvirtReportTraitsTests(integrated_helpers.ProviderUsageBaseTestCase):
self._run_periodics()
# HW_CPU_X86_AVX is filtered out because nova-compute owns CPU traits
# and it's not in the baseline for the host.
self.assertItemsEqual(
['HW_CPU_X86_VMX', 'HW_CPU_X86_AESNI', 'CUSTOM_TRAITS'],
self._get_provider_traits(self.host_uuid)
traits = set(self._get_provider_traits(self.host_uuid))
expected_traits = self.expected_libvirt_driver_capability_traits.union(
[u'HW_CPU_X86_VMX', u'HW_CPU_X86_AESNI', u'CUSTOM_TRAITS']
)
self.assertItemsEqual(expected_traits, traits)

View File

@ -23,6 +23,7 @@ import zlib
from keystoneauth1 import adapter
import mock
import os_traits
from oslo_config import cfg
from oslo_log import log as logging
from oslo_serialization import base64
@ -1755,7 +1756,8 @@ class ProviderTreeTests(integrated_helpers.ProviderUsageBaseTestCase):
'step_size': 1,
},
}, self._get_provider_inventory(self.host_uuid))
self.assertEqual([], self._get_provider_traits(self.host_uuid))
self.assertItemsEqual(self.expected_fake_driver_capability_traits,
self._get_provider_traits(self.host_uuid))
def _run_update_available_resource(self, startup):
self.compute.rt.update_available_resource(
@ -1820,8 +1822,10 @@ class ProviderTreeTests(integrated_helpers.ProviderUsageBaseTestCase):
self.assertIn('CUSTOM_BANDWIDTH', self._get_all_resource_classes())
self.assertIn('CUSTOM_GOLD', self._get_all_traits())
self.assertEqual(inv, self._get_provider_inventory(self.host_uuid))
self.assertEqual(traits,
set(self._get_provider_traits(self.host_uuid)))
self.assertItemsEqual(
traits.union(self.expected_fake_driver_capability_traits),
self._get_provider_traits(self.host_uuid)
)
self.assertEqual(aggs,
set(self._get_provider_aggregates(self.host_uuid)))
@ -2039,8 +2043,12 @@ class ProviderTreeTests(integrated_helpers.ProviderUsageBaseTestCase):
4,
self._get_provider_inventory(uuids.pf2_2)['SRIOV_NET_VF']['total'])
# Compute and NUMAs don't have any traits
for uuid in (self.host_uuid, uuids.numa1, uuids.numa2):
# The compute node doesn't have any traits beyond the driver capability traits
self.assertItemsEqual(self.expected_fake_driver_capability_traits,
self._get_provider_traits(self.host_uuid))
# NUMAs don't have any traits
for uuid in (uuids.numa1, uuids.numa2):
self.assertEqual([], self._get_provider_traits(uuid))
def test_update_provider_tree_multiple_providers(self):
@ -2218,6 +2226,192 @@ class ProviderTreeTests(integrated_helpers.ProviderUsageBaseTestCase):
])
class TraitsTrackingTests(integrated_helpers.ProviderUsageBaseTestCase):
    """Functional tests for the traits reported on the compute node
    resource provider, covering traits derived from driver capabilities,
    traits injected via update_provider_tree() and traits set externally
    through the placement API.
    """
    compute_driver = 'fake.SmallFakeDriver'

    # Capabilities patched into SmallFakeDriver by most tests below: one
    # supported and one unsupported capability, so that both the "add"
    # and the "do not report / remove" paths are exercised.
    fake_caps = {
        'supports_attach_interface': True,
        'supports_device_tagging': False,
    }

    def _mock_upt(self, traits_to_add, traits_to_remove):
        """Set up the compute driver with a fake update_provider_tree()
        which first calls the original SmallFakeDriver implementation and
        then adds ``traits_to_add`` to, and removes ``traits_to_remove``
        from, the provider named ``nodename`` in the provider tree.
        """
        original_upt = fake.SmallFakeDriver.update_provider_tree

        def fake_upt(self2, ptree, nodename, allocations=None):
            original_upt(self2, ptree, nodename, allocations)
            LOG.debug("injecting traits via fake update_provider_tree(): %s",
                      traits_to_add)
            ptree.add_traits(nodename, *traits_to_add)
            LOG.debug("removing traits via fake update_provider_tree(): %s",
                      traits_to_remove)
            ptree.remove_traits(nodename, *traits_to_remove)

        self.stub_out('nova.virt.fake.FakeDriver.update_provider_tree',
                      fake_upt)

    @mock.patch.dict(fake.SmallFakeDriver.capabilities, clear=True,
                     values=fake_caps)
    def test_resource_provider_traits(self):
        """Test that the compute service reports traits via driver
        capabilities and registers them on the compute host resource
        provider in the placement API.
        """
        custom_trait = 'CUSTOM_FOO'
        ptree_traits = [custom_trait, 'HW_CPU_X86_VMX']

        # Before the compute service starts, the custom trait is unknown
        # to placement while the standard capability traits are already
        # registered, and there are no providers at all.
        global_traits = self._get_all_traits()
        self.assertNotIn(custom_trait, global_traits)
        self.assertIn(os_traits.COMPUTE_NET_ATTACH_INTERFACE, global_traits)
        self.assertIn(os_traits.COMPUTE_DEVICE_TAGGING, global_traits)
        self.assertEqual([], self._get_all_providers())

        self._mock_upt(ptree_traits, [])
        self.compute = self._start_compute(host='host1')

        rp_uuid = self._get_provider_uuid_by_host('host1')
        # Only the supported capability in fake_caps is expected as a
        # trait, in addition to those injected by the fake
        # update_provider_tree().
        expected_traits = set(
            ptree_traits + [os_traits.COMPUTE_NET_ATTACH_INTERFACE]
        )
        self.assertItemsEqual(expected_traits,
                              self._get_provider_traits(rp_uuid))
        global_traits = self._get_all_traits()
        # CUSTOM_FOO is now a registered trait because the virt driver
        # reported it.
        self.assertIn(custom_trait, global_traits)

        # Now simulate user deletion of driver-provided traits from
        # the compute node provider.
        expected_traits.remove(custom_trait)
        expected_traits.remove(os_traits.COMPUTE_NET_ATTACH_INTERFACE)
        self._set_provider_traits(rp_uuid, list(expected_traits))
        self.assertItemsEqual(expected_traits,
                              self._get_provider_traits(rp_uuid))

        # The above trait deletions are simulations of an out-of-band
        # placement operation, as if the operator used the CLI. So
        # now we have to "SIGHUP the compute process" to clear the
        # report client cache so the subsequent update picks up the
        # changes.
        self.compute.manager.reset()

        # Add the traits back to the expected set: the mock
        # update_provider_tree() reinjects the custom trait and the
        # driver capabilities reinject the capability trait.
        expected_traits.update(
            [custom_trait, os_traits.COMPUTE_NET_ATTACH_INTERFACE])

        # Now when we run the periodic update task, the trait should
        # reappear in the provider tree and get synced back to
        # placement.
        self._run_periodics()

        self.assertItemsEqual(expected_traits,
                              self._get_provider_traits(rp_uuid))
        global_traits = self._get_all_traits()
        self.assertIn(custom_trait, global_traits)
        self.assertIn(os_traits.COMPUTE_NET_ATTACH_INTERFACE, global_traits)

    @mock.patch.dict(fake.SmallFakeDriver.capabilities, clear=True,
                     values=fake_caps)
    def test_admin_traits_preserved(self):
        """Test that if admin externally sets traits on the resource provider
        then the compute periodic doesn't remove them from placement.
        """
        admin_trait = 'CUSTOM_TRAIT_FROM_ADMIN'
        self._create_trait(admin_trait)
        global_traits = self._get_all_traits()
        self.assertIn(admin_trait, global_traits)

        self.compute = self._start_compute(host='host1')
        rp_uuid = self._get_provider_uuid_by_host('host1')
        # Simulate the admin adding the trait to the provider via the
        # placement API, on top of whatever the provider already has.
        traits = self._get_provider_traits(rp_uuid)
        traits.append(admin_trait)
        self._set_provider_traits(rp_uuid, traits)
        self.assertIn(admin_trait, self._get_provider_traits(rp_uuid))

        # SIGHUP the compute process to clear the report client
        # cache, so the subsequent periodic update recalculates everything.
        self.compute.manager.reset()

        self._run_periodics()
        self.assertIn(admin_trait, self._get_provider_traits(rp_uuid))

    @mock.patch.dict(fake.SmallFakeDriver.capabilities, clear=True,
                     values=fake_caps)
    def test_driver_removing_support_for_trait_via_capability(self):
        """Test that if a driver initially reports a trait via a supported
        capability, then at the next periodic update doesn't report
        support for it again, it gets removed from the provider in the
        placement service.
        """
        self.compute = self._start_compute(host='host1')
        rp_uuid = self._get_provider_uuid_by_host('host1')

        trait = os_traits.COMPUTE_NET_ATTACH_INTERFACE
        self.assertIn(trait, self._get_provider_traits(rp_uuid))

        # Flip the capability to unsupported only for the duration of
        # the periodic update.
        new_caps = dict(fake.SmallFakeDriver.capabilities,
                        **{'supports_attach_interface': False})
        with mock.patch.dict(fake.SmallFakeDriver.capabilities, new_caps):
            self._run_periodics()

        self.assertNotIn(trait, self._get_provider_traits(rp_uuid))

    def test_driver_removing_trait_via_upt(self):
        """Test that if a driver reports a trait via update_provider_tree()
        initially, but at the next periodic update doesn't report it
        again, that it gets removed from placement.
        """
        custom_trait = "CUSTOM_TRAIT_FROM_DRIVER"
        standard_trait = os_traits.HW_CPU_X86_SGX
        self._mock_upt([custom_trait, standard_trait], [])

        self.compute = self._start_compute(host='host1')
        rp_uuid = self._get_provider_uuid_by_host('host1')
        self.assertIn(custom_trait, self._get_provider_traits(rp_uuid))
        self.assertIn(standard_trait, self._get_provider_traits(rp_uuid))

        # Now change the fake update_provider_tree() from injecting the
        # traits to removing them, and run the periodic update.
        self._mock_upt([], [custom_trait, standard_trait])
        self._run_periodics()

        self.assertNotIn(custom_trait, self._get_provider_traits(rp_uuid))
        self.assertNotIn(standard_trait, self._get_provider_traits(rp_uuid))

    @mock.patch.dict(fake.SmallFakeDriver.capabilities, clear=True,
                     values=fake_caps)
    def test_driver_removes_unsupported_trait_from_admin(self):
        """Test that if an admin adds a trait corresponding to a
        capability which is unsupported, then if the provider cache is
        reset, the driver will remove it during the next update.
        """
        self.compute = self._start_compute(host='host1')
        rp_uuid = self._get_provider_uuid_by_host('host1')

        # fake_caps marks device tagging as unsupported, so the trait
        # must not be on the provider to begin with.
        traits = self._get_provider_traits(rp_uuid)
        trait = os_traits.COMPUTE_DEVICE_TAGGING
        self.assertNotIn(trait, traits)

        # Simulate an admin associating the trait with the host via
        # the placement API.
        traits.append(trait)
        self._set_provider_traits(rp_uuid, traits)

        # Check that worked.
        traits = self._get_provider_traits(rp_uuid)
        self.assertIn(trait, traits)

        # SIGHUP the compute process to clear the report client
        # cache, so the subsequent periodic update recalculates everything.
        self.compute.manager.reset()

        self._run_periodics()
        self.assertNotIn(trait, self._get_provider_traits(rp_uuid))
class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
"""Tests moving servers while checking the resource allocations and usages

View File

@ -485,6 +485,25 @@ class BaseTestCase(test.NoDBTestCase):
self.driver_mock) = setup_rt(
_HOSTNAME, virt_resources, estimate_overhead)
def _setup_ptree(self, compute):
    """Build a ProviderTree rooted at ``compute`` and wire it into the
    mocked report client.

    The tree gets a single root named after the compute node's
    hypervisor hostname and identified by its uuid. It is then installed
    as the return value of the report client's
    get_provider_tree_and_ensure_root().

    The tree's update_traits() is replaced by a mock so that tests can
    assert on how it was called. The tree is returned so tests can
    control its behaviour further.
    """
    tree = provider_tree.ProviderTree()
    tree.new_root(compute.hypervisor_hostname, compute.uuid)
    tree.update_traits = mock.Mock()

    report_client = self.rt.reportclient
    report_client.get_provider_tree_and_ensure_root.return_value = tree
    return tree
class TestUpdateAvailableResources(BaseTestCase):
@ -1395,6 +1414,34 @@ class TestUpdateComputeNode(BaseTestCase):
save_mock.assert_called_once_with()
norm_mock.assert_called_once_with(mock.sentinel.inv_data, new_compute)
def test_existing_node_capabilities_as_traits(self):
    """The traits reported by the driver's capabilities_as_traits()
    method are pushed into the provider tree for the node/provider
    during _update().
    """
    self._setup_rt()
    rc = self.rt.reportclient
    rc.set_traits_for_provider = mock.MagicMock()

    # Emulate a driver that has implemented the update_provider_tree()
    # virt driver method
    self.driver_mock.update_provider_tree = mock.Mock()
    # A single supported trait, so it must end up in the provider tree.
    self.driver_mock.capabilities_as_traits.return_value = {
        mock.sentinel.trait: True}

    orig_compute = _COMPUTE_NODE_FIXTURES[0].obj_clone()
    self.rt.compute_nodes[_NODENAME] = orig_compute
    self.rt.old_resources[_NODENAME] = orig_compute
    new_compute = orig_compute.obj_clone()

    ptree = self._setup_ptree(orig_compute)

    self.rt._update(mock.sentinel.ctx, new_compute)

    # Use the *_with() form: it also checks the (empty) argument list
    # and cannot silently pass as an auto-created mock attribute.
    self.driver_mock.capabilities_as_traits.assert_called_once_with()
    ptree.update_traits.assert_called_once_with(
        new_compute.hypervisor_hostname,
        [mock.sentinel.trait]
    )
@mock.patch('nova.objects.ComputeNode.save')
def test_existing_node_update_provider_tree_implemented(self, save_mock):
"""The update_provider_tree() virt driver method is only implemented
@ -1451,23 +1498,24 @@ class TestUpdateComputeNode(BaseTestCase):
new_compute = orig_compute.obj_clone()
new_compute.local_gb = 210000
rc_mock = self.rt.reportclient
gptaer_mock = rc_mock.get_provider_tree_and_ensure_root
ptree = provider_tree.ProviderTree()
ptree.new_root(orig_compute.hypervisor_hostname, orig_compute.uuid)
gptaer_mock.return_value = ptree
ptree = self._setup_ptree(orig_compute)
self.rt._update(mock.sentinel.ctx, new_compute)
save_mock.assert_called_once_with()
gptaer_mock = self.rt.reportclient.get_provider_tree_and_ensure_root
gptaer_mock.assert_called_once_with(
mock.sentinel.ctx, new_compute.uuid,
name=new_compute.hypervisor_hostname)
self.driver_mock.update_provider_tree.assert_called_once_with(
ptree, new_compute.hypervisor_hostname)
rc_mock.update_from_provider_tree.assert_called_once_with(
self.rt.reportclient.update_from_provider_tree.assert_called_once_with(
mock.sentinel.ctx, ptree, allocations=None)
self.driver_mock.get_inventory.assert_not_called()
ptree.update_traits.assert_called_once_with(
new_compute.hypervisor_hostname,
[]
)
exp_inv = copy.deepcopy(fake_inv)
# These ratios and reserved amounts come from fake_upt
exp_inv[orc.VCPU]['allocation_ratio'] = 16.0

View File

@ -23,6 +23,7 @@ Driver base-classes:
import sys
import os_resource_classes as orc
import os_traits
from oslo_log import log as logging
from oslo_utils import importutils
import six
@ -91,6 +92,27 @@ def block_device_info_get_mapping(block_device_info):
return block_device_mapping
# Maps driver capability names (keys of ComputeDriver.capabilities) to
# the os_traits trait reported for that capability.
#
# NOTE(aspiers): When adding new capabilities, ensure they are
# mirrored in ComputeDriver.capabilities, and that the corresponding
# values are always standard traits in os_traits. If something
# isn't a standard trait, it doesn't need to be a compute node
# capability trait; and if it needs to be a compute node capability
# trait, it needs to be (made) standard, and must be prefixed with
# "COMPUTE_".
CAPABILITY_TRAITS_MAP = {
    # Added in os-traits 0.7.0.
    "supports_attach_interface": os_traits.COMPUTE_NET_ATTACH_INTERFACE,
    "supports_device_tagging": os_traits.COMPUTE_DEVICE_TAGGING,
    "supports_tagged_attach_interface":
        os_traits.COMPUTE_NET_ATTACH_INTERFACE_WITH_TAG,
    "supports_tagged_attach_volume": os_traits.COMPUTE_VOLUME_ATTACH_WITH_TAG,
    "supports_extend_volume": os_traits.COMPUTE_VOLUME_EXTEND,
    "supports_multiattach": os_traits.COMPUTE_VOLUME_MULTI_ATTACH,
    # Added in os-traits 0.8.0.
    "supports_trusted_certs": os_traits.COMPUTE_TRUSTED_CERTS
}
class ComputeDriver(object):
"""Base class for compute drivers.
@ -122,6 +144,9 @@ class ComputeDriver(object):
"""
# NOTE(mriedem): When adding new capabilities, consider whether they
# should also be added to CAPABILITY_TRAITS_MAP; if so, any new traits
# must also be added to the os-traits library.
capabilities = {
"has_imagecache": False,
"supports_evacuate": False,
@ -996,6 +1021,23 @@ class ComputeDriver(object):
"""
raise NotImplementedError()
def capabilities_as_traits(self):
    """Return this driver's capabilities as a dict keyed by trait.

    Only capabilities listed in CAPABILITY_TRAITS_MAP are included; any
    other entry in ``self.capabilities`` is ignored. Traits can only be
    standard compute capabilities traits from the os-traits library.

    :returns: dict, keyed by trait, of this driver's capabilities where the
        values are booleans indicating if the driver supports the trait
    """
    return {
        CAPABILITY_TRAITS_MAP[capability]: supported
        for capability, supported in self.capabilities.items()
        if capability in CAPABILITY_TRAITS_MAP
    }
def get_available_resource(self, nodename):
"""Retrieve resource information.

View File

@ -123,6 +123,8 @@ class Resources(object):
class FakeDriver(driver.ComputeDriver):
# These must match the traits in
# nova.tests.functional.integrated_helpers.ProviderUsageBaseTestCase
capabilities = {
"has_imagecache": True,
"supports_evacuate": True,

View File

@ -0,0 +1,26 @@
---
features:
- |
Compute drivers now expose capabilities via traits in the
Placement API. Capabilities must map to standard traits defined
in `the os-traits project
<https://docs.openstack.org/os-traits/latest/>`_; for now these
are:
* ``COMPUTE_NET_ATTACH_INTERFACE``
* ``COMPUTE_DEVICE_TAGGING``
* ``COMPUTE_NET_ATTACH_INTERFACE_WITH_TAG``
* ``COMPUTE_VOLUME_ATTACH_WITH_TAG``
* ``COMPUTE_VOLUME_EXTEND``
* ``COMPUTE_VOLUME_MULTI_ATTACH``
* ``COMPUTE_TRUSTED_CERTS``
Any traits provided by the driver will be automatically added
during startup or a periodic update of a compute node. Similarly,
any traits later retracted by the driver will be automatically
removed.
However, any traits that are removed by the admin from the compute
node resource provider via the Placement API will not be
reinstated until the compute service's provider cache is reset.
This can be triggered via a ``SIGHUP``.

View File

@ -57,7 +57,7 @@ psutil>=3.2.2 # BSD
oslo.versionedobjects>=1.33.3 # Apache-2.0
os-brick>=2.6.1 # Apache-2.0
os-resource-classes>=0.1.0 # Apache-2.0
os-traits>=0.4.0 # Apache-2.0
os-traits>=0.8.0 # Apache-2.0
os-vif>=1.14.0 # Apache-2.0
os-win>=3.0.0 # Apache-2.0
castellan>=0.16.0 # Apache-2.0