Use update_provider_tree from resource tracker

The resource tracker calls the new update_provider_tree virt driver
method, using it if the driver implements it and falling back to the
existing get_inventory path if not, and flushes the resulting changes
back to placement accordingly.

Change-Id: I5ee11274816cd9e4f0669e9e52468a29262c9020
blueprint: update-provider-tree
Eric Fried 2018-01-16 11:44:25 -06:00 committed by naichuans
parent 32fdf52958
commit 83a5f90f82
4 changed files with 373 additions and 30 deletions
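In outline, the new update path in ResourceTracker._update is a three-level
fallback: try the tree-based method first, then the legacy inventory call,
then the oldest code path. The sketch below condenses the resource tracker
diff that follows; it is a summary, not the verbatim code:

    # Condensed from the resource tracker diff below.
    reportclient = self.scheduler_client.reportclient
    # Fetch the provider tree for this compute node, creating it (with a
    # single root provider) if it does not exist yet.
    prov_tree = reportclient.get_provider_tree_and_ensure_root(
        context, compute_node.uuid, name=compute_node.hypervisor_hostname)
    try:
        # Preferred: the driver rearranges the tree; flush it to placement.
        self.driver.update_provider_tree(prov_tree, nodename)
        reportclient.update_from_provider_tree(context, prov_tree)
    except NotImplementedError:
        try:
            # Fallback: legacy flat inventory reporting.
            inv_data = self.driver.get_inventory(nodename)
            _normalize_inventory_from_cn_obj(inv_data, compute_node)
            self.scheduler_client.set_inventory_for_provider(
                context, compute_node.uuid,
                compute_node.hypervisor_hostname, inv_data)
        except NotImplementedError:
            # Last resort: the oldest code path.
            self.scheduler_client.update_compute_node(context, compute_node)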


@@ -868,33 +868,52 @@ class ResourceTracker(object):
         # is changed.
         nodename = compute_node.hypervisor_hostname

         # Persist the stats to the Scheduler
         # First try update_provider_tree
+        # Retrieve the provider tree associated with this compute node. If
+        # it doesn't exist yet, this will create it with a (single, root)
+        # provider corresponding to the compute node.
+        reportclient = self.scheduler_client.reportclient
+        prov_tree = reportclient.get_provider_tree_and_ensure_root(
+            context, compute_node.uuid, name=compute_node.hypervisor_hostname)
+        # Let the virt driver rearrange the provider tree and set/update
+        # the inventory, traits, and aggregates throughout.
         try:
-            inv_data = self.driver.get_inventory(nodename)
-            _normalize_inventory_from_cn_obj(inv_data, compute_node)
-            self.scheduler_client.set_inventory_for_provider(
-                context,
-                compute_node.uuid,
-                compute_node.hypervisor_hostname,
-                inv_data,
-            )
+            self.driver.update_provider_tree(prov_tree, nodename)
+            # Flush any changes.
+            reportclient.update_from_provider_tree(context, prov_tree)
+            # NOTE(efried): We do not _normalize_inventory_from_cn_obj if
+            # the virt driver is advanced enough to have implemented
+            # update_provider_tree.
         except NotImplementedError:
-            # Eventually all virt drivers will return an inventory dict in the
-            # format that the placement API expects and we'll be able to remove
-            # this code branch
-            self.scheduler_client.update_compute_node(context, compute_node)
+            # update_provider_tree isn't implemented yet - try get_inventory
+            try:
+                inv_data = self.driver.get_inventory(nodename)
+                _normalize_inventory_from_cn_obj(inv_data, compute_node)
+                self.scheduler_client.set_inventory_for_provider(
+                    context,
+                    compute_node.uuid,
+                    compute_node.hypervisor_hostname,
+                    inv_data,
+                )
+            except NotImplementedError:
+                # Eventually all virt drivers will return an inventory dict in
+                # the format that the placement API expects and we'll be able
+                # to remove this code branch
+                self.scheduler_client.update_compute_node(context,
+                                                          compute_node)

-        try:
-            traits = self.driver.get_traits(nodename)
-        except NotImplementedError:
-            pass
-        else:
-            # NOTE(mgoddard): set_traits_for_provider does not refresh the
-            # provider tree in the report client, so we rely on the above call
-            # to set_inventory_for_provider or update_compute_node to ensure
-            # that the resource provider exists in the tree and has had its
-            # cached traits refreshed.
-            self.reportclient.set_traits_for_provider(
-                context, compute_node.uuid, traits)
+            try:
+                traits = self.driver.get_traits(nodename)
+            except NotImplementedError:
+                pass
+            else:
+                # NOTE(mgoddard): set_traits_for_provider does not refresh the
+                # provider tree in the report client, so we rely on the above
+                # call to set_inventory_for_provider or update_compute_node to
+                # ensure that the resource provider exists in the tree and has
+                # had its cached traits refreshed.
+                self.reportclient.set_traits_for_provider(
+                    context, compute_node.uuid, traits)

         if self.pci_tracker:
             self.pci_tracker.save(context)
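For driver authors, the contract is simple: update_provider_tree receives a
ProviderTree for this compute node and mutates it in place; the resource
tracker then flushes any differences to placement via
update_from_provider_tree. A minimal hypothetical sketch (the VCPU figures
and trait here are illustrative placeholders; the ProviderTree calls are the
same ones exercised by the functional tests below):

    def update_provider_tree(self, provider_tree, nodename):
        # Illustrative example: report VCPU inventory and a CPU-feature
        # trait on the compute node's root provider. 'nodename' names
        # that provider in the tree.
        provider_tree.update_inventory(nodename, {
            'VCPU': {
                'total': 8,
                'reserved': 0,
                'min_unit': 1,
                'max_unit': 8,
                'step_size': 1,
                'allocation_ratio': 16.0,
            },
        }, None)
        provider_tree.update_traits(nodename, ['HW_CPU_X86_AVX2'])

The same ProviderTree interface supports new_root() and new_child() for
modeling sharing providers and nested hierarchies, as the ProviderTreeTests
below demonstrate. A driver that leaves the method unimplemented raises
NotImplementedError, which sends the resource tracker down the get_inventory
fallback.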


@@ -146,6 +146,7 @@ class IronicResourceTrackerTest(test.TestCase):
         driver = mock.MagicMock(autospec=virt_driver.ComputeDriver)
         driver.node_is_available.return_value = True
+        driver.update_provider_tree.side_effect = NotImplementedError
         self.driver_mock = driver
         self.rt = resource_tracker.ResourceTracker(COMPUTE_HOST, driver)
         self.rt.scheduler_client.reportclient = self.report_client


@@ -1432,12 +1432,9 @@ class ProviderUsageBaseTestCase(test.TestCase,
         return self.placement_api.get(
             '/allocations/%s' % server_uuid).body['allocations']

-    def _get_traits(self):
-        return self.placement_api.get('/traits', version='1.6').body['traits']
-
     def _get_all_providers(self):
         return self.placement_api.get(
-            '/resource_providers').body['resource_providers']
+            '/resource_providers', version='1.14').body['resource_providers']

     def _get_provider_traits(self, provider_uuid):
         return self.placement_api.get(
@@ -1461,6 +1458,23 @@ class ProviderUsageBaseTestCase(test.TestCase,
             '/resource_providers/%s/traits' % rp_uuid,
             put_traits_req, version='1.6')

+    def _get_all_resource_classes(self):
+        dicts = self.placement_api.get(
+            '/resource_classes', version='1.2').body['resource_classes']
+        return [d['name'] for d in dicts]
+
+    def _get_all_traits(self):
+        return self.placement_api.get('/traits', version='1.6').body['traits']
+
+    def _get_provider_inventory(self, rp_uuid):
+        return self.placement_api.get(
+            '/resource_providers/%s/inventories' % rp_uuid).body['inventories']
+
+    def _get_provider_aggregates(self, rp_uuid):
+        return self.placement_api.get(
+            '/resource_providers/%s/aggregates' % rp_uuid,
+            version='1.1').body['aggregates']
+
     def assertFlavorMatchesAllocation(self, flavor, allocation):
         self.assertEqual(flavor['vcpus'], allocation['VCPU'])
         self.assertEqual(flavor['ram'], allocation['MEMORY_MB'])
@@ -1609,6 +1623,270 @@ class ProviderUsageBaseTestCase(test.TestCase,
         LOG.info('Finished with periodics')


+class ProviderTreeTests(ProviderUsageBaseTestCase):
+    compute_driver = 'fake.SmallFakeDriver'
+
+    def setUp(self):
+        super(ProviderTreeTests, self).setUp()
+        _p = mock.patch.object(fake.SmallFakeDriver, 'update_provider_tree')
+        self.addCleanup(_p.stop)
+        self.mock_upt = _p.start()
+
+        # Before starting compute, placement has no providers registered
+        self.assertEqual([], self._get_all_providers())
+
+        self.compute = self._start_compute(host='host1')
+
+        # The compute host should have been created in placement with empty
+        # inventory and no traits
+        rps = self._get_all_providers()
+        self.assertEqual(1, len(rps))
+        self.assertEqual(self.compute.host, rps[0]['name'])
+        self.host_uuid = self._get_provider_uuid_by_host(self.compute.host)
+        self.assertEqual({}, self._get_provider_inventory(self.host_uuid))
+        self.assertEqual([], self._get_provider_traits(self.host_uuid))
+
+    def _run_update_available_resource_and_assert_sync_error(self):
+        """Invoke ResourceTracker.update_available_resource and assert that it
+        results in ResourceProviderSyncFailed.
+
+        _run_periodics is a little too high up in the call stack to be useful
+        for this, because ResourceTracker.update_available_resource_for_node
+        swallows all exceptions.
+        """
+        ctx = context.get_admin_context()
+        rt = self.compute._get_resource_tracker()
+        self.assertRaises(
+            exception.ResourceProviderSyncFailed,
+            rt.update_available_resource, ctx, self.compute.host)
+
+    def test_update_provider_tree_associated_info(self):
+        """Inventory in some standard and custom resource classes. Standard
+        and custom traits. Aggregates. Custom resource class and trait get
+        created; inventory, traits, and aggregates get set properly.
+        """
+        inv = {
+            'VCPU': {
+                'total': 10,
+                'reserved': 0,
+                'min_unit': 1,
+                'max_unit': 2,
+                'step_size': 1,
+                'allocation_ratio': 10.0,
+            },
+            'MEMORY_MB': {
+                'total': 1048576,
+                'reserved': 2048,
+                'min_unit': 1024,
+                'max_unit': 131072,
+                'step_size': 1024,
+                'allocation_ratio': 1.0,
+            },
+            'CUSTOM_BANDWIDTH': {
+                'total': 1250000,
+                'reserved': 10000,
+                'min_unit': 5000,
+                'max_unit': 250000,
+                'step_size': 5000,
+                'allocation_ratio': 8.0,
+            },
+        }
+        traits = set(['HW_CPU_X86_AVX', 'HW_CPU_X86_AVX2', 'CUSTOM_GOLD'])
+        aggs = set([uuids.agg1, uuids.agg2])
+
+        def update_provider_tree(prov_tree, nodename):
+            prov_tree.update_inventory(self.compute.host, inv, None)
+            prov_tree.update_traits(self.compute.host, traits)
+            prov_tree.update_aggregates(self.compute.host, aggs)
+        self.mock_upt.side_effect = update_provider_tree
+
+        self.assertNotIn('CUSTOM_BANDWIDTH', self._get_all_resource_classes())
+        self.assertNotIn('CUSTOM_GOLD', self._get_all_traits())
+
+        self._run_periodics()
+
+        self.assertIn('CUSTOM_BANDWIDTH', self._get_all_resource_classes())
+        self.assertIn('CUSTOM_GOLD', self._get_all_traits())
+        self.assertEqual(inv, self._get_provider_inventory(self.host_uuid))
+        self.assertEqual(traits,
+                         set(self._get_provider_traits(self.host_uuid)))
+        self.assertEqual(aggs,
+                         set(self._get_provider_aggregates(self.host_uuid)))
+
+    def test_update_provider_tree_multiple_providers(self):
+        """Make update_provider_tree create multiple providers, including an
+        additional root as a sharing provider; and some descendants in the
+        compute node's tree.
+        """
+        def update_provider_tree(prov_tree, nodename):
+            # Create a shared storage provider as a root
+            prov_tree.new_root('ssp', uuids.ssp, None)
+            prov_tree.update_traits(
+                'ssp', ['MISC_SHARES_VIA_AGGREGATE', 'STORAGE_DISK_SSD'])
+            prov_tree.update_aggregates('ssp', [uuids.agg])
+            # Compute node is in the same aggregate
+            prov_tree.update_aggregates(self.compute.host, [uuids.agg])
+            # Create two NUMA nodes as children
+            prov_tree.new_child('numa1', self.host_uuid, uuid=uuids.numa1)
+            prov_tree.new_child('numa2', self.host_uuid, uuid=uuids.numa2)
+            # Give the NUMA nodes the proc/mem inventory. NUMA 2 has twice as
+            # much as NUMA 1 (so we can validate later that everything is
+            # where it should be).
+            for n in (1, 2):
+                inv = {
+                    'VCPU': {
+                        'total': 10 * n,
+                        'reserved': 0,
+                        'min_unit': 1,
+                        'max_unit': 2,
+                        'step_size': 1,
+                        'allocation_ratio': 10.0,
+                    },
+                    'MEMORY_MB': {
+                        'total': 1048576 * n,
+                        'reserved': 2048,
+                        'min_unit': 1024,
+                        'max_unit': 131072,
+                        'step_size': 1024,
+                        'allocation_ratio': 1.0,
+                    },
+                }
+                prov_tree.update_inventory('numa%d' % n, inv, None)
+            # Each NUMA node has two PFs providing VF inventory on one of two
+            # networks
+            for n in (1, 2):
+                for p in (1, 2):
+                    name = 'pf%d_%d' % (n, p)
+                    prov_tree.new_child(
+                        name, getattr(uuids, 'numa%d' % n),
+                        uuid=getattr(uuids, name))
+                    trait = 'CUSTOM_PHYSNET_%d' % ((n + p) % 2)
+                    prov_tree.update_traits(name, [trait])
+                    inv = {
+                        'SRIOV_NET_VF': {
+                            'total': n + p,
+                            'reserved': 0,
+                            'min_unit': 1,
+                            'max_unit': 1,
+                            'step_size': 1,
+                            'allocation_ratio': 1.0,
+                        },
+                    }
+                    prov_tree.update_inventory(name, inv, None)
+        self.mock_upt.side_effect = update_provider_tree
+
+        self._run_periodics()
+
+        # Create a dict, keyed by provider UUID, of all the providers
+        rps_by_uuid = {}
+        for rp_dict in self._get_all_providers():
+            rps_by_uuid[rp_dict['uuid']] = rp_dict
+
+        # All and only the expected providers got created.
+        all_uuids = set([self.host_uuid, uuids.ssp, uuids.numa1, uuids.numa2,
+                         uuids.pf1_1, uuids.pf1_2, uuids.pf2_1, uuids.pf2_2])
+        self.assertEqual(all_uuids, set(rps_by_uuid))
+
+        # Validate tree roots
+        tree_uuids = [self.host_uuid, uuids.numa1, uuids.numa2,
+                      uuids.pf1_1, uuids.pf1_2, uuids.pf2_1, uuids.pf2_2]
+        for tree_uuid in tree_uuids:
+            self.assertEqual(self.host_uuid,
+                             rps_by_uuid[tree_uuid]['root_provider_uuid'])
+        self.assertEqual(uuids.ssp,
+                         rps_by_uuid[uuids.ssp]['root_provider_uuid'])
+
+        # SSP has the right traits
+        self.assertEqual(
+            set(['MISC_SHARES_VIA_AGGREGATE', 'STORAGE_DISK_SSD']),
+            set(self._get_provider_traits(uuids.ssp)))
+
+        # SSP and compute are in the same aggregate
+        agg_uuids = set([self.host_uuid, uuids.ssp])
+        for uuid in agg_uuids:
+            self.assertEqual(set([uuids.agg]),
+                             set(self._get_provider_aggregates(uuid)))
+
+        # The rest aren't in aggregates
+        for uuid in (all_uuids - agg_uuids):
+            self.assertEqual(set(), set(self._get_provider_aggregates(uuid)))
+
+        # NUMAs have the right inventory and parentage
+        for n in (1, 2):
+            numa_uuid = getattr(uuids, 'numa%d' % n)
+            self.assertEqual(self.host_uuid,
+                             rps_by_uuid[numa_uuid]['parent_provider_uuid'])
+            inv = self._get_provider_inventory(numa_uuid)
+            self.assertEqual(10 * n, inv['VCPU']['total'])
+            self.assertEqual(1048576 * n, inv['MEMORY_MB']['total'])
+
+        # PFs have the right inventory, physnet, and parentage
+        self.assertEqual(uuids.numa1,
+                         rps_by_uuid[uuids.pf1_1]['parent_provider_uuid'])
+        self.assertEqual(['CUSTOM_PHYSNET_0'],
+                         self._get_provider_traits(uuids.pf1_1))
+        self.assertEqual(
+            2,
+            self._get_provider_inventory(uuids.pf1_1)['SRIOV_NET_VF']['total'])
+
+        self.assertEqual(uuids.numa1,
+                         rps_by_uuid[uuids.pf1_2]['parent_provider_uuid'])
+        self.assertEqual(['CUSTOM_PHYSNET_1'],
+                         self._get_provider_traits(uuids.pf1_2))
+        self.assertEqual(
+            3,
+            self._get_provider_inventory(uuids.pf1_2)['SRIOV_NET_VF']['total'])
+
+        self.assertEqual(uuids.numa2,
+                         rps_by_uuid[uuids.pf2_1]['parent_provider_uuid'])
+        self.assertEqual(['CUSTOM_PHYSNET_1'],
+                         self._get_provider_traits(uuids.pf2_1))
+        self.assertEqual(
+            3,
+            self._get_provider_inventory(uuids.pf2_1)['SRIOV_NET_VF']['total'])
+
+        self.assertEqual(uuids.numa2,
+                         rps_by_uuid[uuids.pf2_2]['parent_provider_uuid'])
+        self.assertEqual(['CUSTOM_PHYSNET_0'],
+                         self._get_provider_traits(uuids.pf2_2))
+        self.assertEqual(
+            4,
+            self._get_provider_inventory(uuids.pf2_2)['SRIOV_NET_VF']['total'])
+
+        # Compute and NUMAs don't have any traits
+        for uuid in (self.host_uuid, uuids.numa1, uuids.numa2):
+            self.assertEqual([], self._get_provider_traits(uuid))
+
+    def test_update_provider_tree_bogus_resource_class(self):
+        def update_provider_tree(prov_tree, nodename):
+            prov_tree.update_inventory(self.compute.host, {'FOO': {}}, None)
+        self.mock_upt.side_effect = update_provider_tree
+
+        rcs = self._get_all_resource_classes()
+        self.assertIn('VCPU', rcs)
+        self.assertNotIn('FOO', rcs)
+
+        self._run_update_available_resource_and_assert_sync_error()
+
+        rcs = self._get_all_resource_classes()
+        self.assertIn('VCPU', rcs)
+        self.assertNotIn('FOO', rcs)
+
+    def test_update_provider_tree_bogus_trait(self):
+        def update_provider_tree(prov_tree, nodename):
+            prov_tree.update_traits(self.compute.host, ['FOO'])
+        self.mock_upt.side_effect = update_provider_tree
+
+        traits = self._get_all_traits()
+        self.assertIn('HW_CPU_X86_AVX', traits)
+        self.assertNotIn('FOO', traits)
+
+        self._run_update_available_resource_and_assert_sync_error()
+
+        traits = self._get_all_traits()
+        self.assertIn('HW_CPU_X86_AVX', traits)
+        self.assertNotIn('FOO', traits)
+
+
 class TraitsTrackingTests(ProviderUsageBaseTestCase):
     compute_driver = 'fake.SmallFakeDriver'
@@ -1617,14 +1895,14 @@ class TraitsTrackingTests(ProviderUsageBaseTestCase):
         traits = ['CUSTOM_FOO', 'HW_CPU_X86_VMX']
         mock_traits.return_value = traits

-        self.assertNotIn('CUSTOM_FOO', self._get_traits())
+        self.assertNotIn('CUSTOM_FOO', self._get_all_traits())
         self.assertEqual([], self._get_all_providers())

         self.compute = self._start_compute(host='host1')

         rp_uuid = self._get_provider_uuid_by_host('host1')
         self.assertEqual(traits, sorted(self._get_provider_traits(rp_uuid)))
-        self.assertIn('CUSTOM_FOO', self._get_traits())
+        self.assertIn('CUSTOM_FOO', self._get_all_traits())


 class ServerMovingTests(ProviderUsageBaseTestCase):


@@ -438,6 +438,7 @@ def setup_rt(hostname, virt_resources=_VIRT_DRIVER_AVAIL_RESOURCES,
     vd.get_available_resource.return_value = virt_resources
     vd.get_inventory.side_effect = NotImplementedError
     vd.get_traits.side_effect = NotImplementedError
+    vd.update_provider_tree.side_effect = NotImplementedError
     vd.get_host_ip_addr.return_value = _NODENAME
     vd.estimate_instance_overhead.side_effect = estimate_overhead
     vd.rebalances_nodes = False
@@ -1325,6 +1326,50 @@ class TestUpdateComputeNode(BaseTestCase):
         )
         self.driver_mock.get_traits.assert_called_once_with(_NODENAME)

+    @mock.patch('nova.compute.resource_tracker.'
+                '_normalize_inventory_from_cn_obj')
+    @mock.patch('nova.objects.ComputeNode.save')
+    def test_existing_node_update_provider_tree_implemented(self, save_mock,
+                                                            norm_mock):
+        """The update_provider_tree() virt driver method is only implemented
+        for some virt drivers. This method returns inventory, trait, and
+        aggregate information for resource providers in a tree associated with
+        the compute node. If this method doesn't raise a NotImplementedError,
+        it triggers _update() to call the update_from_provider_tree() method of
+        the reporting client instead of set_inventory_for_provider() (old) or
+        update_compute_node() (older).
+        """
+        self._setup_rt()
+
+        rc_mock = self.rt.reportclient
+        gptaer_mock = rc_mock.get_provider_tree_and_ensure_root
+        gptaer_mock.return_value = mock.sentinel.pt1
+        # Emulate a driver that has implemented the update_provider_tree()
+        # virt driver method
+        self.driver_mock.update_provider_tree.side_effect = None
+
+        orig_compute = _COMPUTE_NODE_FIXTURES[0].obj_clone()
+        self.rt.compute_nodes[_NODENAME] = orig_compute
+        self.rt.old_resources[_NODENAME] = orig_compute
+
+        # Deliberately changing local_gb to trigger updating inventory
+        new_compute = orig_compute.obj_clone()
+        new_compute.local_gb = 210000
+
+        self.rt._update(mock.sentinel.ctx, new_compute)
+
+        save_mock.assert_called_once_with()
+        gptaer_mock.assert_called_once_with(
+            mock.sentinel.ctx, new_compute.uuid,
+            name=new_compute.hypervisor_hostname)
+        self.driver_mock.update_provider_tree.assert_called_once_with(
+            mock.sentinel.pt1, new_compute.hypervisor_hostname)
+        rc_mock.update_from_provider_tree.assert_called_once_with(
+            mock.sentinel.ctx, mock.sentinel.pt1)
+        norm_mock.assert_not_called()
+        self.sched_client_mock.update_compute_node.assert_not_called()
+        self.sched_client_mock.set_inventory_for_provider.assert_not_called()
+
     def test_get_node_uuid(self):
         self._setup_rt()
         orig_compute = _COMPUTE_NODE_FIXTURES[0].obj_clone()