Fix reporting inventory for provisioned nodes in the Ironic driver

Currently we report the full inventory for available nodes, and an empty
inventory for nodes that are deployed to or otherwise unavailable.

Reporting an empty inventory for deployed nodes has 2 bad consequences:
1. Nova tries deleting the inventory for Placement, which fails, because
   the resources are still in use. This results in nasty warnings.
2. When adding a resource class to a deployed node, it does not get into
   inventory, and thus does not get to Placement. It results in an error
   later on, when the custom resource class is not found.

This patch fixes the latter problem by
1. Always reporting the custom resource class for deployed nodes, if present.
2. Reporting VCPUS/memory/disk in exactly the same amount, as it is configured
   in the ironic node's properties.

As a side effect, the warnings are no longer shown for deployed nodes.
They still appear, however, for nodes during cleaning.

Partial-Bug: #1710141
Change-Id: I2fd1e4a95f000da19864e75299afa51527697101
This commit is contained in:
Dmitry Tantsur 2017-08-11 13:52:02 +02:00
parent 4cd6a3a1b4
commit 9ed692bf8c
3 changed files with 119 additions and 48 deletions
nova
tests/unit/virt/ironic
virt/ironic
releasenotes/notes

View File

@ -256,17 +256,12 @@ class IronicDriverTestCase(test.NoDBTestCase):
self.driver._wait_for_power_state, instance, 'fake message') self.driver._wait_for_power_state, instance, 'fake message')
self.assertTrue(fake_validate.called) self.assertTrue(fake_validate.called)
def _test__node_resource(self, has_inst_info): def test__node_resource_with_instance_uuid(self):
node_uuid = uuidutils.generate_uuid() node_uuid = uuidutils.generate_uuid()
props = _get_properties() props = _get_properties()
stats = _get_stats() stats = _get_stats()
if has_inst_info:
instance_info = _get_instance_info()
else:
instance_info = {}
node = ironic_utils.get_test_node(uuid=node_uuid, node = ironic_utils.get_test_node(uuid=node_uuid,
instance_uuid=self.instance_uuid, instance_uuid=self.instance_uuid,
instance_info=instance_info,
properties=props, properties=props,
resource_class='foo') resource_class='foo')
@ -285,30 +280,18 @@ class IronicDriverTestCase(test.NoDBTestCase):
gotkeys = sorted(result.keys()) gotkeys = sorted(result.keys())
self.assertEqual(wantkeys, gotkeys) self.assertEqual(wantkeys, gotkeys)
if has_inst_info: self.assertEqual(props['cpus'], result['vcpus'])
props_dict = instance_info self.assertEqual(result['vcpus'], result['vcpus_used'])
expected_cpus = instance_info['vcpus'] self.assertEqual(props['memory_mb'], result['memory_mb'])
else: self.assertEqual(result['memory_mb'], result['memory_mb_used'])
props_dict = props self.assertEqual(props['local_gb'], result['local_gb'])
expected_cpus = props['cpus'] self.assertEqual(result['local_gb'], result['local_gb_used'])
self.assertEqual(0, result['vcpus'])
self.assertEqual(expected_cpus, result['vcpus_used'])
self.assertEqual(0, result['memory_mb'])
self.assertEqual(props_dict['memory_mb'], result['memory_mb_used'])
self.assertEqual(0, result['local_gb'])
self.assertEqual(props_dict['local_gb'], result['local_gb_used'])
self.assertEqual(node_uuid, result['hypervisor_hostname']) self.assertEqual(node_uuid, result['hypervisor_hostname'])
self.assertEqual(stats, result['stats']) self.assertEqual(stats, result['stats'])
self.assertEqual('foo', result['resource_class']) self.assertEqual('foo', result['resource_class'])
self.assertIsNone(result['numa_topology']) self.assertIsNone(result['numa_topology'])
def test__node_resource(self):
self._test__node_resource(True)
def test__node_resource_no_instance_info(self):
self._test__node_resource(False)
def test__node_resource_canonicalizes_arch(self): def test__node_resource_canonicalizes_arch(self):
node_uuid = uuidutils.generate_uuid() node_uuid = uuidutils.generate_uuid()
props = _get_properties() props = _get_properties()
@ -411,12 +394,12 @@ class IronicDriverTestCase(test.NoDBTestCase):
instance_info=instance_info) instance_info=instance_info)
result = self.driver._node_resource(node) result = self.driver._node_resource(node)
self.assertEqual(0, result['vcpus']) self.assertEqual(props['cpus'], result['vcpus'])
self.assertEqual(instance_info['vcpus'], result['vcpus_used']) self.assertEqual(result['vcpus'], result['vcpus_used'])
self.assertEqual(0, result['memory_mb']) self.assertEqual(props['memory_mb'], result['memory_mb'])
self.assertEqual(instance_info['memory_mb'], result['memory_mb_used']) self.assertEqual(result['memory_mb'], result['memory_mb_used'])
self.assertEqual(0, result['local_gb']) self.assertEqual(props['local_gb'], result['local_gb'])
self.assertEqual(instance_info['local_gb'], result['local_gb_used']) self.assertEqual(result['local_gb'], result['local_gb_used'])
self.assertEqual(node_uuid, result['hypervisor_hostname']) self.assertEqual(node_uuid, result['hypervisor_hostname'])
self.assertEqual(stats, result['stats']) self.assertEqual(stats, result['stats'])
@ -683,11 +666,6 @@ class IronicDriverTestCase(test.NoDBTestCase):
{'uuid': uuidutils.generate_uuid(), {'uuid': uuidutils.generate_uuid(),
'power_state': ironic_states.POWER_ON, 'power_state': ironic_states.POWER_ON,
'provision_state': ironic_states.DELETED}, 'provision_state': ironic_states.DELETED},
# a node in AVAILABLE with an instance uuid
{'uuid': uuidutils.generate_uuid(),
'instance_uuid': uuidutils.generate_uuid(),
'power_state': ironic_states.POWER_OFF,
'provision_state': ironic_states.AVAILABLE}
] ]
for n in node_dicts: for n in node_dicts:
node = ironic_utils.get_test_node(**n) node = ironic_utils.get_test_node(**n)
@ -761,8 +739,11 @@ class IronicDriverTestCase(test.NoDBTestCase):
""" """
mock_nr.return_value = { mock_nr.return_value = {
'vcpus': 24, 'vcpus': 24,
'vcpus_used': 0,
'memory_mb': 1024, 'memory_mb': 1024,
'memory_mb_used': 0,
'local_gb': 100, 'local_gb': 100,
'local_gb_used': 0,
'resource_class': None, 'resource_class': None,
} }
@ -807,8 +788,67 @@ class IronicDriverTestCase(test.NoDBTestCase):
""" """
mock_nr.return_value = { mock_nr.return_value = {
'vcpus': 24, 'vcpus': 24,
'vcpus_used': 0,
'memory_mb': 1024, 'memory_mb': 1024,
'memory_mb_used': 0,
'local_gb': 100, 'local_gb': 100,
'local_gb_used': 0,
'resource_class': 'iron-nfv',
}
result = self.driver.get_inventory(mock.sentinel.nodename)
expected = {
fields.ResourceClass.VCPU: {
'total': 24,
'reserved': 0,
'min_unit': 1,
'max_unit': 24,
'step_size': 1,
'allocation_ratio': 1.0,
},
fields.ResourceClass.MEMORY_MB: {
'total': 1024,
'reserved': 0,
'min_unit': 1,
'max_unit': 1024,
'step_size': 1,
'allocation_ratio': 1.0,
},
fields.ResourceClass.DISK_GB: {
'total': 100,
'reserved': 0,
'min_unit': 1,
'max_unit': 100,
'step_size': 1,
'allocation_ratio': 1.0,
},
'CUSTOM_IRON_NFV': {
'total': 1,
'reserved': 0,
'min_unit': 1,
'max_unit': 1,
'step_size': 1,
'allocation_ratio': 1.0,
},
}
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
self.assertEqual(expected, result)
@mock.patch.object(ironic_driver.IronicDriver, '_node_resource')
@mock.patch.object(ironic_driver.IronicDriver, '_node_from_cache')
def test_get_inventory_with_rc_occupied(self, mock_nfc, mock_nr):
"""Ensure that when a node is used, we report the inventory matching
the consumed resources.
"""
mock_nr.return_value = {
'vcpus': 24,
'vcpus_used': 24,
'memory_mb': 1024,
'memory_mb_used': 1024,
'local_gb': 100,
'local_gb_used': 100,
'resource_class': 'iron-nfv', 'resource_class': 'iron-nfv',
} }
@ -860,8 +900,11 @@ class IronicDriverTestCase(test.NoDBTestCase):
""" """
mock_nr.return_value = { mock_nr.return_value = {
'vcpus': 0, 'vcpus': 0,
'vcpus_used': 0,
'memory_mb': 0, 'memory_mb': 0,
'memory_mb_used': 0,
'local_gb': 0, 'local_gb': 0,
'local_gb_used': 0,
'resource_class': None, 'resource_class': None,
} }

View File

@ -189,9 +189,7 @@ class IronicDriver(virt_driver.ComputeDriver):
ironic_states.AVAILABLE, ironic_states.NOSTATE] ironic_states.AVAILABLE, ironic_states.NOSTATE]
return (node_obj.maintenance or return (node_obj.maintenance or
node_obj.power_state in bad_power_states or node_obj.power_state in bad_power_states or
node_obj.provision_state not in good_provision_states or node_obj.provision_state not in good_provision_states)
(node_obj.provision_state in good_provision_states and
node_obj.instance_uuid is not None))
def _node_resources_used(self, node_obj): def _node_resources_used(self, node_obj):
"""Determine whether the node's resources are currently used. """Determine whether the node's resources are currently used.
@ -309,19 +307,13 @@ class IronicDriver(virt_driver.ComputeDriver):
# Node is in the process of deploying, is deployed, or is in # Node is in the process of deploying, is deployed, or is in
# the process of cleaning up from a deploy. Report all of its # the process of cleaning up from a deploy. Report all of its
# resources as in use. # resources as in use.
instance_info = self._parse_node_instance_info(node, properties) vcpus_used = vcpus
memory_mb_used = memory_mb
# Use instance_info instead of properties here is because the local_gb_used = local_gb
# properties of a deployed node can be changed which will count
# as available resources.
vcpus_used = vcpus = instance_info['vcpus']
memory_mb_used = memory_mb = instance_info['memory_mb']
local_gb_used = local_gb = instance_info['local_gb']
# Always checking allows us to catch the case where Nova thinks there # Always checking allows us to catch the case where Nova thinks there
# are available resources on the Node, but Ironic does not (because it # are available resources on the Node, but Ironic does not (because it
# is not in a usable state): https://launchpad.net/bugs/1503453 # is not in a usable state): https://launchpad.net/bugs/1503453
if self._node_resources_unavailable(node): elif self._node_resources_unavailable(node):
# The node's current state is such that it should not present any # The node's current state is such that it should not present any
# of its resources to Nova # of its resources to Nova
vcpus = 0 vcpus = 0
@ -743,6 +735,12 @@ class IronicDriver(virt_driver.ComputeDriver):
# node is "disabled". In the future, we should detach inventory # node is "disabled". In the future, we should detach inventory
# accounting from the concept of a node being disabled or not. The # accounting from the concept of a node being disabled or not. The
# two things don't really have anything to do with each other. # two things don't really have anything to do with each other.
LOG.debug('Node %(node)s is not ready for a deployment, '
'reporting an empty inventory for it. Node\'s '
'provision state is %(prov)s, power state is '
'%(power)s and maintenance is %(maint)s.',
{'node': node.uuid, 'prov': node.provision_state,
'power': node.power_state, 'maint': node.maintenance})
return {} return {}
result = { result = {

View File

@ -0,0 +1,30 @@
---
fixes:
- |
The ironic virt driver no longer reports an empty inventory for bare metal
nodes that have instances on them. Instead the custom resource class, VCPU,
memory and disk are reported as they are configured on the node.
issues:
- |
Due to the changes in scheduling of bare metal nodes, additional resources
may be reported as free to Placement. This happens in two cases:
* An instance is deployed with a flavor smaller than a node (only possible
when exact filters are not used)
* Node properties were modified in ironic for a deployed nodes
When such instances were deployed without using a custom resource class,
it is possible for the scheduler to try deploying another instance on
the same node. It will cause a failure in the compute and a scheduling
retry.
The recommended work around is to assign a resource class to all ironic
nodes, and use it for scheduling of bare metal instances.
deprecations:
- |
Scheduling bare metal (ironic) instances using standard resource classes
(VCPU, memory, disk) is deprecated and will no longer be supported in
Queens. Custom resource classes should be used instead.
Please refer to the `ironic documentation
<https://docs.openstack.org/ironic/latest/install/configure-nova-flavors.html#scheduling-based-on-resource-classes>`_
for a detailed explanation.