Browse Source

ironic: add instance_uuid before any other spawn activity

In the baremetal use case, it is necessary to allow a remote
source of truth assert information about a physical node,
as opposed to a client asserting desired configuration.

As such, the prepare_networks_before_block_device_mapping virt
driver method was added, however in doing so network VIF attaching
was moved before actual hardware reservation. As ironic only
supports users to have a number of VIFs limited by the number
of network interfaces in the physical node, VIF attach actions
cannot be performed without first asserting control over the node.

Adding an "instance_uuid" upfront allows other ironic API consumers
to be aware that a node is now in use. Alternatively it also allows
nova to become aware prior to adding network information on the node
that the node may already be in use by an external user.

Co-Authored-By: Julia Kreger <juliaashleykreger@gmail.com>

Conflicts:
    nova/tests/unit/compute/test_compute_mgr.py

NOTE(melwitt): The conflict is because change
I755b6fdddc9d754326cd9c81b6880581641f73e8 is not in Queens.

Closes-Bug: #1766301
Change-Id: I87f085589bb663c519650f307f25d087c88bbdb1
(cherry picked from commit e45c5ec819)
changes/93/743493/1
Jim Rollenhagen 2 years ago
committed by Lee Yarwood
parent
commit
6be5a27b36
5 changed files with 221 additions and 18 deletions
  1. +7
    -0
      nova/compute/manager.py
  2. +69
    -11
      nova/tests/unit/compute/test_compute_mgr.py
  3. +76
    -3
      nova/tests/unit/virt/ironic/test_driver.py
  4. +28
    -0
      nova/virt/driver.py
  5. +41
    -4
      nova/virt/ironic/driver.py

+ 7
- 0
nova/compute/manager.py View File

@@ -2325,6 +2325,9 @@ class ComputeManager(manager.Manager):
reason=msg)

try:
# Perform any driver preparation work for the driver.
self.driver.prepare_for_spawn(instance)

# Depending on a virt driver, some network configuration is
# necessary before preparing block devices.
self.driver.prepare_networks_before_block_device_mapping(
@@ -2352,12 +2355,14 @@ class ComputeManager(manager.Manager):
network_info.wait(do_raise=False)
self.driver.clean_networks_preparation(instance,
network_info)
self.driver.failed_spawn_cleanup(instance)
except (exception.UnexpectedTaskStateError,
exception.OverQuota, exception.InvalidBDM) as e:
# Make sure the async call finishes
if network_info is not None:
network_info.wait(do_raise=False)
self.driver.clean_networks_preparation(instance, network_info)
self.driver.failed_spawn_cleanup(instance)
raise exception.BuildAbortException(instance_uuid=instance.uuid,
reason=e.format_message())
except Exception:
@@ -2367,6 +2372,7 @@ class ComputeManager(manager.Manager):
if network_info is not None:
network_info.wait(do_raise=False)
self.driver.clean_networks_preparation(instance, network_info)
self.driver.failed_spawn_cleanup(instance)
msg = _('Failure prepping block device.')
raise exception.BuildAbortException(instance_uuid=instance.uuid,
reason=msg)
@@ -2381,6 +2387,7 @@ class ComputeManager(manager.Manager):
# Make sure the async call finishes
if network_info is not None:
network_info.wait(do_raise=False)
self.driver.failed_spawn_cleanup(instance)
msg = _('Failure retrieving placement allocations')
raise exception.BuildAbortException(instance_uuid=instance.uuid,
reason=msg)


+ 69
- 11
nova/tests/unit/compute/test_compute_mgr.py View File

@@ -6037,6 +6037,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
self.requested_networks, self.security_groups,
test.MatchType(objects.ImageMeta), self.block_device_mapping)

@mock.patch.object(virt_driver.ComputeDriver, 'failed_spawn_cleanup')
@mock.patch.object(virt_driver.ComputeDriver, 'prepare_for_spawn')
@mock.patch.object(objects.Instance, 'save')
@mock.patch.object(manager.ComputeManager, '_build_networks_for_instance')
@mock.patch.object(manager.ComputeManager, '_prep_block_device')
@@ -6045,7 +6047,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
@mock.patch.object(virt_driver.ComputeDriver,
'clean_networks_preparation')
def test_build_resources_reraises_on_failed_bdm_prep(
self, mock_clean, mock_prepnet, mock_prep, mock_build, mock_save):
self, mock_clean, mock_prepnet, mock_prep, mock_build, mock_save,
mock_prepspawn, mock_failedspawn):
mock_save.return_value = self.instance
mock_build.return_value = self.network_info
mock_prep.side_effect = test.TestingException
@@ -6065,6 +6068,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
self.block_device_mapping)
mock_prepnet.assert_called_once_with(self.instance, self.network_info)
mock_clean.assert_called_once_with(self.instance, self.network_info)
mock_prepspawn.assert_called_once_with(self.instance)
mock_failedspawn.assert_called_once_with(self.instance)

@mock.patch('nova.virt.block_device.attach_block_devices',
side_effect=exception.VolumeNotCreated('oops!'))
@@ -6118,12 +6123,16 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
instance, hints)
mock_get.assert_called_once_with(self.context, uuids.group_hint)

@mock.patch.object(virt_driver.ComputeDriver, 'failed_spawn_cleanup')
@mock.patch.object(virt_driver.ComputeDriver, 'prepare_for_spawn')
@mock.patch.object(virt_driver.ComputeDriver,
'prepare_networks_before_block_device_mapping')
@mock.patch.object(virt_driver.ComputeDriver,
'clean_networks_preparation')
def test_failed_bdm_prep_from_delete_raises_unexpected(self, mock_clean,
mock_prepnet):
mock_prepnet,
mock_prepspawn,
mock_failedspawn):
with test.nested(
mock.patch.object(self.compute,
'_build_networks_for_instance',
@@ -6151,9 +6160,15 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
save.assert_has_calls([mock.call()])
mock_prepnet.assert_called_once_with(self.instance, self.network_info)
mock_clean.assert_called_once_with(self.instance, self.network_info)
mock_prepspawn.assert_called_once_with(self.instance)
mock_failedspawn.assert_called_once_with(self.instance)

@mock.patch.object(virt_driver.ComputeDriver, 'failed_spawn_cleanup')
@mock.patch.object(virt_driver.ComputeDriver, 'prepare_for_spawn')
@mock.patch.object(manager.ComputeManager, '_build_networks_for_instance')
def test_build_resources_aborts_on_failed_network_alloc(self, mock_build):
def test_build_resources_aborts_on_failed_network_alloc(self, mock_build,
mock_prepspawn,
mock_failedspawn):
mock_build.side_effect = test.TestingException

try:
@@ -6166,8 +6181,16 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):

mock_build.assert_called_once_with(self.context, self.instance,
self.requested_networks, self.security_groups)

def test_failed_network_alloc_from_delete_raises_unexpected(self):
# This exception is raised prior to initial prep and cleanup
# with the virt driver, and as such these should not record
# any calls.
mock_prepspawn.assert_not_called()
mock_failedspawn.assert_not_called()

@mock.patch.object(virt_driver.ComputeDriver, 'failed_spawn_cleanup')
@mock.patch.object(virt_driver.ComputeDriver, 'prepare_for_spawn')
def test_failed_network_alloc_from_delete_raises_unexpected(self,
mock_prepspawn, mock_failedspawn):
with mock.patch.object(self.compute,
'_build_networks_for_instance') as _build_networks:

@@ -6188,12 +6211,17 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
_build_networks.assert_has_calls(
[mock.call(self.context, self.instance,
self.requested_networks, self.security_groups)])
mock_prepspawn.assert_not_called()
mock_failedspawn.asset_not_called()

@mock.patch.object(virt_driver.ComputeDriver, 'failed_spawn_cleanup')
@mock.patch.object(virt_driver.ComputeDriver, 'prepare_for_spawn')
@mock.patch('nova.network.model.NetworkInfoAsyncWrapper.wait')
@mock.patch.object(manager.ComputeManager, '_build_networks_for_instance')
@mock.patch('nova.objects.Instance.save')
def test_build_resources_aborts_on_failed_allocations_get(
self, mock_save, mock_bn, mock_net_wait):
self, mock_save, mock_bn, mock_net_wait, mock_prepspawn,
mock_failedspawn):
mock_bn.return_value = self.network_info
mock_save.return_value = self.instance
self.mock_get_allocs.side_effect = exception.NotFound()
@@ -6210,12 +6238,17 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
self.mock_get_allocs.assert_called_once_with(self.context,
self.instance.uuid)
mock_net_wait.assert_called_once_with(do_raise=False)
mock_prepspawn.assert_called_once_with(self.instance)
mock_failedspawn.assert_called_once_with(self.instance)

@mock.patch.object(virt_driver.ComputeDriver, 'failed_spawn_cleanup')
@mock.patch.object(virt_driver.ComputeDriver, 'prepare_for_spawn')
@mock.patch.object(manager.ComputeManager, '_build_networks_for_instance')
@mock.patch.object(manager.ComputeManager, '_shutdown_instance')
@mock.patch.object(objects.Instance, 'save')
def test_build_resources_cleans_up_and_reraises_on_spawn_failure(self,
mock_save, mock_shutdown, mock_build):
mock_save, mock_shutdown, mock_build,
mock_prepspawn, mock_failedspawn):
mock_save.return_value = self.instance
mock_build.return_value = self.network_info
test_exception = test.TestingException()
@@ -6237,13 +6270,20 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_shutdown.assert_called_once_with(self.context, self.instance,
self.block_device_mapping, self.requested_networks,
try_deallocate_networks=False)
mock_prepspawn.assert_called_once_with(self.instance)
# Complete should have occured with _shutdown_instance
# so calling after the fact is not necessary.
mock_failedspawn.assert_not_called()

@mock.patch.object(virt_driver.ComputeDriver, 'failed_spawn_cleanup')
@mock.patch.object(virt_driver.ComputeDriver, 'prepare_for_spawn')
@mock.patch('nova.network.model.NetworkInfoAsyncWrapper.wait')
@mock.patch(
'nova.compute.manager.ComputeManager._build_networks_for_instance')
@mock.patch('nova.objects.Instance.save')
def test_build_resources_instance_not_found_before_yield(
self, mock_save, mock_build_network, mock_info_wait):
self, mock_save, mock_build_network, mock_info_wait,
mock_prepspawn, mock_failedspawn):
mock_build_network.return_value = self.network_info
expected_exc = exception.InstanceNotFound(
instance_id=self.instance.uuid)
@@ -6258,7 +6298,11 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_build_network.assert_called_once_with(self.context, self.instance,
self.requested_networks, self.security_groups)
mock_info_wait.assert_called_once_with(do_raise=False)
mock_prepspawn.assert_called_once_with(self.instance)
mock_failedspawn.assert_called_once_with(self.instance)

@mock.patch.object(virt_driver.ComputeDriver, 'failed_spawn_cleanup')
@mock.patch.object(virt_driver.ComputeDriver, 'prepare_for_spawn')
@mock.patch('nova.network.model.NetworkInfoAsyncWrapper.wait')
@mock.patch(
'nova.compute.manager.ComputeManager._build_networks_for_instance')
@@ -6269,7 +6313,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
'clean_networks_preparation')
def test_build_resources_unexpected_task_error_before_yield(
self, mock_clean, mock_prepnet, mock_save, mock_build_network,
mock_info_wait):
mock_info_wait, mock_prepspawn, mock_failedspawn):
mock_build_network.return_value = self.network_info
mock_save.side_effect = exception.UnexpectedTaskStateError(
instance_uuid=uuids.instance, expected={}, actual={})
@@ -6285,13 +6329,18 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_info_wait.assert_called_once_with(do_raise=False)
mock_prepnet.assert_called_once_with(self.instance, self.network_info)
mock_clean.assert_called_once_with(self.instance, self.network_info)
mock_prepspawn.assert_called_once_with(self.instance)
mock_failedspawn.assert_called_once_with(self.instance)

@mock.patch.object(virt_driver.ComputeDriver, 'failed_spawn_cleanup')
@mock.patch.object(virt_driver.ComputeDriver, 'prepare_for_spawn')
@mock.patch('nova.network.model.NetworkInfoAsyncWrapper.wait')
@mock.patch(
'nova.compute.manager.ComputeManager._build_networks_for_instance')
@mock.patch('nova.objects.Instance.save')
def test_build_resources_exception_before_yield(
self, mock_save, mock_build_network, mock_info_wait):
self, mock_save, mock_build_network, mock_info_wait,
mock_prepspawn, mock_failedspawn):
mock_build_network.return_value = self.network_info
mock_save.side_effect = Exception()
try:
@@ -6304,13 +6353,18 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_build_network.assert_called_once_with(self.context, self.instance,
self.requested_networks, self.security_groups)
mock_info_wait.assert_called_once_with(do_raise=False)
mock_prepspawn.assert_called_once_with(self.instance)
mock_failedspawn.assert_called_once_with(self.instance)

@mock.patch.object(virt_driver.ComputeDriver, 'failed_spawn_cleanup')
@mock.patch.object(virt_driver.ComputeDriver, 'prepare_for_spawn')
@mock.patch.object(manager.ComputeManager, '_build_networks_for_instance')
@mock.patch.object(manager.ComputeManager, '_shutdown_instance')
@mock.patch.object(objects.Instance, 'save')
@mock.patch('nova.compute.manager.LOG')
def test_build_resources_aborts_on_cleanup_failure(self, mock_log,
mock_save, mock_shutdown, mock_build):
mock_save, mock_shutdown, mock_build,
mock_prepspawn, mock_failedspawn):
mock_save.return_value = self.instance
mock_build.return_value = self.network_info
mock_shutdown.side_effect = test.TestingException('Failed to shutdown')
@@ -6334,6 +6388,10 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_shutdown.assert_called_once_with(self.context, self.instance,
self.block_device_mapping, self.requested_networks,
try_deallocate_networks=False)
mock_prepspawn.assert_called_once_with(self.instance)
# Complete should have occured with _shutdown_instance
# so calling after the fact is not necessary.
mock_failedspawn.assert_not_called()

@mock.patch.object(manager.ComputeManager, '_allocate_network')
@mock.patch.object(network_api.API, 'get_instance_nw_info')


+ 76
- 3
nova/tests/unit/virt/ironic/test_driver.py View File

@@ -1209,9 +1209,7 @@ class IronicDriverTestCase(test.NoDBTestCase):
{'path': '/instance_info/memory_mb', 'op': 'add',
'value': str(instance.flavor.memory_mb)},
{'path': '/instance_info/local_gb', 'op': 'add',
'value': str(node.properties.get('local_gb', 0))},
{'path': '/instance_uuid', 'op': 'add',
'value': instance.uuid}]
'value': str(node.properties.get('local_gb', 0))}]

if mock_call is not None:
# assert call() is invoked with retry_on_conflict False to
@@ -1404,6 +1402,7 @@ class IronicDriverTestCase(test.NoDBTestCase):

self.assertRaises(exception.ValidationError, self.driver.spawn,
self.ctx, instance, image_meta, [], None, {})
self.assertEqual(1, mock_node.get.call_count)
mock_node.get.assert_called_once_with(
node_uuid, fields=ironic_driver._NODE_FIELDS)
mock_avti.assert_called_once_with(self.ctx, instance, None)
@@ -2387,6 +2386,80 @@ class IronicDriverTestCase(test.NoDBTestCase):
self.driver._get_node_list)
mock_error.assert_called_once()

@mock.patch.object(cw.IronicClientWrapper, 'call')
def test_prepare_for_spawn(self, mock_call):
node = ironic_utils.get_test_node(driver='fake')
instance = fake_instance.fake_instance_obj(self.ctx,
node=node.uuid)
self.driver.prepare_for_spawn(instance)
expected_patch = [{'path': '/instance_uuid', 'op': 'add',
'value': instance.uuid}]
mock_call.has_calls(
[mock.call('node.get', node.uuid, mock.ANY),
mock.call('node.update', node.uuid,
expected_patch, retry_on_conflict=False)])

@mock.patch.object(cw.IronicClientWrapper, 'call')
def test__set_instance_uuid(self, mock_call):
node = ironic_utils.get_test_node(driver='fake')
instance = fake_instance.fake_instance_obj(self.ctx,
node=node.uuid)
expected_patch = [{'path': '/instance_uuid', 'op': 'add',
'value': instance.uuid}]
self.driver._set_instance_uuid(node, instance)
mock_call.has_calls(
[mock.call('node.update', node.uuid,
expected_patch, retry_on_conflict=False)])

def test_prepare_for_spawn_invalid_instance(self):
instance = fake_instance.fake_instance_obj(self.ctx,
node=None)
self.assertRaises(ironic_exception.BadRequest,
self.driver.prepare_for_spawn,
instance)

@mock.patch.object(cw.IronicClientWrapper, 'call')
def test_prepare_for_spawn_conflict(self, mock_call):
node = ironic_utils.get_test_node(driver='fake')
mock_call.side_effect = [node, ironic_exception.BadRequest]
instance = fake_instance.fake_instance_obj(self.ctx,
node=node.uuid)
self.assertRaises(exception.InstanceDeployFailure,
self.driver.prepare_for_spawn,
instance)

@mock.patch.object(ironic_driver.IronicDriver, '_cleanup_deploy')
@mock.patch.object(cw.IronicClientWrapper, 'call')
def test_failed_spawn_cleanup(self, mock_call, mock_cleanup):
node = ironic_utils.get_test_node(driver='fake')
instance = fake_instance.fake_instance_obj(self.ctx,
node=node.uuid)
self.driver.failed_spawn_cleanup(instance)
mock_call.assert_called_once_with('node.get_by_instance_uuid',
instance.uuid,
fields=ironic_driver._NODE_FIELDS)
self.assertEqual(1, mock_cleanup.call_count)

@mock.patch.object(ironic_driver.IronicDriver, '_stop_firewall')
@mock.patch.object(ironic_driver.IronicDriver, '_unplug_vifs')
@mock.patch.object(ironic_driver.IronicDriver,
'_cleanup_volume_target_info')
@mock.patch.object(cw.IronicClientWrapper, 'call')
def test__cleanup_deploy(self, mock_call, mock_vol, mock_unvif,
mock_stop_fw):
# TODO(TheJulia): This REALLY should be updated to cover all of the
# calls that take place.
node = ironic_utils.get_test_node(driver='fake')
instance = fake_instance.fake_instance_obj(self.ctx,
node=node.uuid)
self.driver._cleanup_deploy(node, instance)
mock_vol.assert_called_once_with(instance)
mock_unvif.assert_called_once_with(node, instance, None)
mock_stop_fw.assert_called_once_with(instance, None)
expected_patch = [{'path': '/instance_uuid', 'op': 'remove'}]
mock_call.has_calls(
[mock.call('node.update', node.uuid, expected_patch)])


class IronicDriverSyncTestCase(IronicDriverTestCase):



+ 28
- 0
nova/virt/driver.py View File

@@ -272,6 +272,34 @@ class ComputeDriver(object):
"""
raise NotImplementedError()

def prepare_for_spawn(self, instance):
"""Prepare to spawn instance.

Perform any pre-flight checks, tagging, etc. that the virt driver
must perform before executing the spawn process for a new instance.

:param instance: nova.objects.instance.Instance
This function should use the data there to guide
the creation of the new instance.
"""
pass

def failed_spawn_cleanup(self, instance):
"""Cleanup from the instance spawn.

Perform any hypervisor clean-up required should the spawn operation
fail, such as the removal of tags that were added during the
prepare_for_spawn method.

This method should be idempotent.

:param instance: nova.objects.instance.Instance
This function should use the data there to guide
the creation of the new instance.

"""
pass

def spawn(self, context, instance, image_meta, injected_files,
admin_password, allocations, network_info=None,
block_device_info=None):


+ 41
- 4
nova/virt/ironic/driver.py View File

@@ -369,6 +369,45 @@ class IronicDriver(virt_driver.ComputeDriver):
def _stop_firewall(self, instance, network_info):
self.firewall_driver.unfilter_instance(instance, network_info)

def _set_instance_uuid(self, node, instance):

patch = [{'path': '/instance_uuid', 'op': 'add',
'value': instance.uuid}]
try:
# NOTE(TheJulia): Assert an instance UUID to lock the node
# from other deployment attempts while configuration is
# being set.
self.ironicclient.call('node.update', node.uuid, patch,
retry_on_conflict=False)
except ironic.exc.BadRequest:
msg = (_("Failed to reserve node %(node)s "
"when provisioning the instance %(instance)s")
% {'node': node.uuid, 'instance': instance.uuid})
LOG.error(msg)
raise exception.InstanceDeployFailure(msg)

def prepare_for_spawn(self, instance):
LOG.debug('Preparing to spawn instance %s.', instance.uuid)
node_uuid = instance.get('node')
if not node_uuid:
raise ironic.exc.BadRequest(
_("Ironic node uuid not supplied to "
"driver for instance %s.") % instance.uuid)
node = self._get_node(node_uuid)
self._set_instance_uuid(node, instance)

def failed_spawn_cleanup(self, instance):
LOG.debug('Failed spawn cleanup called for instance',
instance=instance)
try:
node = self._validate_instance_and_node(instance)
except exception.InstanceNotFound:
LOG.warning('Attempt to clean-up from failed spawn of '
'instance %s failed due to no instance_uuid '
'present on the node.', instance.uuid)
return
self._cleanup_deploy(node, instance)

def _add_instance_info_to_node(self, node, instance, image_meta, flavor,
preserve_ephemeral=None,
block_device_info=None):
@@ -382,9 +421,6 @@ class IronicDriver(virt_driver.ComputeDriver):
preserve_ephemeral,
boot_from_volume)

# Associate the node with an instance
patch.append({'path': '/instance_uuid', 'op': 'add',
'value': instance.uuid})
try:
# FIXME(lucasagomes): The "retry_on_conflict" parameter was added
# to basically causes the deployment to fail faster in case the
@@ -468,10 +504,11 @@ class IronicDriver(virt_driver.ComputeDriver):
'reason': e},
instance=instance)

def _cleanup_deploy(self, node, instance, network_info):
def _cleanup_deploy(self, node, instance, network_info=None):
self._cleanup_volume_target_info(instance)
self._unplug_vifs(node, instance, network_info)
self._stop_firewall(instance, network_info)
self._remove_instance_info_from_node(node, instance)

def _wait_for_active(self, instance):
"""Wait for the node to be marked as ACTIVE in Ironic."""


Loading…
Cancel
Save