diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 7622aae7ae15..7b59a268537f 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -855,6 +855,12 @@ class ComputeManager(manager.Manager): LOG.debug("Instance in transitional state %s at start-up " "clearing task state", instance['task_state'], instance=instance) + try: + self._post_interrupted_snapshot_cleanup(context, instance) + except Exception: + # we don't want that an exception blocks the init_host + msg = _LE('Failed to cleanup snapshot.') + LOG.exception(msg, instance=instance) instance.task_state = None instance.save() @@ -2939,6 +2945,9 @@ class ComputeManager(manager.Manager): msg = _("Image not found during snapshot") LOG.warn(msg, instance=instance) + def _post_interrupted_snapshot_cleanup(self, context, instance): + self.driver.post_interrupted_snapshot_cleanup(context, instance) + @object_compat @messaging.expected_exceptions(NotImplementedError) def volume_snapshot_create(self, context, instance, volume_id, diff --git a/nova/tests/compute/test_compute_mgr.py b/nova/tests/compute/test_compute_mgr.py index 5a721c039132..370e4efcfca2 100644 --- a/nova/tests/compute/test_compute_mgr.py +++ b/nova/tests/compute/test_compute_mgr.py @@ -484,10 +484,13 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase): def _test_init_instance_cleans_image_states(self, instance): with mock.patch.object(instance, 'save') as save: self.compute._get_power_state = mock.Mock() + self.compute.driver.post_interrupted_snapshot_cleanup = mock.Mock() instance.info_cache = None instance.power_state = power_state.RUNNING self.compute._init_instance(self.context, instance) save.assert_called_once_with() + self.compute.driver.post_interrupted_snapshot_cleanup.\ + assert_called_once_with(self.context, instance) self.assertIsNone(instance.task_state) def test_init_instance_cleans_image_state_pending_upload(self): diff --git a/nova/tests/virt/test_virt_drivers.py b/nova/tests/virt/test_virt_drivers.py index 9212807ac5ed..a8062bdbccb0 100644 --- a/nova/tests/virt/test_virt_drivers.py +++ b/nova/tests/virt/test_virt_drivers.py @@ -263,6 +263,12 @@ class _VirtDriverTestCase(_FakeDriverBackendTestCase): self.connection.snapshot(self.ctxt, instance_ref, img_ref['id'], lambda *args, **kwargs: None) + @catch_notimplementederror + def test_post_interrupted_snapshot_cleanup(self): + instance_ref, network_info = self._get_running_instance() + self.connection.post_interrupted_snapshot_cleanup(self.ctxt, + instance_ref) + @catch_notimplementederror def test_reboot(self): reboot_type = "SOFT" diff --git a/nova/tests/virt/xenapi/test_driver.py b/nova/tests/virt/xenapi/test_driver.py index 8e5c5385633a..7276cc4b5a5c 100644 --- a/nova/tests/virt/xenapi/test_driver.py +++ b/nova/tests/virt/xenapi/test_driver.py @@ -15,6 +15,8 @@ import math +import mock + from nova.openstack.common import units from nova.tests.virt import test_driver from nova.tests.virt.xenapi import stubs @@ -27,6 +29,12 @@ class XenAPIDriverTestCase(stubs.XenAPITestBaseNoDB, test_driver.DriverAPITestHelper): """Unit tests for Driver operations.""" + def _get_driver(self): + stubs.stubout_session(self.stubs, stubs.FakeSessionForVMTests) + self.flags(connection_url='test_url', + connection_password='test_pass', group='xenserver') + return xenapi.XenAPIDriver(fake.FakeVirtAPI(), False) + def host_stats(self, refresh=True): return {'host_memory_total': 3 * units.Mi, 'host_memory_free_computed': 2 * units.Mi, @@ -40,11 +48,7 @@ class XenAPIDriverTestCase(stubs.XenAPITestBaseNoDB, 'pci_passthrough_devices': ''} def test_available_resource(self): - self.flags(connection_url='test_url', - connection_password='test_pass', group='xenserver') - stubs.stubout_session(self.stubs, stubs.FakeSessionForVMTests) - - driver = xenapi.XenAPIDriver(fake.FakeVirtAPI(), False) + driver = self._get_driver() driver._session.product_version = (6, 8, 2) self.stubs.Set(driver, 'get_host_stats', self.host_stats) @@ -62,10 +66,7 @@ class XenAPIDriverTestCase(stubs.XenAPITestBaseNoDB, self.assertEqual(1, resources['disk_available_least']) def test_overhead(self): - self.flags(connection_url='test_url', - connection_password='test_pass', group='xenserver') - stubs.stubout_session(self.stubs, stubs.FakeSessionForVMTests) - driver = xenapi.XenAPIDriver(fake.FakeVirtAPI(), False) + driver = self._get_driver() instance = {'memory_mb': 30720, 'vcpus': 4} # expected memory overhead per: @@ -78,10 +79,7 @@ class XenAPIDriverTestCase(stubs.XenAPITestBaseNoDB, self.assertEqual(expected, overhead['memory_mb']) def test_set_bootable(self): - self.flags(connection_url='test_url', connection_password='test_pass', - group='xenserver') - stubs.stubout_session(self.stubs, stubs.FakeSessionForVMTests) - driver = xenapi.XenAPIDriver(fake.FakeVirtAPI(), False) + driver = self._get_driver() self.mox.StubOutWithMock(driver._vmops, 'set_bootable') driver._vmops.set_bootable('inst', True) @@ -89,9 +87,15 @@ class XenAPIDriverTestCase(stubs.XenAPITestBaseNoDB, driver.set_bootable('inst', True) + def test_post_interrupted_snapshot_cleanup(self): + driver = self._get_driver() + fake_vmops_cleanup = mock.Mock() + driver._vmops.post_interrupted_snapshot_cleanup = fake_vmops_cleanup + + driver.post_interrupted_snapshot_cleanup("context", "instance") + + fake_vmops_cleanup.assert_called_once_with("context", "instance") + def test_public_api_signatures(self): - self.flags(connection_url='test_url', connection_password='test_pass', - group='xenserver') - stubs.stubout_session(self.stubs, stubs.FakeSessionForVMTests) - inst = xenapi.XenAPIDriver(fake.FakeVirtAPI(), False) + inst = self._get_driver() self.assertPublicAPISignatures(inst) diff --git a/nova/tests/virt/xenapi/test_vm_utils.py b/nova/tests/virt/xenapi/test_vm_utils.py index ee95fec38b37..c3ba4e51503a 100644 --- a/nova/tests/virt/xenapi/test_vm_utils.py +++ b/nova/tests/virt/xenapi/test_vm_utils.py @@ -2310,6 +2310,102 @@ class CreateVmRecordTestCase(VMUtilsTestBase): self.assertIn(vm_ref, result_keys) +class ChildVHDsTestCase(test.NoDBTestCase): + all_vdis = [ + ("my-vdi-ref", + {"uuid": "my-uuid", "sm_config": {}, + "is_a_snapshot": False, "other_config": {}}), + ("non-parent", + {"uuid": "uuid-1", "sm_config": {}, + "is_a_snapshot": False, "other_config": {}}), + ("diff-parent", + {"uuid": "uuid-1", "sm_config": {"vhd-parent": "other-uuid"}, + "is_a_snapshot": False, "other_config": {}}), + ("child", + {"uuid": "uuid-child", "sm_config": {"vhd-parent": "my-uuid"}, + "is_a_snapshot": False, "other_config": {}}), + ("child-snap", + {"uuid": "uuid-child-snap", "sm_config": {"vhd-parent": "my-uuid"}, + "is_a_snapshot": True, "other_config": {}}), + ] + + @mock.patch.object(vm_utils, '_get_all_vdis_in_sr') + def test_child_vhds_defaults(self, mock_get_all): + mock_get_all.return_value = self.all_vdis + + result = vm_utils._child_vhds("session", "sr_ref", "my-uuid") + + self.assertEqual(['uuid-child', 'uuid-child-snap'], result) + + @mock.patch.object(vm_utils, '_get_all_vdis_in_sr') + def test_child_vhds_only_snapshots(self, mock_get_all): + mock_get_all.return_value = self.all_vdis + + result = vm_utils._child_vhds("session", "sr_ref", "my-uuid", + old_snapshots_only=True) + + self.assertEqual(['uuid-child-snap'], result) + + def test_is_vdi_a_snapshot_works(self): + vdi_rec = {"is_a_snapshot": True, + "other_config": {}} + + self.assertTrue(vm_utils._is_vdi_a_snapshot(vdi_rec)) + + def test_is_vdi_a_snapshot_base_images_false(self): + vdi_rec = {"is_a_snapshot": True, + "other_config": {"image-id": "fake"}} + + self.assertFalse(vm_utils._is_vdi_a_snapshot(vdi_rec)) + + def test_is_vdi_a_snapshot_false_for_non_snapshot(self): + vdi_rec = {"is_a_snapshot": False, + "other_config": {}} + + self.assertFalse(vm_utils._is_vdi_a_snapshot(vdi_rec)) + + +class RemoveOldSnapshotsTestCase(test.NoDBTestCase): + + @mock.patch.object(vm_utils, '_child_vhds') + @mock.patch.object(vm_utils, '_get_vhd_parent_uuid') + @mock.patch.object(vm_utils, 'get_vdi_for_vm_safely') + @mock.patch.object(vm_utils, 'safe_find_sr') + def test_get_snapshots_for_vm(self, mock_find, mock_get_vdi, + mock_parent, mock_child_vhds): + session = mock.Mock() + instance = {"uuid": "uuid"} + mock_find.return_value = "sr_ref" + mock_get_vdi.return_value = ("vm_vdi_ref", "vm_vdi_rec") + mock_parent.return_value = "parent_uuid" + mock_child_vhds.return_value = [] + + result = vm_utils._get_snapshots_for_vm(session, instance, "vm_ref") + + self.assertEqual([], result) + mock_find.assert_called_once_with(session) + mock_get_vdi.assert_called_once_with(session, "vm_ref") + mock_parent.assert_called_once_with(session, "vm_vdi_ref") + mock_child_vhds.assert_called_once_with(session, "sr_ref", + "parent_uuid", old_snapshots_only=True) + + @mock.patch.object(vm_utils, 'scan_default_sr') + @mock.patch.object(vm_utils, 'safe_destroy_vdis') + @mock.patch.object(vm_utils, '_get_snapshots_for_vm') + def test_remove_old_snapshots(self, mock_get, mock_destroy, mock_scan): + session = mock.Mock() + instance = {"uuid": "uuid"} + mock_get.return_value = ["vdi_uuid1", "vdi_uuid2"] + session.VDI.get_by_uuid.return_value = "vdi_ref" + + vm_utils.remove_old_snapshots(session, instance, "vm_ref") + + self.assertTrue(mock_scan.called) + session.VDI.get_by_uuid.assert_called_once_with("vdi_uuid1") + mock_destroy.assert_called_once_with(session, ["vdi_ref"]) + mock_scan.assert_called_once_with(session) + + class ResizeFunctionTestCase(test.NoDBTestCase): def _call_get_resize_func_name(self, brand, version): session = mock.Mock() diff --git a/nova/tests/virt/xenapi/test_vmops.py b/nova/tests/virt/xenapi/test_vmops.py index 7fcea40f70f8..e0e7cbc33dde 100644 --- a/nova/tests/virt/xenapi/test_vmops.py +++ b/nova/tests/virt/xenapi/test_vmops.py @@ -933,6 +933,18 @@ class ResizeVdisTestCase(VMOpsTestBase): None, 5, 1000) +@mock.patch.object(vm_utils, 'remove_old_snapshots') +class CleanupFailedSnapshotTestCase(VMOpsTestBase): + def test_post_interrupted_snapshot_cleanup(self, mock_remove): + self.vmops._get_vm_opaque_ref = mock.Mock() + self.vmops._get_vm_opaque_ref.return_value = "vm_ref" + + self.vmops.post_interrupted_snapshot_cleanup("context", "instance") + + mock_remove.assert_called_once_with(self.vmops._session, + "instance", "vm_ref") + + class LiveMigrateHelperTestCase(VMOpsTestBase): def test_connect_block_device_volumes_none(self): self.assertEqual({}, self.vmops.connect_block_device_volumes(None)) diff --git a/nova/virt/driver.py b/nova/virt/driver.py index 6edba8fdde23..f12757ad6ac9 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -486,6 +486,14 @@ class ComputeDriver(object): """ raise NotImplementedError() + def post_interrupted_snapshot_cleanup(self, context, instance): + """Cleans up any resources left after an interrupted snapshot. + + :param context: security context + :param instance: nova.objects.instance.Instance + """ + pass + def finish_migration(self, context, migration, instance, disk_info, network_info, image_meta, resize_instance, block_device_info=None, power_on=True): diff --git a/nova/virt/xenapi/driver.py b/nova/virt/xenapi/driver.py index 8e107ec16d48..97aa46f208bb 100644 --- a/nova/virt/xenapi/driver.py +++ b/nova/virt/xenapi/driver.py @@ -228,6 +228,10 @@ class XenAPIDriver(driver.ComputeDriver): """Create snapshot from a running VM instance.""" self._vmops.snapshot(context, instance, image_id, update_task_state) + def post_interrupted_snapshot_cleanup(self, context, instance): + """Cleans up any resources left after a failed snapshot.""" + self._vmops.post_interrupted_snapshot_cleanup(context, instance) + def reboot(self, context, instance, network_info, reboot_type, block_device_info=None, bad_volumes_callback=None): """Reboot VM instance.""" diff --git a/nova/virt/xenapi/vm_utils.py b/nova/virt/xenapi/vm_utils.py index e7106197dcf8..3f261a483488 100644 --- a/nova/virt/xenapi/vm_utils.py +++ b/nova/virt/xenapi/vm_utils.py @@ -728,6 +728,40 @@ def strip_base_mirror_from_vdis(session, vm_ref): _try_strip_base_mirror_from_vdi(session, vdi_ref) +def _get_snapshots_for_vm(session, instance, vm_ref): + sr_ref = safe_find_sr(session) + vm_vdi_ref, vm_vdi_rec = get_vdi_for_vm_safely(session, vm_ref) + parent_uuid = _get_vhd_parent_uuid(session, vm_vdi_ref) + + if not parent_uuid: + return [] + + return _child_vhds(session, sr_ref, parent_uuid, old_snapshots_only=True) + + +def remove_old_snapshots(session, instance, vm_ref): + """See if there is an snapshot present that should be removed.""" + LOG.debug("Starting remove_old_snapshots for VM", instance=instance) + + snapshot_uuids = _get_snapshots_for_vm(session, instance, vm_ref) + number_of_snapshots = len(snapshot_uuids) + + if number_of_snapshots <= 0: + LOG.debug("No snapshots to remove.", instance=instance) + return + + if number_of_snapshots > 1: + LOG.debug("More snapshots than expected, only deleting one.", + instance=instance) + + vdi_uuid = snapshot_uuids[0] + vdi_ref = session.VDI.get_by_uuid(vdi_uuid) + safe_destroy_vdis(session, [vdi_ref]) + scan_default_sr(session) + # TODO(johnthetubaguy): we could look for older snapshots too + LOG.debug("Removed one old snapshot.", instance=instance) + + @contextlib.contextmanager def snapshot_attached_here(session, instance, vm_ref, label, userdevice='0', post_snapshot_callback=None): @@ -2048,7 +2082,14 @@ def _walk_vdi_chain(session, vdi_uuid): vdi_uuid = parent_uuid -def _child_vhds(session, sr_ref, vdi_uuid): +def _is_vdi_a_snapshot(vdi_rec): + """Ensure VDI is a snapshot, and not cached image.""" + is_a_snapshot = vdi_rec['is_a_snapshot'] + image_id = vdi_rec['other_config'].get('image-id') + return is_a_snapshot and not image_id + + +def _child_vhds(session, sr_ref, vdi_uuid, old_snapshots_only=False): """Return the immediate children of a given VHD. This is not recursive, only the immediate children are returned. @@ -2064,9 +2105,12 @@ def _child_vhds(session, sr_ref, vdi_uuid): if parent_uuid != vdi_uuid: continue + if old_snapshots_only and not _is_vdi_a_snapshot(rec): + continue + children.add(rec_uuid) - return children + return list(children) def _count_parents_children(session, vdi_ref, sr_ref): diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py index 4d276799ed3b..0cba7cda196d 100644 --- a/nova/virt/xenapi/vmops.py +++ b/nova/virt/xenapi/vmops.py @@ -752,6 +752,11 @@ class VMOps(object): LOG.debug("Finished snapshot and upload for VM", instance=instance) + def post_interrupted_snapshot_cleanup(self, context, instance): + """Cleans up any resources left after a failed snapshot.""" + vm_ref = self._get_vm_opaque_ref(instance) + vm_utils.remove_old_snapshots(self._session, instance, vm_ref) + def _get_orig_vm_name_label(self, instance): return instance['name'] + '-orig'