diff --git a/cinder/tests/unit/volume/drivers/test_rbd.py b/cinder/tests/unit/volume/drivers/test_rbd.py index f1ffeb89ee4..9952f8a722d 100644 --- a/cinder/tests/unit/volume/drivers/test_rbd.py +++ b/cinder/tests/unit/volume/drivers/test_rbd.py @@ -193,6 +193,7 @@ class RBDTestCase(test.TestCase): cfg.rados_connection_interval = 5 cfg.backup_use_temp_snapshot = False cfg.enable_deferred_deletion = False + cfg.rbd_concurrent_flatten_operations = 3 # Because the mocked conf doesn't actually have an underlying oslo conf # it doesn't have the set_default method, so we use a fake one. @@ -792,15 +793,15 @@ class RBDTestCase(test.TestCase): self.driver.delete_volume(self.volume_a) mock_get_clone_info.assert_called_once_with( - self.mock_rbd.Image.return_value, + self.mock_proxy.return_value.__enter__.return_value, self.volume_a.name, None) - (self.driver.rbd.Image.return_value - .list_snaps.assert_called_once_with()) + self.mock_proxy.return_value.__enter__.return_value.\ + list_snaps.assert_called_once_with() client.__enter__.assert_called_once_with() client.__exit__.assert_called_once_with(None, None, None) mock_delete_backup_snaps.assert_called_once_with( - self.mock_rbd.Image.return_value) + self.mock_proxy.return_value.__enter__.return_value) self.assertFalse( self.driver.rbd.Image.return_value.unprotect_snap.called) self.assertEqual( @@ -827,16 +828,16 @@ class RBDTestCase(test.TestCase): self.driver.delete_volume(self.volume_a) mock_get_clone_info.assert_called_once_with( - self.mock_rbd.Image.return_value, + self.mock_proxy.return_value.__enter__.return_value, self.volume_a.name, None) - m_del_clone_parent_refs.assert_called_once() - (self.driver.rbd.Image.return_value - .list_snaps.assert_called_once_with()) + m_del_clone_parent_refs.assert_not_called() + self.mock_proxy.return_value.__enter__.return_value.list_snaps.\ + assert_called_once_with() client.__enter__.assert_called_once_with() client.__exit__.assert_called_once_with(None, None, None) m_del_back_snaps.assert_called_once_with( - self.mock_rbd.Image.return_value) + self.mock_proxy.return_value.__enter__.return_value) self.assertFalse( self.driver.rbd.Image.return_value.unprotect_snap.called) self.assertEqual( @@ -859,18 +860,17 @@ class RBDTestCase(test.TestCase): drv.delete_volume(self.volume_a) mock_get_clone_info.assert_called_once_with( - drv.rbd.Image.return_value, + self.mock_proxy.return_value.__enter__.return_value, self.volume_a.name, None) - drv.rbd.Image.return_value.list_snaps.assert_called_once_with() client.__enter__.assert_called_once_with() client.__exit__.assert_called_once_with(None, None, None) mock_delete_backup_snaps.assert_called_once_with( - drv.rbd.Image.return_value) + self.mock_proxy.return_value.__enter__.return_value) self.assertFalse( drv.rbd.Image.return_value.unprotect_snap.called) self.assertEqual( - 1, drv.rbd.RBD.return_value.trash_move.call_count) + 0, drv.rbd.RBD.return_value.trash_move.call_count) self.driver.rbd.RBD.return_value.remove.assert_not_called() @common_mocks @@ -926,7 +926,7 @@ class RBDTestCase(test.TestCase): drv.delete_volume(self.volume_a) self.assertEqual( - 1, drv.rbd.RBD.return_value.trash_move.call_count) + 0, drv.rbd.RBD.return_value.trash_move.call_count) @common_mocks def test_deferred_deletion_w_deleted_parent(self): @@ -940,16 +940,18 @@ class RBDTestCase(test.TestCase): drv.delete_volume(self.volume_a) self.assertEqual( - 2, drv.rbd.RBD.return_value.trash_move.call_count) + 0, drv.rbd.RBD.return_value.trash_move.call_count) @common_mocks def test_delete_volume_not_found_at_open(self): self.mock_rbd.Image.side_effect = self.mock_rbd.ImageNotFound + self.mock_proxy.side_effect = self.mock_rbd.ImageNotFound self.assertIsNone(self.driver.delete_volume(self.volume_a)) with mock.patch.object(driver, 'RADOSClient') as client: client = self.mock_client.return_value.__enter__.return_value - self.mock_rbd.Image.assert_called_once_with(client.ioctx, - self.volume_a.name) + self.mock_proxy.assert_called_once_with(self.driver, + self.volume_a.name, + ioctx=client.ioctx) # Make sure the exception was raised self.assertEqual([self.mock_rbd.ImageNotFound], RAISED_EXCEPTIONS) @@ -958,45 +960,58 @@ class RBDTestCase(test.TestCase): self.mock_rbd.Image.return_value.list_snaps.return_value = [] self.mock_rbd.RBD.return_value.remove.side_effect = ( - self.mock_rbd.ImageBusy) - - mock_get_clone_info = self.mock_object(self.driver, '_get_clone_info') - mock_get_clone_info.return_value = (None, None, None) + self.mock_rbd.ImageBusy, + None) mock_delete_backup_snaps = self.mock_object(self.driver, '_delete_backup_snaps') mock_rados_client = self.mock_object(driver, 'RADOSClient') + mock_flatten = self.mock_object(self.driver, '_flatten') - self.driver.delete_volume(self.volume_a) + with mock.patch.object(self.driver, '_get_clone_info') as \ + mock_get_clone_info: + mock_get_clone_info.return_value = (None, None, None) + self.driver.rbd.Image.return_value.list_children.\ + return_value = [('pool1', 'child1'), + ('pool1', 'child2')] + self.mock_proxy.return_value.__enter__.return_value.list_children.\ + return_value = [('pool1', 'child1'), ('pool1', 'child2')] + self.driver.delete_volume(self.volume_a) - mock_get_clone_info.assert_called_once_with( - self.mock_rbd.Image.return_value, - self.volume_a.name, - None) - self.mock_rbd.Image.return_value.list_snaps.assert_called_once_with() - mock_rados_client.assert_called_once_with(self.driver) - mock_delete_backup_snaps.assert_called_once_with( - self.mock_rbd.Image.return_value) - self.assertFalse( - self.mock_rbd.Image.return_value.unprotect_snap.called) - self.assertEqual( - 1, self.mock_rbd.RBD.return_value.remove.call_count) - self.assertEqual(1, len(RAISED_EXCEPTIONS)) - # Make sure the exception was raised - self.assertIn(self.mock_rbd.ImageBusy, RAISED_EXCEPTIONS) + mock_flatten.assert_has_calls( + [mock.call('pool1', 'child1'), + mock.call('pool1', 'child2')]) - self.mock_rbd.RBD.return_value.trash_move.\ - assert_called_once_with( - mock.ANY, + mock_get_clone_info.assert_called_once_with( + self.mock_proxy.return_value.__enter__.return_value, self.volume_a.name, - 0) + None) + self.mock_proxy.return_value.__enter__.return_value.list_snaps.\ + assert_called_once_with() + mock_rados_client.assert_called_once_with(self.driver) + mock_delete_backup_snaps.assert_called_once_with( + self.mock_proxy.return_value.__enter__.return_value) + self.assertFalse( + self.mock_rbd.Image.return_value.unprotect_snap. + called) + self.assertEqual( + 2, + self.mock_rbd.RBD.return_value.remove.call_count) + self.assertEqual(1, len(RAISED_EXCEPTIONS)) + # Make sure the exception was raised + self.assertIn(self.mock_rbd.ImageBusy, + RAISED_EXCEPTIONS) + + self.mock_rbd.RBD.return_value.trash_move.assert_not_called() @common_mocks def test_delete_volume_has_snapshots(self): self.mock_rbd.Image.return_value.list_snaps.return_value = [] self.mock_rbd.RBD.return_value.remove.side_effect = ( - self.mock_rbd.ImageHasSnapshots) + self.mock_rbd.ImageHasSnapshots, # initial vol remove attempt + None # removal of child image + ) mock_get_clone_info = self.mock_object(self.driver, '_get_clone_info', return_value=(None, @@ -1005,18 +1020,70 @@ class RBDTestCase(test.TestCase): m_del_backup_snaps = self.mock_object(self.driver, '_delete_backup_snaps') + mock_try_remove_volume = self.mock_object(self.driver, + '_try_remove_volume', + return_value=True) + + mock_rados_client = self.mock_object(driver, 'RADOSClient') + + self.driver.delete_volume(self.volume_a) + + mock_get_clone_info.assert_called_once_with( + self.mock_proxy.return_value.__enter__.return_value, + self.volume_a.name, + None) + mock_rados_client.assert_called_once_with(self.driver) + m_del_backup_snaps.assert_called_once_with( + self.mock_proxy.return_value.__enter__.return_value) + self.assertFalse( + self.mock_rbd.Image.return_value.unprotect_snap.called) + self.assertEqual( + 1, self.mock_rbd.RBD.return_value.remove.call_count) + self.assertEqual(1, len(RAISED_EXCEPTIONS)) + # Make sure the exception was raised + self.assertIn(self.mock_rbd.ImageHasSnapshots, + RAISED_EXCEPTIONS) + + self.mock_rbd.RBD.return_value.trash_move.assert_not_called() + + mock_try_remove_volume.assert_called_once_with(mock.ANY, + self.volume_a.name) + + @common_mocks + def test_delete_volume_has_snapshots_trash(self): + self.mock_rbd.Image.return_value.list_snaps.return_value = [] + + self.mock_rbd.RBD.return_value.remove.side_effect = ( + self.mock_rbd.ImageHasSnapshots, # initial vol remove attempt + None # removal of child image + ) + mock_get_clone_info = self.mock_object(self.driver, + '_get_clone_info', + return_value=(None, + None, + None)) + m_del_backup_snaps = self.mock_object(self.driver, + '_delete_backup_snaps') + + mock_try_remove_volume = self.mock_object(self.driver, + '_try_remove_volume', + return_value=False) + + mock_trash_volume = self.mock_object(self.driver, + '_move_volume_to_trash') + with mock.patch.object(driver, 'RADOSClient') as mock_rados_client: self.driver.delete_volume(self.volume_a) mock_get_clone_info.assert_called_once_with( - self.mock_rbd.Image.return_value, + self.mock_proxy.return_value.__enter__.return_value, self.volume_a.name, None) - (self.mock_rbd.Image.return_value.list_snaps - .assert_called_once_with()) + self.mock_proxy.return_value.__enter__.return_value.list_snaps.\ + assert_called_once_with() mock_rados_client.assert_called_once_with(self.driver) m_del_backup_snaps.assert_called_once_with( - self.mock_rbd.Image.return_value) + self.mock_proxy.return_value.__enter__.return_value) self.assertFalse( self.mock_rbd.Image.return_value.unprotect_snap.called) self.assertEqual( @@ -1027,10 +1094,14 @@ class RBDTestCase(test.TestCase): RAISED_EXCEPTIONS) self.mock_rbd.RBD.return_value.trash_move.\ - assert_called_once_with( - mock.ANY, - self.volume_a.name, - 0) + assert_not_called() + + mock_trash_volume.assert_called_once_with(mock.ANY, + self.volume_a.name, + 0) + + mock_try_remove_volume.assert_called_once_with(mock.ANY, + self.volume_a.name) @common_mocks def test_delete_volume_not_found(self): @@ -1046,15 +1117,20 @@ class RBDTestCase(test.TestCase): mock_get_clone_info = self.mock_object(self.driver, '_get_clone_info') mock_get_clone_info.return_value = (None, None, None) + mock_find_clone_snap = self.mock_object(self.driver, + '_find_clone_snap', + return_value=None) + self.assertIsNone(self.driver.delete_volume(self.volume_a)) + image = self.mock_proxy.return_value.__enter__.return_value mock_get_clone_info.assert_called_once_with( - self.mock_rbd.Image.return_value, + image, self.volume_a.name, None) - self.mock_rbd.Image.return_value.list_snaps.assert_called_once_with() + mock_find_clone_snap.assert_called_once_with(image) mock_rados_client.assert_called_once_with(self.driver) - mock_delete_backup_snaps.assert_called_once_with( - self.mock_rbd.Image.return_value) + mock_delete_backup_snaps.assert_called_once_with(image) + self.assertFalse( self.mock_rbd.Image.return_value.unprotect_snap.called) self.assertEqual( @@ -1083,21 +1159,22 @@ class RBDTestCase(test.TestCase): return_value=(None, None, None)) + + self.mock_object(self.driver, '_find_clone_snap', + return_value=snapshots[2]['name']) with mock.patch.object(self.driver, '_delete_backup_snaps') as \ mock_delete_backup_snaps: self.driver.delete_volume(self.volume_a) mock_get_clone_info.assert_called_once_with( - self.mock_rbd.Image.return_value, + self.mock_proxy.return_value.__enter__.return_value, self.volume_a.name, snapshots[2]['name']) - (self.driver.rbd.Image.return_value - .list_snaps.assert_called_once_with()) client.__enter__.assert_called_once_with() client.__exit__.assert_called_once_with(None, None, None) mock_delete_backup_snaps.assert_called_once_with( - self.mock_rbd.Image.return_value) + self.mock_proxy.return_value.__enter__.return_value) self.assertFalse( self.driver.rbd.Image.return_value.unprotect_snap.called) self.assertEqual( @@ -1281,26 +1358,40 @@ class RBDTestCase(test.TestCase): proxy.__enter__.return_value = proxy proxy.unprotect_snap.side_effect = ( + self.mock_rbd.ImageBusy, + None) + + with mock.patch.object(self.driver, '_flatten_children') as \ + mock_flatten_children: + self.driver.delete_snapshot(self.snapshot) + + mock_flatten_children.assert_called_once_with(mock.ANY, + self.volume_a.name, + self.snapshot.name) + + self.assertTrue(proxy.unprotect_snap.called) + self.assertTrue(proxy.remove_snap.called) + + @common_mocks + @mock.patch.object(driver.RBDDriver, '_flatten') + @mock.patch('cinder.objects.Volume.get_by_id') + def test_delete_busy_snapshot_fail(self, volume_get_by_id, flatten_mock): + volume_get_by_id.return_value = self.volume_a + proxy = self.mock_proxy.return_value + proxy.__enter__.return_value = proxy + + proxy.unprotect_snap.side_effect = ( + self.mock_rbd.ImageBusy, + self.mock_rbd.ImageBusy, self.mock_rbd.ImageBusy) + flatten_mock.side_effect = exception.SnapshotIsBusy(self.snapshot.name) - with mock.patch.object(self.driver, '_get_children_info') as \ - mock_get_children_info: - mock_get_children_info.return_value = [('pool', 'volume2')] + self.assertRaises(exception.SnapshotIsBusy, + self.driver.delete_snapshot, + self.snapshot) - with mock.patch.object(driver, 'LOG') as \ - mock_log: - - self.assertRaises(exception.SnapshotIsBusy, - self.driver.delete_snapshot, - self.snapshot) - - mock_get_children_info.assert_called_once_with( - proxy, - self.snapshot.name) - - self.assertTrue(mock_log.info.called) - self.assertTrue(proxy.unprotect_snap.called) - self.assertFalse(proxy.remove_snap.called) + self.assertTrue(proxy.unprotect_snap.called) + self.assertFalse(proxy.remove_snap.called) @common_mocks @mock.patch('cinder.objects.Volume.get_by_id') @@ -1325,19 +1416,6 @@ class RBDTestCase(test.TestCase): self.snapshot) image.rollback_to_snap.assert_called_once_with(self.snapshot.name) - @common_mocks - def test_get_children_info(self): - volume = self.mock_proxy - volume.set_snap = mock.Mock() - volume.list_children = mock.Mock() - list_children = [('pool', 'volume2')] - volume.list_children.return_value = list_children - - info = self.driver._get_children_info(volume, - self.snapshot['name']) - - self.assertEqual(list_children, info) - @common_mocks def test_get_clone_info(self): volume = self.mock_rbd.Image() diff --git a/cinder/volume/drivers/rbd.py b/cinder/volume/drivers/rbd.py index 7b92f926391..0b3ccb9f044 100644 --- a/cinder/volume/drivers/rbd.py +++ b/cinder/volume/drivers/rbd.py @@ -1,4 +1,5 @@ # Copyright 2013 OpenStack Foundation +# Copyright 2022 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -133,6 +134,9 @@ RBD_OPTS = [ cfg.IntOpt('deferred_deletion_purge_interval', default=60, help='Number of seconds between runs of the periodic task ' 'to purge volumes tagged for deletion.'), + cfg.IntOpt('rbd_concurrent_flatten_operations', default=3, min=0, + help='Number of flatten operations that will run ' + 'concurrently on this volume service.') ] CONF = cfg.CONF @@ -356,6 +360,10 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD, self.keyring_data: Optional[str] = None self._set_keyring_attributes() + self._semaphore = utils.semaphore_factory( + limit=self.configuration.rbd_concurrent_flatten_operations, + concurrent_processes=1) + def _set_keyring_attributes(self) -> None: # The rbd_keyring_conf option is not available for OpenStack usage # for security reasons (OSSN-0085) and in OpenStack we use @@ -830,6 +838,25 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD, 'dst_size': volume.size}) self._resize(volume) + def _flatten_volume( + self, + image_name: str, + client: RADOSClient) -> None: + + # Flatten destination volume + try: + with RBDVolumeProxy(self, image_name, client=client, + ioctx=client.ioctx) as dest_volume: + LOG.debug("flattening volume %s", image_name) + dest_volume.flatten() + except Exception as e: + msg = (_("Failed to flatten volume %(volume)s with " + "error: %(error)s.") % + {'volume': image_name, + 'error': e}) + LOG.exception(msg) + raise exception.VolumeBackendAPIException(data=msg) + def create_cloned_volume( self, volume: Volume, @@ -890,24 +917,12 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD, # volumes are always flattened. if (volume.use_quota and depth >= self.configuration.rbd_max_clone_depth): + LOG.info("maximum clone depth (%d) has been reached - " "flattening dest volume", self.configuration.rbd_max_clone_depth) - # Flatten destination volume - try: - with RBDVolumeProxy(self, dest_name, client=client, - ioctx=client.ioctx) as dest_volume: - LOG.debug("flattening dest volume %s", dest_name) - dest_volume.flatten() - except Exception as e: - msg = (_("Failed to flatten volume %(volume)s with " - "error: %(error)s.") % - {'volume': dest_name, - 'error': e}) - LOG.exception(msg) - src_volume.close() - raise exception.VolumeBackendAPIException(data=msg) + self._flatten_volume(dest_name, client) try: # remove temporary snap @@ -1168,11 +1183,22 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD, return volume_update + @utils.limit_operations + def _do_flatten(self, volume_name: str, pool: str) -> None: + LOG.debug('flattening %s/%s', pool, volume_name) + try: + with RBDVolumeProxy(self, volume_name, pool=pool) as vol: + vol.flatten() + LOG.debug('flattening of %s/%s has completed', + pool, volume_name) + except self.rbd.ImageNotFound: + LOG.debug('image %s not found during flatten', volume_name) + # do nothing + def _flatten(self, pool: str, volume_name: str) -> None: - LOG.debug('flattening %(pool)s/%(img)s', - dict(pool=pool, img=volume_name)) - with RBDVolumeProxy(self, volume_name, pool) as vol: - vol.flatten() + image = pool + '/' + volume_name + LOG.debug('Queueing %s for flattening', image) + self._do_flatten(volume_name, pool) def _get_stripe_unit(self, ioctx: 'rados.Ioctx', volume_name: str) -> int: """Return the correct stripe unit for a cloned volume. @@ -1310,23 +1336,6 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD, return (None, None, None) - def _get_children_info(self, - volume: 'rbd.Image', - snap: Optional[str]) -> list[tuple]: - """List children for the given snapshot of a volume(image). - - Returns a list of (pool, image). - """ - - children_list = [] - - if snap: - volume.set_snap(snap) - children_list = volume.list_children() - volume.set_snap(None) - - return children_list - def _delete_clone_parent_refs(self, client: RADOSClient, parent_name: str, @@ -1369,96 +1378,161 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD, g_parent_snap = typing.cast(str, g_parent_snap) self._delete_clone_parent_refs(client, g_parent, g_parent_snap) - def delete_volume(self, volume: Volume) -> None: - """Deletes a logical volume.""" - volume_name = volume.name - with RADOSClient(self) as client: + def _flatten_children(self, + client_ioctx: 'rados.Ioctx', + volume_name: str, + snap_name: Optional[str] = None) -> None: + with RBDVolumeProxy(self, + volume_name, + ioctx=client_ioctx) as rbd_image: + if snap_name is not None: + rbd_image.set_snap(snap_name) + + children_list = rbd_image.list_children() + + for (pool, child_name) in children_list: + LOG.info('Image %(pool)s/%(image)s%(snap)s is dependent ' + 'on the image %(volume_name)s.', + {'pool': pool, + 'image': child_name, + 'volume_name': volume_name, + 'snap': '@' + snap_name if snap_name else ''}) try: - rbd_image = self.rbd.Image(client.ioctx, volume_name) - except self.rbd.ImageNotFound: - LOG.info("volume %s no longer exists in backend", - volume_name) - return + self._flatten(pool, child_name) + except Exception as e: + LOG.error(e) + raise - clone_snap = None - parent = None + def _move_volume_to_trash(self, + client_ioctx: 'rados.Ioctx', + volume_name: str, + delay: int) -> None: + # trash_move() will succeed in some situations when a regular + # remove() call will fail due to image dependencies + LOG.debug("moving volume %s to trash", volume_name) + try: + self.RBDProxy().trash_move(client_ioctx, + volume_name, + delay) + except self.rbd.ImageBusy: + msg = _('ImageBusy error raised while trashing RBD ' + 'volume.') + LOG.warning(msg) + raise exception.VolumeIsBusy(msg, volume_name=volume_name) - # Ensure any backup snapshots are deleted - self._delete_backup_snaps(rbd_image) - - # If the volume has non-clone snapshots this delete is expected to - # raise VolumeIsBusy so do so straight away. + def _try_remove_volume(self, client: 'rados', volume_name: str) -> bool: + # Try a couple of times to delete the volume, rather than + # stopping on the first error. + # In the event of simultaneous Cinder delete operations, + # this gives a window for other deletes of snapshots and images + # to complete, freeing dependencies which allow this remove to + # succeed. + @utils.retry((self.rbd.ImageBusy, self.rbd.ImageHasSnapshots), + self.configuration.rados_connection_interval, + self.configuration.rados_connection_retries) + def _do_try_remove_volume(self, client, volume_name: str) -> bool: try: - snaps = rbd_image.list_snaps() - for snap in snaps: - if snap['name'].endswith('.clone_snap'): - LOG.debug("volume has clone snapshot(s)") - # We grab one of these and use it when fetching parent - # info in case the volume has been flattened. - clone_snap = snap['name'] - break + LOG.debug('Trying to remove image %s', volume_name) + self.RBDProxy().remove(client.ioctx, volume_name) + return True + except (self.rbd.ImageHasSnapshots, self.rbd.ImageBusy): + with excutils.save_and_reraise_exception(): + msg = _('deletion failed') + LOG.info(msg) + + return False + + return _do_try_remove_volume(self, client, volume_name) + + @staticmethod + def _find_clone_snap(rbd_image: RBDVolumeProxy) -> Optional[str]: + snaps = rbd_image.list_snaps() + for snap in snaps: + if snap['name'].endswith('.clone_snap'): + LOG.debug("volume has clone snapshot(s)") + # We grab one of these and use it when fetching parent + # info in case the volume has been flattened. + clone_snap = snap['name'] + return clone_snap + + return None + + def _delete_volume(self, volume: Volume, client: RADOSClient) -> None: + + clone_snap = None + parent = None + parent_snap = None + + try: + with RBDVolumeProxy(self, + volume.name, + ioctx=client.ioctx) as rbd_image: + # Ensure any backup snapshots are deleted + self._delete_backup_snaps(rbd_image) + + clone_snap = self._find_clone_snap(rbd_image) # Determine if this volume is itself a clone _pool, parent, parent_snap = self._get_clone_info(rbd_image, - volume_name, + volume.name, clone_snap) - finally: - rbd_image.close() + except self.rbd.ImageNotFound: + LOG.info("volume %s no longer exists in backend", volume.name) + return - @utils.retry(self.rbd.ImageBusy, - self.configuration.rados_connection_interval, - self.configuration.rados_connection_retries) - def _try_remove_volume(client: Any, volume_name: str) -> None: - if self.configuration.enable_deferred_deletion: - delay = self.configuration.deferred_deletion_delay - else: - try: - self.RBDProxy().remove(client.ioctx, volume_name) - return - except (self.rbd.ImageHasSnapshots, self.rbd.ImageBusy): - delay = 0 - LOG.debug("moving volume %s to trash", volume_name) - # When using the RBD v2 clone api, deleting a volume - # that has a snapshot in the trash space raises a - # busy exception. - # In order to solve this, call the trash operation - # which should succeed when the volume has - # dependencies. - self.RBDProxy().trash_move(client.ioctx, - volume_name, - delay) + if clone_snap is not None: + # If the volume has copy-on-write clones, keep it as a silent + # volume which will be deleted when its snapshots and clones + # are deleted. + # TODO: only do this if it actually can't be deleted? + new_name = "%s.deleted" % (volume.name) + self.RBDProxy().rename(client.ioctx, volume.name, new_name) + return - if clone_snap is None: - LOG.debug("deleting rbd volume %s", volume_name) - try: - _try_remove_volume(client, volume_name) - except self.rbd.ImageBusy: - msg = (_("ImageBusy error raised while deleting rbd " - "volume. This may have been caused by a " - "connection from a client that has crashed and, " - "if so, may be resolved by retrying the delete " - "after 30 seconds has elapsed.")) - LOG.warning(msg) - # Now raise this so that the volume stays available and the - # deletion can be retried. - raise exception.VolumeIsBusy(msg, volume_name=volume_name) - except self.rbd.ImageNotFound: - LOG.info("RBD volume %s not found, allowing delete " - "operation to proceed.", volume_name) - return + LOG.debug("deleting RBD volume %s", volume.name) - # If it is a clone, walk back up the parent chain deleting - # references. - if parent: - LOG.debug("volume is a clone so cleaning references") - parent_snap = typing.cast(str, parent_snap) - self._delete_clone_parent_refs(client, parent, parent_snap) - else: - # If the volume has copy-on-write clones we will not be able to - # delete it. Instead we will keep it as a silent volume which - # will be deleted when it's snapshot and clones are deleted. - new_name = "%s.deleted" % (volume_name) - self.RBDProxy().rename(client.ioctx, volume_name, new_name) + try: + self.RBDProxy().remove(client.ioctx, volume.name) + return # the fast path was successful + except (self.rbd.ImageHasSnapshots, self.rbd.ImageBusy): + self._flatten_children(client.ioctx, volume.name) + except self.rbd.ImageNotFound: + LOG.info("RBD volume %s not found, allowing delete " + "operation to proceed.", volume.name) + return + + try: + if self._try_remove_volume(client, volume.name): + return + except self.rbd.ImageHasSnapshots: + # perform trash instead, which can succeed when snapshots exist + pass + except self.rbd.ImageBusy: + msg = _('ImageBusy error raised while deleting RBD volume') + raise exception.VolumeIsBusy(msg, volume_name=volume.name) + + delay = 0 + if self.configuration.enable_deferred_deletion: + delay = self.configuration.deferred_deletion_delay + + # Since it failed to remove above, trash the volume here instead. + # This covers the scenario of an image unable to be deleted because + # a child snapshot of it has been trashed but not yet removed. + # That snapshot is not visible but is still in the dependency + # chain of RBD images. + self._move_volume_to_trash(client.ioctx, volume.name, delay) + + # If it is a clone, walk back up the parent chain deleting + # references. + if parent: + LOG.debug("volume is a clone so cleaning references") + parent_snap = typing.cast(str, parent_snap) + self._delete_clone_parent_refs(client, parent, parent_snap) + + def delete_volume(self, volume: Volume) -> None: + """Deletes an RBD volume.""" + with RADOSClient(self) as client: + self._delete_volume(volume, client) def create_snapshot(self, snapshot: Snapshot) -> None: """Creates an rbd snapshot.""" @@ -1472,38 +1546,42 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD, volume_name = snapshot.volume_name snap_name = snapshot.name + @utils.retry(self.rbd.ImageBusy, + self.configuration.rados_connection_interval, + self.configuration.rados_connection_retries) + def do_unprotect_snap(self, volume_name, snap_name): + try: + with RBDVolumeProxy(self, volume_name) as volume: + volume.unprotect_snap(snap_name) + except self.rbd.InvalidArgument: + LOG.info( + "InvalidArgument: Unable to unprotect snapshot %s.", + snap_name) + except self.rbd.ImageNotFound as e: + LOG.info("Snapshot %s does not exist in backend.", + snap_name) + raise e + except self.rbd.ImageBusy as e: + # flatten and then retry the operation + with RADOSClient(self) as client: + self._flatten_children(client.ioctx, + volume_name, + snap_name) + raise e + + try: + do_unprotect_snap(self, volume_name, snap_name) + except self.rbd.ImageBusy: + raise exception.SnapshotIsBusy(snapshot_name=snap_name) + except self.rbd.ImageNotFound: + return + try: with RBDVolumeProxy(self, volume_name) as volume: - try: - volume.unprotect_snap(snap_name) - except self.rbd.InvalidArgument: - LOG.info( - "InvalidArgument: Unable to unprotect snapshot %s.", - snap_name) - except self.rbd.ImageNotFound: - LOG.info("Snapshot %s does not exist in backend.", - snap_name) - return - except self.rbd.ImageBusy: - children_list = self._get_children_info(volume, snap_name) - - if children_list: - for (pool, image) in children_list: - LOG.info('Image %(pool)s/%(image)s is dependent ' - 'on the snapshot %(snap)s.', - {'pool': pool, - 'image': image, - 'snap': snap_name}) - - raise exception.SnapshotIsBusy(snapshot_name=snap_name) - - try: - volume.remove_snap(snap_name) - except self.rbd.ImageNotFound: - LOG.info("Snapshot %s does not exist in backend.", - snap_name) + volume.remove_snap(snap_name) except self.rbd.ImageNotFound: - LOG.warning("Volume %s does not exist in backend.", volume_name) + LOG.info("Snapshot %s does not exist in backend.", + snap_name) def snapshot_revert_use_temp_snapshot(self) -> bool: """Disable the use of a temporary snapshot on revert.""" diff --git a/releasenotes/notes/rbd-flatten-child-volumes-4cb0b7fcf3a1df5e.yaml b/releasenotes/notes/rbd-flatten-child-volumes-4cb0b7fcf3a1df5e.yaml new file mode 100644 index 00000000000..a6285476571 --- /dev/null +++ b/releasenotes/notes/rbd-flatten-child-volumes-4cb0b7fcf3a1df5e.yaml @@ -0,0 +1,17 @@ +--- +upgrade: + - | + Cinder now uses the RBD trash functionality to handle some volume deletions. + Therefore, deployments must either a) enable scheduled RBD trash purging on + the RBD backend or b) enable the Cinder RBD driver's enable_deferred_deletion + option to have Cinder purge the RBD trash. + This adds the new configuration option 'rbd_concurrent_flatten_operations', + which limits how many RBD flattens the driver will run simultaneously. + This can be used to prevent flatten operations from consuming too much I/O + capacity on the Ceph cluster. It defaults to 3. +fixes: + - | + `Bug #1969643 `_: + The RBD driver can now delete volumes with other volumes cloned from it + (or its snapshots) in cases where deletion would previously fail. This + uses the RBD trash functionality.