Support deferred deletion in RBD

This patch proposes to use the trash functionality in Ceph RBD to
support deferred deletion in the RBD volume driver. With deferred
deletion enabled, deleting a volume will only move it to the trash.
A periodic task checks whether the deferment time of volumes in the
trash has expired and, if it has, removes them from the backend.
The patch also adds config options to enable the feature, to set the
trash deferment time, and to define the interval at which the periodic
purge task is triggered.

Implements: blueprint rbd-deferred-volume-deletion
Change-Id: Id07c3f5e5c0a7665e6360cdccc007c6d7deb58fc
This commit is contained in:
Arne Wiebalck 2018-10-02 16:02:13 +02:00
parent c319b40759
commit c6580b2ab4
3 changed files with 196 additions and 2 deletions

View File

@ -21,10 +21,12 @@ import uuid
import castellan
import ddt
import errno
import mock
from mock import call
from oslo_utils import imageutils
from oslo_utils import units
import time
from cinder import context
from cinder import db
@ -73,6 +75,11 @@ class MockOSErrorException(MockException):
"""Used as mock for rbd.OSError."""
class MockPermissionError(MockException):
    """Mock exception standing in for PermissionError.

    Its ``errno`` class attribute is set to ``errno.EPERM``.
    """

    # The right-hand side is looked up as the ``errno`` module while the
    # class body executes; the name is then rebound as a class attribute.
    errno = errno.EPERM
class KeyObject(object):
    """Minimal key object exposing ``get_encoded``."""

    def get_encoded(arg):
        # ``arg`` receives the instance; the original parameter name is kept.
        encoded = "asdf".encode('utf-8')
        return encoded
@ -109,6 +116,7 @@ def common_mocks(f):
inst.mock_rbd.ImageNotFound = MockImageNotFoundException
inst.mock_rbd.ImageExists = MockImageExistsException
inst.mock_rbd.InvalidArgument = MockImageNotFoundException
inst.mock_rbd.PermissionError = MockPermissionError
inst.driver.rbd = inst.mock_rbd
inst.driver.rados = inst.mock_rados
@ -190,6 +198,7 @@ class RBDTestCase(test.TestCase):
self.cfg.rados_connection_retries = 3
self.cfg.rados_connection_interval = 5
self.cfg.backup_use_temp_snapshot = False
self.cfg.enable_deferred_deletion = False
mock_exec = mock.Mock()
mock_exec.return_value = ('', '')
@ -659,6 +668,112 @@ class RBDTestCase(test.TestCase):
self.assertEqual(
1, self.driver.rbd.RBD.return_value.remove.call_count)
@common_mocks
def test_deferred_deletion(self):
    """Deleting a volume with deferred deletion on calls trash_move once."""
    rados_client = self.mock_client.return_value
    rbd_image = self.driver.rbd.Image.return_value
    rbd_image.list_snaps.return_value = []

    with mock.patch.object(self.driver, '_get_clone_info') as get_info, \
            mock.patch.object(self.driver,
                              '_delete_backup_snaps') as del_snaps:
        get_info.return_value = (None, None, None)
        self.cfg.enable_deferred_deletion = True
        self.cfg.deferred_deletion_delay = 0

        self.driver.delete_volume(self.volume_a)

        # The volume was inspected exactly once for clone information and
        # snapshots, inside a single client context.
        get_info.assert_called_once_with(
            self.mock_rbd.Image.return_value, self.volume_a.name, None)
        rbd_image.list_snaps.assert_called_once_with()
        rados_client.__enter__.assert_called_once_with()
        rados_client.__exit__.assert_called_once_with(None, None, None)
        del_snaps.assert_called_once_with(
            self.mock_rbd.Image.return_value)

        # No snapshot was unprotected and the image went to the trash.
        self.assertFalse(rbd_image.unprotect_snap.called)
        trash_moves = self.driver.rbd.RBD.return_value.trash_move.call_count
        self.assertEqual(1, trash_moves)
@common_mocks
def test_deferred_deletion_periodic_task(self):
    """Starting the periodic tasks lists the trash but removes nothing."""
    self.cfg.rados_connect_timeout = -1
    self.cfg.enable_deferred_deletion = True
    self.cfg.deferred_deletion_purge_interval = 1

    self.driver._start_periodic_tasks()
    # Wait before inspecting the mocks; presumably this lets the purge
    # task run its first iteration.
    time.sleep(1)

    rbd_api = self.driver.rbd.RBD.return_value
    self.assertTrue(rbd_api.trash_list.called)
    self.assertFalse(rbd_api.trash_remove.called)
@common_mocks
def test_deferred_deletion_trash_purge(self):
    """A purge lists the trash once and removes its single entry."""
    rbd_api = self.driver.rbd.RBD()
    with mock.patch.object(rbd_api, 'trash_list') as patched_trash_list:
        patched_trash_list.return_value = [self.volume_a]
        self.cfg.enable_deferred_deletion = True

        self.driver._trash_purge()

        # The call counts are checked while ``trash_list`` is still
        # patched, so they refer to the patched mock.
        rbd_mock = self.driver.rbd.RBD.return_value
        self.assertEqual(1, rbd_mock.trash_list.call_count)
        self.assertEqual(1, rbd_mock.trash_remove.call_count)
@common_mocks
def test_deferred_deletion_trash_purge_not_expired(self):
    """A PermissionError from trash_remove is handled by the purge."""
    rbd_api = self.driver.rbd.RBD()
    with mock.patch.object(rbd_api, 'trash_list') as patched_trash_list:
        patched_trash_list.return_value = [self.volume_a]
        permission_error = self.mock_rbd.PermissionError
        self.mock_rbd.RBD.return_value.trash_remove.side_effect = (
            permission_error)
        self.cfg.enable_deferred_deletion = True

        self.driver._trash_purge()

        # The call counts are checked while ``trash_list`` is still
        # patched, so they refer to the patched mock.
        rbd_mock = self.driver.rbd.RBD.return_value
        self.assertEqual(1, rbd_mock.trash_list.call_count)
        self.assertEqual(1, rbd_mock.trash_remove.call_count)

        # Make sure the exception was raised
        self.assertEqual(1, len(RAISED_EXCEPTIONS))
        self.assertIn(self.mock_rbd.PermissionError, RAISED_EXCEPTIONS)
@common_mocks
def test_deferred_deletion_w_parent(self):
    """With a parent not marked '.deleted', trash_move is called once."""
    # Successive return values of ``_get_clone_info``: first a parent
    # named after volume_b, then no parent.
    clone_chain = [
        (None, self.volume_b.name, None),
        (None, None, None),
    ]
    with mock.patch.object(self.driver, '_get_clone_info',
                           side_effect=clone_chain):
        self.cfg.enable_deferred_deletion = True
        self.cfg.deferred_deletion_delay = 0

        self.driver.delete_volume(self.volume_a)

        trash_moves = self.driver.rbd.RBD.return_value.trash_move.call_count
        self.assertEqual(1, trash_moves)
@common_mocks
def test_deferred_deletion_w_deleted_parent(self):
    """With a parent marked '.deleted', trash_move is called twice."""
    # Successive return values of ``_get_clone_info``: first a parent
    # whose name carries the '.deleted' suffix, then no parent.
    deleted_parent_name = "%s.deleted" % self.volume_b.name
    clone_chain = [
        (None, deleted_parent_name, None),
        (None, None, None),
    ]
    with mock.patch.object(self.driver, '_get_clone_info',
                           side_effect=clone_chain):
        self.cfg.enable_deferred_deletion = True
        self.cfg.deferred_deletion_delay = 0

        self.driver.delete_volume(self.volume_a)

        trash_moves = self.driver.rbd.RBD.return_value.trash_move.call_count
        self.assertEqual(2, trash_moves)
@common_mocks
def delete_volume_not_found(self):
self.mock_rbd.Image.side_effect = self.mock_rbd.ImageNotFound

View File

@ -15,6 +15,7 @@
from __future__ import absolute_import
import binascii
import errno
import json
import math
import os
@ -25,6 +26,7 @@ from eventlet import tpool
from os_brick.initiator import linuxrbd
from oslo_config import cfg
from oslo_log import log as logging
from oslo_service import loopingcall
from oslo_utils import encodeutils
from oslo_utils import excutils
from oslo_utils import fileutils
@ -111,6 +113,17 @@ RBD_OPTS = [
"Cinder core code for allocated_capacity_gb. This "
"reduces the load on the Ceph cluster as well as on the "
"volume service."),
cfg.BoolOpt('enable_deferred_deletion', default=False,
help='Enable deferred deletion. Upon deletion, volumes are '
'tagged for deletion but will only be removed '
'asynchronously at a later time.'),
cfg.IntOpt('deferred_deletion_delay', default=0,
help='Time delay in seconds before a volume is eligible '
'for permanent removal after being tagged for deferred '
'deletion.'),
cfg.IntOpt('deferred_deletion_purge_interval', default=60,
           # The adjacent literals are concatenated, so the first one must
           # end with a space.
           help='Number of seconds between runs of the periodic task '
                'to purge volumes tagged for deletion.'),
]
CONF = cfg.CONF
@ -280,6 +293,42 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD,
remote = self._active_config
return (remote.get('name'), remote.get('conf'), remote.get('user'))
def _trash_purge(self):
    """Try to permanently remove every entry in the backend's trash.

    Lists the trash through a RADOS client connection and calls
    ``trash_remove`` for each entry. Failures are logged and never
    propagated, so one failing entry does not prevent the remaining
    entries from being processed.
    """
    LOG.info("Purging trash for backend '%s'", self._backend_name)
    with RADOSClient(self) as client:
        for vol in self.RBDProxy().trash_list(client.ioctx):
            try:
                self.RBDProxy().trash_remove(client.ioctx, vol.get('id'))
                LOG.info("Deleted %s from trash for backend '%s'",
                         vol.get('name'),
                         self._backend_name)
            except Exception as e:
                # NOTE(arne_wiebalck): trash_remove raises EPERM in case
                # the volume's deferral time has not expired yet, so we
                # want to explicitly handle this "normal" situation.
                # All other exceptions, e.g. ImageBusy, are not re-raised
                # so that the periodic purge retries on the next iteration
                # and leaves ERRORs in the logs in case the deletion fails
                # repeatedly.
                #
                # Not every exception carries an ``errno`` attribute, so
                # look it up defensively; an AttributeError raised here
                # would escape the handler and abort the purge.
                if getattr(e, 'errno', None) == errno.EPERM:
                    LOG.debug("%s has not expired yet on backend '%s'",
                              vol.get('name'),
                              self._backend_name)
                else:
                    LOG.exception("Error deleting %s from trash "
                                  "backend '%s'",
                                  vol.get('name'),
                                  self._backend_name)
def _start_periodic_tasks(self):
    """Start the periodic trash purge if deferred deletion is enabled."""
    if not self.configuration.enable_deferred_deletion:
        return

    LOG.info("Starting periodic trash purge for backend '%s'",
             self._backend_name)
    # Run ``_trash_purge`` repeatedly at the configured interval.
    purge_loop = loopingcall.FixedIntervalLoopingCall(self._trash_purge)
    purge_interval = self.configuration.deferred_deletion_purge_interval
    purge_loop.start(interval=purge_interval)
def check_for_setup_error(self):
"""Returns an error if prerequisites aren't met."""
if rados is None:
@ -297,6 +346,18 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD,
with RADOSClient(self):
pass
# NOTE(arne_wiebalck): If deferred deletion is enabled, check if the
# local Ceph client has support for the trash API.
if self.configuration.enable_deferred_deletion:
if not hasattr(self.RBDProxy(), 'trash_list'):
msg = _("Deferred deletion is enabled, but the local Ceph "
"client has no support for the trash API. Support "
"for this feature started with v12.2.0 Luminous.")
LOG.error(msg)
raise exception.VolumeBackendAPIException(data=msg)
self._start_periodic_tasks()
def RBDProxy(self):
    """Return a new ``rbd.RBD`` object wrapped in an eventlet tpool proxy."""
    rbd_api = self.rbd.RBD()
    return tpool.Proxy(rbd_api)
@ -917,7 +978,14 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD,
# keep walking up the chain if it is itself a clone.
if (not parent_has_snaps) and parent_name.endswith('.deleted'):
LOG.debug("deleting parent %s", parent_name)
self.RBDProxy().remove(client.ioctx, parent_name)
if self.configuration.enable_deferred_deletion:
LOG.debug("moving volume %s to trash", parent_name)
delay = self.configuration.deferred_deletion_delay
self.RBDProxy().trash_move(client.ioctx,
parent_name,
delay)
else:
self.RBDProxy().remove(client.ioctx, parent_name)
# Now move up to grandparent if there is one
if g_parent:
@ -967,7 +1035,14 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD,
self.configuration.rados_connection_interval,
self.configuration.rados_connection_retries)
def _try_remove_volume(client, volume_name):
    # Without deferred deletion the image is removed from the backend
    # directly.
    if not self.configuration.enable_deferred_deletion:
        self.RBDProxy().remove(client.ioctx, volume_name)
        return

    # With deferred deletion the image is only moved to the trash, with
    # the configured delay passed along.
    LOG.debug("moving volume %s to trash", volume_name)
    trash_delay = self.configuration.deferred_deletion_delay
    self.RBDProxy().trash_move(client.ioctx, volume_name, trash_delay)
if clone_snap is None:
LOG.debug("deleting rbd volume %s", volume_name)

View File

@ -0,0 +1,4 @@
---
features:
- |
Add support for deferred deletion in the RBD volume driver.