Ceph: Add option to keep only last n snapshots per backup
Add a new config option "backup_ceph_max_snapshots" to only keep
the last n snapshots of a volume backup on the source volume storage
to save disk space.
By default, the existing behavior of keeping all snapshots is retained,
but it can now be adjusted as needed.
Commit bc9ab142da changed the previous
behavior of keeping only the last snapshot to keeping all snapshots of
a backup. However, this can take up much more disk space on the
source volume storage.
If enabled, the driver checks the number of snapshots of a volume backup
after successful creation of a new snapshot and removes older ones, so
that only the n most recent snapshots are kept on the source storage.
This change also creates a new special case which was not present
before: If a user deletes at least n backup snapshots, the next
incremental snapshot creation would fail (due to how RBD works).
This will be handled by forcing a full backup instead.
Therefore, this commit introduces a configurable tradeoff between
required space on the source volume storage and a potentially longer
backup time.
Change-Id: Ief71e6e62ccc0654f4e1e71ccc53df66d5ffbfac
Related-Bug: #1703011
Signed-off-by: Jan Hartkopf <jhartkopf@inovex.de>
This commit is contained in:
commit 03cbcd3cd0 (parent: d46e2ebbd7)
committed by Christian Rohmann
@@ -48,6 +48,7 @@ import os
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
import textwrap
|
||||
import time
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
@@ -62,6 +63,8 @@ from cinder.backup import driver
|
||||
from cinder import exception
|
||||
from cinder.i18n import _
|
||||
from cinder import interface
|
||||
from cinder.message import api as message_api
|
||||
from cinder.message import message_field
|
||||
from cinder import objects
|
||||
from cinder import utils
|
||||
import cinder.volume.drivers.rbd as rbd_driver
|
||||
@@ -95,6 +98,19 @@ service_opts = [
|
||||
cfg.BoolOpt('backup_ceph_image_journals', default=False,
|
||||
help='If True, apply JOURNALING and EXCLUSIVE_LOCK feature '
|
||||
'bits to the backup RBD objects to allow mirroring'),
|
||||
cfg.IntOpt('backup_ceph_max_snapshots', default=0,
|
||||
help=textwrap.dedent("""\
|
||||
Number of the most recent snapshots to keep.
|
||||
|
||||
0 indicates to keep an unlimited number of snapshots.
|
||||
|
||||
Configuring this option can save disk space by only keeping
|
||||
a limited number of snapshots on the source volume storage.
|
||||
However, if a user deletes all incremental backups which
|
||||
still have snapshots on the source storage, the next
|
||||
incremental backup will automatically become a full backup
|
||||
as no common snapshot exists anymore.
|
||||
""")),
|
||||
cfg.BoolOpt('restore_discard_excess_bytes', default=True,
|
||||
help='If True, always discard excess bytes when restoring '
|
||||
'volumes i.e. pad with zeroes.')
|
||||
@@ -198,6 +214,8 @@ class CephBackupDriver(driver.BackupDriver):
|
||||
self._ceph_backup_pool = CONF.backup_ceph_pool
|
||||
self._ceph_backup_conf = CONF.backup_ceph_conf
|
||||
|
||||
self.message_api = message_api.API()
|
||||
|
||||
@staticmethod
def get_driver_options() -> list:
    """Return the configuration options supported by this driver.

    Exposes the module-level ``service_opts`` so option-discovery
    tooling can enumerate the driver's config options.
    """
    return service_opts
|
||||
@@ -799,24 +817,62 @@ class CephBackupDriver(driver.BackupDriver):
|
||||
rbd_conf = volume_file.rbd_conf
|
||||
source_rbd_image = eventlet.tpool.Proxy(volume_file.rbd_image)
|
||||
volume_id = backup.volume_id
|
||||
base_name = None
|
||||
base_name = self._get_backup_base_name(volume_id, backup=backup)
|
||||
snaps_to_keep = CONF.backup_ceph_max_snapshots
|
||||
|
||||
# If backup.parent_id is None performs full RBD backup
|
||||
if backup.parent_id is None:
|
||||
base_name = self._get_backup_base_name(volume_id, backup=backup)
|
||||
from_snap, image_created = self._full_rbd_backup(backup.container,
|
||||
base_name,
|
||||
length)
|
||||
# Otherwise performs incremental rbd backup
|
||||
else:
|
||||
# Find the base name from the parent backup's service_metadata
|
||||
base_name = self._get_backup_base_name(volume_id, backup=backup)
|
||||
rbd_img = source_rbd_image
|
||||
from_snap, image_created = self._incremental_rbd_backup(backup,
|
||||
base_name,
|
||||
length,
|
||||
rbd_img,
|
||||
volume_id)
|
||||
# Check if there is at least one snapshot to base an incremental
|
||||
# backup on. If not, we cannot perform an incremental backup and
|
||||
# fall back to full backup.
|
||||
no_source_snaps = snaps_to_keep > 0 and \
|
||||
self._get_backup_snap_name(
|
||||
source_rbd_image,
|
||||
base_name,
|
||||
backup.parent_id) is None
|
||||
|
||||
# If true, force full backup
|
||||
if no_source_snaps:
|
||||
# Unset parent so we get a new backup base name
|
||||
backup.parent = None
|
||||
# The backup will be a full one, so it has no parent ID.
|
||||
# This will mark the backup as a full backup in the database.
|
||||
backup.parent_id = None
|
||||
backup.save()
|
||||
|
||||
base_name = self.\
|
||||
_get_backup_base_name(volume_id, backup=backup)
|
||||
|
||||
LOG.info("Incremental backup was requested, but there are no "
|
||||
"snapshots present to use as base, "
|
||||
"forcing full backup.")
|
||||
self.message_api.create(
|
||||
context=self.context,
|
||||
action=message_field.Action.BACKUP_CREATE,
|
||||
resource_uuid=volume_id,
|
||||
detail=message_field.Detail.
|
||||
INCREMENTAL_BACKUP_FORCES_FULL_BACKUP,
|
||||
level="WARNING"
|
||||
)
|
||||
|
||||
from_snap, image_created = self._full_rbd_backup(
|
||||
backup.container,
|
||||
base_name,
|
||||
length)
|
||||
else:
|
||||
# Incremental backup
|
||||
rbd_img = source_rbd_image
|
||||
from_snap, image_created = \
|
||||
self._incremental_rbd_backup(backup,
|
||||
base_name,
|
||||
length,
|
||||
rbd_img,
|
||||
volume_id)
|
||||
|
||||
LOG.debug("Using --from-snap '%(snap)s' for incremental backup of "
|
||||
"volume %(volume)s.",
|
||||
@@ -847,6 +903,13 @@ class CephBackupDriver(driver.BackupDriver):
|
||||
LOG.debug("Differential backup transfer completed in %.4fs",
|
||||
(time.time() - before))
|
||||
|
||||
# only keep last n snapshots and delete older ones
|
||||
if snaps_to_keep > 0:
|
||||
self._remove_last_snapshots(source_rbd_image, snaps_to_keep)
|
||||
else:
|
||||
LOG.debug("Not deleting any snapshots because "
|
||||
"all should be kept")
|
||||
|
||||
except exception.BackupRBDOperationFailed:
|
||||
with excutils.save_and_reraise_exception():
|
||||
LOG.debug("Differential backup transfer failed")
|
||||
@@ -863,6 +926,48 @@ class CephBackupDriver(driver.BackupDriver):
|
||||
|
||||
return {'service_metadata': '{"base": "%s"}' % base_name}
|
||||
|
||||
def _remove_last_snapshots(self, source_rbd_image, snaps_to_keep: int) -> None:
    """Prune old backup snapshots, keeping only the most recent ones.

    :param source_rbd_image: RBD image (or eventlet proxy) whose backup
                             snapshots should be pruned.
    :param snaps_to_keep: number of most recent snapshots to retain;
                          callers invoke this only with a value > 0.

    Failures to list or delete snapshots are logged and swallowed on
    purpose: pruning is best-effort and must not fail the backup.
    """
    snap_list = []

    try:
        # NOTE(review): get_backup_snaps() may return None when the
        # image has no backup snapshots; normalize to an empty list so
        # len() below cannot raise TypeError.
        snap_list = self.get_backup_snaps(source_rbd_image) or []
    except Exception as e:
        LOG.debug(
            "Failed to get snapshot list for %s: %s", source_rbd_image, e
        )

    remaining_snaps = len(snap_list)
    LOG.debug("Snapshot list: %s", snap_list)

    if remaining_snaps > snaps_to_keep:
        snaps_to_delete = remaining_snaps - snaps_to_keep
        LOG.debug(
            "There are %s snapshots and %s should be kept, "
            "deleting the oldest %s snapshots",
            remaining_snaps,
            snaps_to_keep,
            snaps_to_delete,
        )

        # Assumes the snapshot list is ordered oldest-first, so the
        # first ``snaps_to_delete`` entries are the oldest — this is the
        # ordering the driver's unit test exercises.
        for i in range(snaps_to_delete):
            LOG.debug("Deleting snapshot %s", snap_list[i])

            try:
                source_rbd_image.remove_snap(snap_list[i]["name"])
            except Exception as e:
                LOG.debug(
                    "Failed to delete snapshot %s: %s", snap_list[i], e
                )
    else:
        LOG.debug(
            "There are %s snapshots and %s should be kept, "
            "not deleting any snapshots",
            remaining_snaps,
            snaps_to_keep,
        )
|
||||
|
||||
@staticmethod
|
||||
def _file_is_rbd(volume_file: linuxrbd.RBDVolumeIOWrapper) -> bool:
|
||||
"""Returns True if the volume_file is actually an RBD image."""
|
||||
|
||||
@@ -134,6 +134,8 @@ class Detail(object):
|
||||
'029',
|
||||
_("The image disk format must be the same as the volume format for "
|
||||
"the volume type you are requesting."))
|
||||
INCREMENTAL_BACKUP_FORCES_FULL_BACKUP = (
|
||||
'030', _("Incremental backup not possible, forcing full backup."))
|
||||
|
||||
ALL = (UNKNOWN_ERROR,
|
||||
DRIVER_NOT_INITIALIZED,
|
||||
@@ -164,6 +166,7 @@ class Detail(object):
|
||||
VOLUME_INVALID_STATE,
|
||||
REIMAGE_VOLUME_FAILED,
|
||||
IMAGE_FORMAT_UNACCEPTABLE,
|
||||
INCREMENTAL_BACKUP_FORCES_FULL_BACKUP,
|
||||
)
|
||||
|
||||
# Exception and detail mappings
|
||||
|
||||
@@ -536,6 +536,40 @@ class BackupCephTestCase(test.TestCase):
|
||||
self.assertEqual(checksum.digest(),
|
||||
self.checksum.digest())
|
||||
|
||||
@common_mocks
def test_backup_snapshot_lifecycle(self):
    """Verify only the newest snapshots survive a backup.

    With ``backup_ceph_max_snapshots`` set to 1 and three snapshots
    present, the two oldest snapshots must be removed after the
    backup completes.
    """
    patch_transfer = mock.patch.object(self.service, '_rbd_diff_transfer')
    patch_snaps = mock.patch.object(self.service, "get_backup_snaps")
    with patch_transfer, patch_snaps as mock_get_backup_snaps:
        CONF.set_override('backup_ceph_max_snapshots', 1)

        fake_snaps = [
            {'name': 'backup.mock.snap.153464362.12'},
            {'name': 'backup.mock.snap.225341241.90'},
            {'name': 'backup.mock.snap.399994362.10'},
        ]
        mock_get_backup_snaps.return_value = fake_snaps
        self.mock_rbd.RBD.remove_snap = mock.Mock()

        image = self.service.rbd.Image()
        meta = linuxrbd.RBDImageMetadata(image,
                                         'pool_foo',
                                         'user_foo',
                                         'conf_foo')
        rbdio = linuxrbd.RBDVolumeIOWrapper(meta)
        rbdio.seek(0)

        self.service._backup_rbd(self.backup, rbdio,
                                 self.volume.name, self.volume.size)

        # The two oldest snapshots (and only those) must be removed.
        remove_snap = self.mock_rbd.Image.return_value.remove_snap
        self.assertEqual(2, remove_snap.call_count)
        remove_snap.assert_has_calls(
            [mock.call('backup.mock.snap.153464362.12'),
             mock.call('backup.mock.snap.225341241.90')])
|
||||
|
||||
@common_mocks
|
||||
def test_backup_volume_from_rbd_set_parent_id(self):
|
||||
with mock.patch.object(self.service, '_backup_rbd') as \
|
||||
|
||||
@@ -21,6 +21,26 @@ stored as snapshots so that minimal space is consumed in the backup
|
||||
store. It takes far less time to restore a volume than to take a full
|
||||
copy.
|
||||
|
||||
By default, all incremental backups are held on the source volume storage,
|
||||
which can take up much disk space on the usually more expensive
|
||||
primary storage compared to backup storage. Enabling the option
|
||||
``backup_ceph_max_snapshots`` can save disk space on the source
|
||||
volume storage by only keeping a limited number of snapshots per backup volume.
|
||||
After every successful creation of a new incremental backup, the Ceph backup
|
||||
driver will then ensure that excess snapshots of the corresponding backup
|
||||
volume are deleted so that only the ``backup_ceph_max_snapshots``
|
||||
most recent snapshots are kept on the primary storage.
|
||||
However, this can cause incremental backups to automatically become full
backups instead if a user manually deletes at least
``backup_ceph_max_snapshots`` incremental backups. In that case
the next backup, being a full backup, will require more disk space on
the backup storage and will take longer to complete than an incremental
backup would have.
|
||||
|
||||
Thus, the option allows operators to configure a tradeoff between the space
required on the source volume storage and the space required on the backup
storage, as well as a longer backup process under the conditions described
above.
|
||||
|
||||
.. note::
|
||||
|
||||
Block Storage enables you to:
|
||||
@@ -57,3 +77,4 @@ This example shows the default options for the Ceph backup driver.
|
||||
backup_ceph_pool = backups
|
||||
backup_ceph_stripe_unit = 0
|
||||
backup_ceph_stripe_count = 0
|
||||
backup_ceph_max_snapshots = 0
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Ceph driver: Add config option to keep only the last n snapshots per backup
|
||||
to save disk space on the source volume storage. Enabling this option can
|
||||
cause incremental backups to become full backups instead under special
|
||||
circumstances. Please take a look at the Ceph backup driver docs for
|
||||
more information.
|
||||
Reference in New Issue
Block a user