Merge "Ceph: Add option to keep only last n snapshots per backup"

Zuul 2024-09-27 11:55:28 +00:00 committed by Gerrit Code Review
commit b07e4147f9
5 changed files with 181 additions and 10 deletions

@@ -48,6 +48,7 @@ import os
import re
import subprocess
import tempfile
import textwrap
import time
from typing import Dict, List, Optional, Tuple
@@ -62,6 +63,8 @@ from cinder.backup import driver
from cinder import exception
from cinder.i18n import _
from cinder import interface
from cinder.message import api as message_api
from cinder.message import message_field
from cinder import objects
from cinder import utils
import cinder.volume.drivers.rbd as rbd_driver
@@ -95,6 +98,19 @@ service_opts = [
cfg.BoolOpt('backup_ceph_image_journals', default=False,
help='If True, apply JOURNALING and EXCLUSIVE_LOCK feature '
'bits to the backup RBD objects to allow mirroring'),
cfg.IntOpt('backup_ceph_max_snapshots', default=0,
help=textwrap.dedent("""\
Number of the most recent snapshots to keep.
0 indicates to keep an unlimited number of snapshots.
Configuring this option can save disk space by only keeping
a limited number of snapshots on the source volume storage.
However, if a user deletes all incremental backups which
still have snapshots on the source storage, the next
incremental backup will automatically become a full backup
as no common snapshot exists anymore.
""")),
cfg.BoolOpt('restore_discard_excess_bytes', default=True,
help='If True, always discard excess bytes when restoring '
'volumes i.e. pad with zeroes.')
@@ -200,6 +216,8 @@ class CephBackupDriver(driver.BackupDriver):
self._ceph_backup_pool = CONF.backup_ceph_pool
self._ceph_backup_conf = CONF.backup_ceph_conf
self.message_api = message_api.API()
@staticmethod
def get_driver_options() -> list:
return service_opts
@@ -808,20 +826,58 @@ class CephBackupDriver(driver.BackupDriver):
rbd_conf = volume_file.rbd_conf
source_rbd_image = eventlet.tpool.Proxy(volume_file.rbd_image)
volume_id = backup.volume_id
base_name = None
base_name = self._get_backup_base_name(volume_id, backup=backup)
snaps_to_keep = CONF.backup_ceph_max_snapshots
# If backup.parent_id is None performs full RBD backup
if backup.parent_id is None:
base_name = self._get_backup_base_name(volume_id, backup=backup)
from_snap, image_created = self._full_rbd_backup(backup.container,
base_name,
length)
# Otherwise performs incremental rbd backup
else:
# Find the base name from the parent backup's service_metadata
base_name = self._get_backup_base_name(volume_id, backup=backup)
# Check if there is at least one snapshot to base an incremental
# backup on. If not, we cannot perform an incremental backup and
# fall back to full backup.
no_source_snaps = snaps_to_keep > 0 and \
self._get_backup_snap_name(
source_rbd_image,
base_name,
backup.parent_id) is None
# If true, force full backup
if no_source_snaps:
# Unset parent so we get a new backup base name
backup.parent = None
# The backup will be a full one, so it has no parent ID.
# This will mark the backup as a full backup in the database.
backup.parent_id = None
backup.save()
base_name = self.\
_get_backup_base_name(volume_id, backup=backup)
LOG.info("Incremental backup was requested, but there are no "
"snapshots present to use as base, "
"forcing full backup.")
self.message_api.create(
context=self.context,
action=message_field.Action.BACKUP_CREATE,
resource_uuid=volume_id,
detail=message_field.Detail.
INCREMENTAL_BACKUP_FORCES_FULL_BACKUP,
level="WARNING"
)
from_snap, image_created = self._full_rbd_backup(
backup.container,
base_name,
length)
else:
# Incremental backup
rbd_img = source_rbd_image
from_snap, image_created = self._incremental_rbd_backup(backup,
from_snap, image_created = \
self._incremental_rbd_backup(backup,
base_name,
length,
rbd_img,
@@ -856,6 +912,13 @@ class CephBackupDriver(driver.BackupDriver):
LOG.debug("Differential backup transfer completed in %.4fs",
(time.time() - before))
# only keep last n snapshots and delete older ones
if snaps_to_keep > 0:
self._remove_last_snapshots(source_rbd_image, snaps_to_keep)
else:
LOG.debug("Not deleting any snapshots because "
"all should be kept")
except exception.BackupRBDOperationFailed:
with excutils.save_and_reraise_exception():
LOG.debug("Differential backup transfer failed")
@@ -872,6 +935,48 @@ class CephBackupDriver(driver.BackupDriver):
return {'service_metadata': '{"base": "%s"}' % base_name}
def _remove_last_snapshots(self, source_rbd_image, snaps_to_keep: int):
# only keep last n snapshots and delete older ones for the source
# image provided
snap_list = []
try:
snap_list = self.get_backup_snaps(source_rbd_image)
except Exception as e:
LOG.debug(
"Failed to get snapshot list for %s: %s", source_rbd_image, e
)
remaining_snaps = len(snap_list)
LOG.debug("Snapshot list: %s", snap_list)
if remaining_snaps > snaps_to_keep:
snaps_to_delete = remaining_snaps - snaps_to_keep
LOG.debug(
"There are %s snapshots and %s should be kept, "
"deleting the oldest %s snapshots",
remaining_snaps,
snaps_to_keep,
snaps_to_delete,
)
for i in range(snaps_to_delete):
LOG.debug("Deleting snapshot %s", snap_list[i])
try:
source_rbd_image.remove_snap(snap_list[i]["name"])
except Exception as e:
LOG.debug(
"Failed to delete snapshot %s: %s", snap_list[i], e
)
else:
LOG.debug(
"There are %s snapshots and %s should be kept, "
"not deleting any snapshots",
remaining_snaps,
snaps_to_keep,
)
@staticmethod
def _file_is_rbd(volume_file: linuxrbd.RBDVolumeIOWrapper) -> bool:
"""Returns True if the volume_file is actually an RBD image."""

@@ -134,6 +134,8 @@ class Detail(object):
'029',
_("The image disk format must be the same as the volume format for "
"the volume type you are requesting."))
INCREMENTAL_BACKUP_FORCES_FULL_BACKUP = (
'030', _("Incremental backup not possible, forcing full backup."))
ALL = (UNKNOWN_ERROR,
DRIVER_NOT_INITIALIZED,
@@ -164,6 +166,7 @@ class Detail(object):
VOLUME_INVALID_STATE,
REIMAGE_VOLUME_FAILED,
IMAGE_FORMAT_UNACCEPTABLE,
INCREMENTAL_BACKUP_FORCES_FULL_BACKUP,
)
# Exception and detail mappings

@@ -536,6 +536,40 @@ class BackupCephTestCase(test.TestCase):
self.assertEqual(checksum.digest(),
self.checksum.digest())
@common_mocks
def test_backup_snapshot_lifecycle(self):
with mock.patch.object(self.service, '_rbd_diff_transfer'), \
mock.patch.object(self.service, "get_backup_snaps") \
as mock_get_backup_snaps:
CONF.set_override('backup_ceph_max_snapshots', 1)
mocked_snaps = [
{'name': 'backup.mock.snap.153464362.12'},
{'name': 'backup.mock.snap.225341241.90'},
{'name': 'backup.mock.snap.399994362.10'}]
mock_get_backup_snaps.return_value = mocked_snaps
self.mock_rbd.RBD.remove_snap = mock.Mock()
image = self.service.rbd.Image()
meta = linuxrbd.RBDImageMetadata(image,
'pool_foo',
'user_foo',
'conf_foo')
rbdio = linuxrbd.RBDVolumeIOWrapper(meta)
rbdio.seek(0)
self.service._backup_rbd(self.backup, rbdio,
self.volume.name, self.volume.size)
self.assertEqual(2, self.mock_rbd.Image.return_value.
remove_snap.call_count)
expected_calls = [mock.call('backup.mock.snap.153464362.12'),
mock.call('backup.mock.snap.225341241.90')]
self.mock_rbd.Image.return_value.remove_snap.\
assert_has_calls(expected_calls)
@common_mocks
def test_backup_volume_from_rbd_set_parent_id(self):
with mock.patch.object(self.service, '_backup_rbd') as \

@@ -21,6 +21,26 @@ stored as snapshots so that minimal space is consumed in the backup
store. It takes far less time to restore a volume than to take a full
copy.
By default, the snapshots backing all incremental backups are kept on the
source volume storage, which can take up considerable disk space on the
usually more expensive primary storage compared to backup storage. Enabling
the option ``backup_ceph_max_snapshots`` can save disk space on the source
volume storage by keeping only a limited number of snapshots per backup volume.
After every successful creation of a new incremental backup, the Ceph backup
driver will then ensure that excess snapshots of the corresponding backup
volume are deleted so that only the ``backup_ceph_max_snapshots``
most recent snapshots are kept on the primary storage.
However, this can cause incremental backups to automatically become full
backups if a user manually deletes at least the ``backup_ceph_max_snapshots``
most recent incremental backups. In that case the next backup, being a full
backup, will require more disk space on the backup storage and will take
longer to complete than an incremental backup would have.

Thus, the option allows operators to trade the space required on the source
volume storage against the space required on the backup storage, as well as
a longer backup process under the conditions described above.
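
For example, to keep only the three most recent snapshots per backup volume
(the value ``3`` is purely illustrative), set::

   backup_ceph_max_snapshots = 3
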
.. note::
Block Storage enables you to:
@@ -57,3 +77,4 @@ This example shows the default options for the Ceph backup driver.
backup_ceph_pool = backups
backup_ceph_stripe_unit = 0
backup_ceph_stripe_count = 0
backup_ceph_max_snapshots = 0

@@ -0,0 +1,8 @@
---
features:
- |
Ceph driver: Added the config option ``backup_ceph_max_snapshots`` to keep
only the last n snapshots per backup in order to save disk space on the
source volume storage. Enabling this option can cause incremental backups
to become full backups under special circumstances; see the Ceph backup
driver documentation for more information.