Add backup restoration cancellation support

Backups can be cancelled by force-deleting the in-progress backup, but
there is no equivalent mechanism to cancel an in-progress restore.

We now monitor the status of the backup during the restore; on any
status change we abort the restoration and set the volume status to
error.
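
As a standalone sketch of the idea (illustrative only: restore_chunks,
get_backup_status and write_chunk are hypothetical stand-ins for the real
driver and DB calls, not Cinder APIs):

    RESTORING = 'restoring'

    class RestoreCanceled(Exception):
        pass

    def restore_chunks(backup_id, chunks, get_backup_status, write_chunk):
        # Re-read the backup's status between chunks; any transition away
        # from 'restoring' is treated as a cancellation request.
        for chunk in chunks:
            if get_backup_status(backup_id) != RESTORING:
                raise RestoreCanceled(backup_id)
            write_chunk(chunk)

    def run_restore(volume, backup_id, chunks, get_backup_status, write_chunk):
        # Manager-side handling: a cancelled restore leaves the volume in
        # 'error', since partially restored data cannot be trusted.
        try:
            restore_chunks(backup_id, chunks, get_backup_status, write_chunk)
        except RestoreCanceled:
            volume['status'] = 'error'
        else:
            volume['status'] = 'available'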

The reason to diverge from the backup-creation cancellation
implementation (deleting the new resource) is that the restore may be
done into an already existing volume, and deleting that volume is not
feasible for the user, for example when an instance is booted from a
Cinder volume and there's a license linked to the System ID.

Cinder backup drivers may now raise BackupRestoreCancel in their
`restore` method when a restore operation is cancelled.
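
For a driver, honoring cancellation boils down to re-reading the backup
record between chunks, roughly like this sketch (not a drop-in
implementation: _iter_restore_chunks is a hypothetical helper, while
BackupRestoreCancel and BackupStatus.RESTORING are the real symbols this
change introduces and uses):

    from cinder import exception
    from cinder.objects import fields

    def restore(self, backup, volume_id, volume_file):
        for chunk in self._iter_restore_chunks(backup):  # hypothetical helper
            backup.refresh()  # pick up operator-initiated status changes
            if backup.status != fields.BackupStatus.RESTORING:
                raise exception.BackupRestoreCancel(back_id=backup.id,
                                                    vol_id=volume_id)
            volume_file.write(chunk)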

Change-Id: If2f143d7ba56ae2d74b3bb571237cc053f63054e
Author: Gorka Eguileor  2018-02-01 13:39:56 +01:00
Commit: 89f6291ee3  (parent: fbf51eea48)
10 changed files with 175 additions and 17 deletions


@@ -615,8 +615,14 @@ class ChunkedBackupDriver(driver.BackupDriver):
        self._finalize_backup(backup, container, object_meta, object_sha256)

-    def _restore_v1(self, backup, volume_id, metadata, volume_file):
-        """Restore a v1 volume backup."""
+    def _restore_v1(self, backup, volume_id, metadata, volume_file,
+                    requested_backup):
+        """Restore a v1 volume backup.
+
+        Raises BackupRestoreCancel on any requested_backup status change; we
+        ignore the backup parameter for this check since that's only the
+        current data source from the list of backup sources.
+        """
        backup_id = backup['id']
        LOG.debug('v1 volume backup restore of %s started.', backup_id)
        extra_metadata = metadata.get('extra_metadata')
@@ -637,6 +643,13 @@ class ChunkedBackupDriver(driver.BackupDriver):
            raise exception.InvalidBackup(reason=err)

        for metadata_object in metadata_objects:
+            # Abort when status changes to error, available, or anything else
+            with requested_backup.as_read_deleted():
+                requested_backup.refresh()
+            if requested_backup.status != fields.BackupStatus.RESTORING:
+                raise exception.BackupRestoreCancel(back_id=backup.id,
+                                                    vol_id=volume_id)
+
            object_name, obj = list(metadata_object.items())[0]
            LOG.debug('restoring object. backup: %(backup_id)s, '
                      'container: %(container)s, object name: '
@@ -683,7 +696,10 @@ class ChunkedBackupDriver(driver.BackupDriver):
                  backup_id)

    def restore(self, backup, volume_id, volume_file):
-        """Restore the given volume backup from backup repository."""
+        """Restore the given volume backup from backup repository.
+
+        Raises BackupRestoreCancel on any backup status change.
+        """
        backup_id = backup['id']
        container = backup['container']
        object_prefix = backup['service_metadata']
@@ -725,7 +741,7 @@ class ChunkedBackupDriver(driver.BackupDriver):
            backup1 = backup_list[index]
            index = index - 1
            metadata = self._read_metadata(backup1)
-            restore_func(backup1, volume_id, metadata, volume_file)
+            restore_func(backup1, volume_id, metadata, volume_file, backup)
            volume_meta = metadata.get('volume_meta', None)
        try:


@@ -391,6 +391,9 @@ class BackupDriver(base.Base):
        starvation parameter volume_file will be a proxy that will execute all
        methods in native threads, so the method implementation doesn't need to
        worry about that.
+
+        May raise BackupRestoreCancel to indicate that the restoration of a
+        volume has been aborted by changing the backup status.
        """
        return


@@ -530,7 +530,10 @@ class BackupManager(manager.ThreadPoolManager):
            raise exception.InvalidBackup(reason=err)

        try:
+            canceled = False
            self._run_restore(context, backup, volume)
+        except exception.BackupRestoreCancel:
+            canceled = True
        except Exception:
            with excutils.save_and_reraise_exception():
                self.db.volume_update(context, volume_id,
@@ -538,12 +541,15 @@ class BackupManager(manager.ThreadPoolManager):
                backup.status = fields.BackupStatus.AVAILABLE
                backup.save()

-        self.db.volume_update(context, volume_id, {'status': 'available'})
+        volume.status = 'error' if canceled else 'available'
+        volume.save()
        backup.status = fields.BackupStatus.AVAILABLE
        backup.save()
-        LOG.info('Restore backup finished, backup %(backup_id)s restored'
-                 ' to volume %(volume_id)s.',
-                 {'backup_id': backup.id, 'volume_id': volume_id})
+        LOG.info('%(result)s restoring backup %(backup_id)s to volume '
+                 '%(volume_id)s.',
+                 {'result': 'Canceled' if canceled else 'Finished',
+                  'backup_id': backup.id,
+                  'volume_id': volume_id})
        self._notify_about_backup_usage(context, backup, "restore.end")

    def _run_restore(self, context, backup, volume):


@@ -155,6 +155,10 @@ class BackupDriverException(CinderException):
    message = _("Backup driver reported an error: %(message)s")


+class BackupRestoreCancel(CinderException):
+    message = _("Canceled backup %(back_id)s restore on volume %(vol_id)s")
+
+
class GlanceConnectionFailed(CinderException):
    message = _("Connection to glance failed: %(reason)s")


@@ -125,6 +125,7 @@ class GoogleBackupDriverTestCase(test.TestCase):
                                volume_id=_DEFAULT_VOLUME_ID,
                                container=google_dr.CONF.backup_gcs_bucket,
                                parent_id=None,
+                                status=None,
                                service_metadata=None):
        try:
@@ -138,6 +139,7 @@ class GoogleBackupDriverTestCase(test.TestCase):
                  'parent_id': parent_id,
                  'user_id': fake.USER_ID,
                  'project_id': fake.PROJECT_ID,
+                  'status': status,
                  'service_metadata': service_metadata,
                  }
        backup = objects.Backup(context=self.ctxt, **kwargs)
@@ -476,7 +478,9 @@ class GoogleBackupDriverTestCase(test.TestCase):
    @gcs_client
    def test_restore(self):
        volume_id = 'c2a81f09-f480-4325-8424-00000071685b'
-        backup = self._create_backup_db_entry(volume_id=volume_id)
+        backup = self._create_backup_db_entry(
+            volume_id=volume_id,
+            status=objects.fields.BackupStatus.RESTORING)
        service = google_dr.GoogleBackupDriver(self.ctxt)

        with tempfile.NamedTemporaryFile() as volume_file:
@@ -514,9 +518,11 @@ class GoogleBackupDriverTestCase(test.TestCase):
        self.volume_file.seek(20 * units.Ki)
        self.volume_file.write(os.urandom(units.Ki))
-        deltabackup = self._create_backup_db_entry(volume_id=volume_id,
-                                                   container=container_name,
-                                                   parent_id=backup.id)
+        deltabackup = self._create_backup_db_entry(
+            volume_id=volume_id,
+            status=objects.fields.BackupStatus.RESTORING,
+            container=container_name,
+            parent_id=backup.id)
        self.volume_file.seek(0)
        service2 = google_dr.GoogleBackupDriver(self.ctxt)
        service2.backup(deltabackup, self.volume_file, True)


@@ -127,7 +127,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
                                volume_id=_DEFAULT_VOLUME_ID,
                                container='test-container',
                                backup_id=fake.BACKUP_ID,
-                                parent_id=None):
+                                parent_id=None,
+                                status=None):
        try:
            db.volume_get(self.ctxt, volume_id)
@@ -141,6 +142,7 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
                  'parent_id': parent_id,
                  'user_id': fake.USER_ID,
                  'project_id': fake.PROJECT_ID,
+                  'status': status,
                  }
        return db.backup_create(self.ctxt, backup)['id']
@@ -608,6 +610,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
        with tempfile.NamedTemporaryFile() as restored_file:
            backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
+            backup.status = objects.fields.BackupStatus.RESTORING
+            backup.save()
            service.restore(backup, volume_id, restored_file)
            self.assertTrue(filecmp.cmp(self.volume_file.name,
                                        restored_file.name))
@@ -629,6 +633,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
        with tempfile.NamedTemporaryFile() as restored_file:
            backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
+            backup.status = objects.fields.BackupStatus.RESTORING
+            backup.save()
            service.restore(backup, volume_id, restored_file)
            self.assertTrue(filecmp.cmp(self.volume_file.name,
                                        restored_file.name))
@@ -649,6 +655,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
        service = nfs.NFSBackupDriver(self.ctxt)
        self._write_effective_compression_file(file_size)
        backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
+        backup.status = objects.fields.BackupStatus.RESTORING
+        backup.save()
        service.backup(backup, self.volume_file)

        with tempfile.NamedTemporaryFile() as restored_file:
@@ -660,6 +668,70 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
            self.assertNotEqual(threading.current_thread(),
                                self.thread_dict['thread'])

+    def test_restore_abort_delta(self):
+        volume_id = fake.VOLUME_ID
+        count = set()
+
+        def _fake_generate_object_name_prefix(self, backup):
+            az = 'az_fake'
+            backup_name = '%s_backup_%s' % (az, backup['id'])
+            volume = 'volume_%s' % (backup['volume_id'])
+            prefix = volume + '_' + backup_name
+            return prefix
+
+        def my_refresh():
+            # This refresh method will abort the restore after 1 chunk
+            count.add(len(count) + 1)
+            if len(count) == 2:
+                backup.status = objects.fields.BackupStatus.AVAILABLE
+                backup.save()
+            original_refresh()
+
+        self.mock_object(nfs.NFSBackupDriver,
+                         '_generate_object_name_prefix',
+                         _fake_generate_object_name_prefix)
+        self.flags(backup_file_size=(1024 * 8))
+        self.flags(backup_sha_block_size_bytes=1024)
+
+        container_name = self.temp_dir.replace(tempfile.gettempdir() + '/',
+                                               '', 1)
+        self._create_backup_db_entry(volume_id=volume_id,
+                                     container=container_name,
+                                     backup_id=fake.BACKUP_ID)
+        service = nfs.NFSBackupDriver(self.ctxt)
+        self.volume_file.seek(0)
+        backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
+        service.backup(backup, self.volume_file)
+
+        # Create incremental backup with no change to contents
+        self.volume_file.seek(16 * 1024)
+        self.volume_file.write(os.urandom(1024))
+        self.volume_file.seek(20 * 1024)
+        self.volume_file.write(os.urandom(1024))
+
+        self._create_backup_db_entry(
+            volume_id=volume_id,
+            status=objects.fields.BackupStatus.RESTORING,
+            container=container_name,
+            backup_id=fake.BACKUP2_ID,
+            parent_id=fake.BACKUP_ID)
+        self.volume_file.seek(0)
+        deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
+        service.backup(deltabackup, self.volume_file, True)
+        deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
+
+        backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
+        original_refresh = backup.refresh
+
+        with tempfile.NamedTemporaryFile() as restored_file, \
+                mock.patch('cinder.objects.Backup.refresh',
+                           side_effect=my_refresh):
+            self.assertRaises(exception.BackupRestoreCancel,
+                              service.restore, backup, volume_id,
+                              restored_file)
+
    def test_restore_delta(self):
        volume_id = fake.VOLUME_ID
@@ -693,10 +765,12 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
        self.volume_file.seek(20 * 1024)
        self.volume_file.write(os.urandom(1024))
-        self._create_backup_db_entry(volume_id=volume_id,
-                                     container=container_name,
-                                     backup_id=fake.BACKUP2_ID,
-                                     parent_id=fake.BACKUP_ID)
+        self._create_backup_db_entry(
+            volume_id=volume_id,
+            status=objects.fields.BackupStatus.RESTORING,
+            container=container_name,
+            backup_id=fake.BACKUP2_ID,
+            parent_id=fake.BACKUP_ID)
        self.volume_file.seek(0)
        deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
        service.backup(deltabackup, self.volume_file, True)


@@ -700,6 +700,8 @@ class BackupSwiftTestCase(test.TestCase):
        with tempfile.NamedTemporaryFile() as volume_file:
            backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
+            backup.status = objects.fields.BackupStatus.RESTORING
+            backup.save()
            service.restore(backup, volume_id, volume_file)

    def test_restore_delta(self):
@@ -748,6 +750,8 @@ class BackupSwiftTestCase(test.TestCase):
        with tempfile.NamedTemporaryFile() as restored_file:
            backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
+            backup.status = objects.fields.BackupStatus.RESTORING
+            backup.save()
            service.restore(backup, volume_id,
                            restored_file)
            self.assertTrue(filecmp.cmp(self.volume_file.name,


@@ -1011,6 +1011,26 @@ class BackupTestCase(BaseBackupTest):
        self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status'])
        self.assertTrue(mock_run_restore.called)

+    def test_restore_backup_with_driver_cancellation(self):
+        """Test error handling when a restore is cancelled."""
+        vol_id = self._create_volume_db_entry(status='restoring-backup',
+                                              size=1)
+        backup = self._create_backup_db_entry(
+            status=fields.BackupStatus.RESTORING, volume_id=vol_id)
+
+        mock_run_restore = self.mock_object(
+            self.backup_mgr,
+            '_run_restore')
+        mock_run_restore.side_effect = exception.BackupRestoreCancel(
+            vol_id=vol_id, back_id=backup.id)
+        # We shouldn't raise an exception on the call; it's OK to cancel
+        self.backup_mgr.restore_backup(self.ctxt, backup, vol_id)
+
+        vol = objects.Volume.get_by_id(self.ctxt, vol_id)
+        self.assertEqual('error', vol.status)
+        backup.refresh()
+        self.assertEqual(fields.BackupStatus.AVAILABLE, backup.status)
+        self.assertTrue(mock_run_restore.called)
+
    def test_restore_backup_with_bad_service(self):
        """Test error handling.
@@ -1065,6 +1085,8 @@ class BackupTestCase(BaseBackupTest):
        mock_temporary_chown.assert_called_once_with('/dev/null')
        mock_get_conn.assert_called_once_with()

+        vol.status = 'available'
+        vol.obj_reset_changes()
        mock_secure_enabled.assert_called_once_with(self.ctxt, vol)
        mock_attach_device.assert_called_once_with(self.ctxt, vol,
                                                   properties)


@@ -203,3 +203,21 @@ the status of the source resource to see when it stops being "backing-up".
   operation on the snapshot that followed a cancellation could result in an
   error if the snapshot was still mapped. Polling on the volume to stop being
   "backing-up" prior to the deletion is required to ensure success.
+
+Since Rocky it is also possible to cancel an ongoing restore operation on any
+of the Chunked Backup type of drivers.
+
+To issue a backup restoration cancellation we need to alter its status to
+anything other than `restoring`. We strongly recommend using the "error" state
+to avoid any confusion on whether the restore was successful or not.
+
+.. code-block:: console
+
+   $ openstack volume backup set --state error BACKUP_ID
+
+.. warning::
+
+   After a restore operation has started, if it is then cancelled, the
+   destination volume is useless, as there is no way of knowing how much
+   data, if any, was actually restored, hence our recommendation of using
+   the "error" state.


@@ -0,0 +1,5 @@
+---
+features:
+  - |
+    Support backup restore cancellation by changing the backup status to
+    anything other than `restoring` using `cinder backup-reset-state`.