From 89f6291ee33780ed6d4e4886d5d18a0ce0cdb182 Mon Sep 17 00:00:00 2001
From: Gorka Eguileor
Date: Thu, 1 Feb 2018 13:39:56 +0100
Subject: [PATCH] Add backup restoration cancellation support

Backups can be cancelled by force deleting the in-progress backup, but
there is no equivalent mechanism to cancel a restore that is in
progress.

We will now monitor the status of the backup during the restoration,
and on any status change we will abort the restore and set the volume
status to error.

The reason to diverge from the backup creation cancellation
implementation (deleting the new resource) is that the restore may be
done into an already existing volume, and deleting that volume is not
feasible for the user, for example when an instance is booted from a
Cinder volume and there's a license linked to the System ID.

Cinder backup drivers may now raise BackupRestoreCancel in the
`restore` method when a restore operation is cancelled.

Change-Id: If2f143d7ba56ae2d74b3bb571237cc053f63054e
---
 cinder/backup/chunkeddriver.py                |  24 +++++-
 cinder/backup/driver.py                       |   3 +
 cinder/backup/manager.py                      |  14 +++-
 cinder/exception.py                           |   4 +
 .../unit/backup/drivers/test_backup_google.py |  14 +++-
 .../unit/backup/drivers/test_backup_nfs.py    |  84 +++++++++++++++++--
 .../unit/backup/drivers/test_backup_swift.py  |   4 +
 cinder/tests/unit/backup/test_backup.py       |  22 +++++
 .../admin/blockstorage-volume-backups.rst     |  18 ++++
 ...eature-abort-restore-fe1252288c59e105.yaml |   5 ++
 10 files changed, 175 insertions(+), 17 deletions(-)
 create mode 100644 releasenotes/notes/feature-abort-restore-fe1252288c59e105.yaml

diff --git a/cinder/backup/chunkeddriver.py b/cinder/backup/chunkeddriver.py
index 6743af8f712..4296929a3e5 100644
--- a/cinder/backup/chunkeddriver.py
+++ b/cinder/backup/chunkeddriver.py
@@ -615,8 +615,14 @@ class ChunkedBackupDriver(driver.BackupDriver):
         self._finalize_backup(backup, container, object_meta, object_sha256)
 
-    def _restore_v1(self, backup, volume_id, metadata, volume_file):
-        """Restore a v1 volume backup."""
+    def _restore_v1(self, backup, volume_id, metadata, volume_file,
+                    requested_backup):
+        """Restore a v1 volume backup.
+
+        Raises BackupRestoreCancel on any requested_backup status change; we
+        ignore the backup parameter for this check since it is only the
+        current data source from the list of backup sources.
+        """
         backup_id = backup['id']
         LOG.debug('v1 volume backup restore of %s started.', backup_id)
         extra_metadata = metadata.get('extra_metadata')
@@ -637,6 +643,13 @@ class ChunkedBackupDriver(driver.BackupDriver):
             raise exception.InvalidBackup(reason=err)
 
         for metadata_object in metadata_objects:
+            # Abort when status changes to error, available, or anything else
+            with requested_backup.as_read_deleted():
+                requested_backup.refresh()
+            if requested_backup.status != fields.BackupStatus.RESTORING:
+                raise exception.BackupRestoreCancel(back_id=backup.id,
+                                                    vol_id=volume_id)
+
             object_name, obj = list(metadata_object.items())[0]
             LOG.debug('restoring object. backup: %(backup_id)s, '
                       'container: %(container)s, object name: '
@@ -683,7 +696,10 @@ class ChunkedBackupDriver(driver.BackupDriver):
                   backup_id)
 
     def restore(self, backup, volume_id, volume_file):
-        """Restore the given volume backup from backup repository."""
+        """Restore the given volume backup from backup repository.
+
+        Raises BackupRestoreCancel on any backup status change.
+ """ backup_id = backup['id'] container = backup['container'] object_prefix = backup['service_metadata'] @@ -725,7 +741,7 @@ class ChunkedBackupDriver(driver.BackupDriver): backup1 = backup_list[index] index = index - 1 metadata = self._read_metadata(backup1) - restore_func(backup1, volume_id, metadata, volume_file) + restore_func(backup1, volume_id, metadata, volume_file, backup) volume_meta = metadata.get('volume_meta', None) try: diff --git a/cinder/backup/driver.py b/cinder/backup/driver.py index b085cdbe7d6..b5e5ecb7a74 100644 --- a/cinder/backup/driver.py +++ b/cinder/backup/driver.py @@ -391,6 +391,9 @@ class BackupDriver(base.Base): starvation parameter volume_file will be a proxy that will execute all methods in native threads, so the method implementation doesn't need to worry about that.. + + May raise BackupRestoreCancel to indicate that the restoration of a + volume has been aborted by changing the backup status. """ return diff --git a/cinder/backup/manager.py b/cinder/backup/manager.py index 56c6d95e18f..02cb94b8ff3 100644 --- a/cinder/backup/manager.py +++ b/cinder/backup/manager.py @@ -530,7 +530,10 @@ class BackupManager(manager.ThreadPoolManager): raise exception.InvalidBackup(reason=err) try: + canceled = False self._run_restore(context, backup, volume) + except exception.BackupRestoreCancel: + canceled = True except Exception: with excutils.save_and_reraise_exception(): self.db.volume_update(context, volume_id, @@ -538,12 +541,15 @@ class BackupManager(manager.ThreadPoolManager): backup.status = fields.BackupStatus.AVAILABLE backup.save() - self.db.volume_update(context, volume_id, {'status': 'available'}) + volume.status = 'error' if canceled else 'available' + volume.save() backup.status = fields.BackupStatus.AVAILABLE backup.save() - LOG.info('Restore backup finished, backup %(backup_id)s restored' - ' to volume %(volume_id)s.', - {'backup_id': backup.id, 'volume_id': volume_id}) + LOG.info('%(result)s restoring backup %(backup_id)s to volume ' + '%(volume_id)s.', + {'result': 'Canceled' if canceled else 'Finished', + 'backup_id': backup.id, + 'volume_id': volume_id}) self._notify_about_backup_usage(context, backup, "restore.end") def _run_restore(self, context, backup, volume): diff --git a/cinder/exception.py b/cinder/exception.py index 8f1fb872a53..ec414e4f0a2 100644 --- a/cinder/exception.py +++ b/cinder/exception.py @@ -155,6 +155,10 @@ class BackupDriverException(CinderException): message = _("Backup driver reported an error: %(message)s") +class BackupRestoreCancel(CinderException): + message = _("Canceled backup %(back_id)s restore on volume %(vol_id)s") + + class GlanceConnectionFailed(CinderException): message = _("Connection to glance failed: %(reason)s") diff --git a/cinder/tests/unit/backup/drivers/test_backup_google.py b/cinder/tests/unit/backup/drivers/test_backup_google.py index 9e2eeddb1a8..f266112ab49 100644 --- a/cinder/tests/unit/backup/drivers/test_backup_google.py +++ b/cinder/tests/unit/backup/drivers/test_backup_google.py @@ -125,6 +125,7 @@ class GoogleBackupDriverTestCase(test.TestCase): volume_id=_DEFAULT_VOLUME_ID, container=google_dr.CONF.backup_gcs_bucket, parent_id=None, + status=None, service_metadata=None): try: @@ -138,6 +139,7 @@ class GoogleBackupDriverTestCase(test.TestCase): 'parent_id': parent_id, 'user_id': fake.USER_ID, 'project_id': fake.PROJECT_ID, + 'status': status, 'service_metadata': service_metadata, } backup = objects.Backup(context=self.ctxt, **kwargs) @@ -476,7 +478,9 @@ class 
     @gcs_client
     def test_restore(self):
         volume_id = 'c2a81f09-f480-4325-8424-00000071685b'
-        backup = self._create_backup_db_entry(volume_id=volume_id)
+        backup = self._create_backup_db_entry(
+            volume_id=volume_id,
+            status=objects.fields.BackupStatus.RESTORING)
         service = google_dr.GoogleBackupDriver(self.ctxt)
 
         with tempfile.NamedTemporaryFile() as volume_file:
@@ -514,9 +518,11 @@ class GoogleBackupDriverTestCase(test.TestCase):
         self.volume_file.seek(20 * units.Ki)
         self.volume_file.write(os.urandom(units.Ki))
 
-        deltabackup = self._create_backup_db_entry(volume_id=volume_id,
-                                                   container=container_name,
-                                                   parent_id=backup.id)
+        deltabackup = self._create_backup_db_entry(
+            volume_id=volume_id,
+            status=objects.fields.BackupStatus.RESTORING,
+            container=container_name,
+            parent_id=backup.id)
         self.volume_file.seek(0)
         service2 = google_dr.GoogleBackupDriver(self.ctxt)
         service2.backup(deltabackup, self.volume_file, True)
diff --git a/cinder/tests/unit/backup/drivers/test_backup_nfs.py b/cinder/tests/unit/backup/drivers/test_backup_nfs.py
index 481ced6b4b4..3dac85e5eae 100644
--- a/cinder/tests/unit/backup/drivers/test_backup_nfs.py
+++ b/cinder/tests/unit/backup/drivers/test_backup_nfs.py
@@ -127,7 +127,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
                                 volume_id=_DEFAULT_VOLUME_ID,
                                 container='test-container',
                                 backup_id=fake.BACKUP_ID,
-                                parent_id=None):
+                                parent_id=None,
+                                status=None):
 
         try:
             db.volume_get(self.ctxt, volume_id)
@@ -141,6 +142,7 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
                   'parent_id': parent_id,
                   'user_id': fake.USER_ID,
                   'project_id': fake.PROJECT_ID,
+                  'status': status,
                   }
         return db.backup_create(self.ctxt, backup)['id']
 
@@ -608,6 +610,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
 
         with tempfile.NamedTemporaryFile() as restored_file:
             backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
+            backup.status = objects.fields.BackupStatus.RESTORING
+            backup.save()
             service.restore(backup, volume_id, restored_file)
             self.assertTrue(filecmp.cmp(self.volume_file.name,
                             restored_file.name))
@@ -629,6 +633,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
 
         with tempfile.NamedTemporaryFile() as restored_file:
             backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
+            backup.status = objects.fields.BackupStatus.RESTORING
+            backup.save()
             service.restore(backup, volume_id, restored_file)
             self.assertTrue(filecmp.cmp(self.volume_file.name,
                             restored_file.name))
@@ -649,6 +655,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
         service = nfs.NFSBackupDriver(self.ctxt)
         self._write_effective_compression_file(file_size)
         backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
+        backup.status = objects.fields.BackupStatus.RESTORING
+        backup.save()
         service.backup(backup, self.volume_file)
 
         with tempfile.NamedTemporaryFile() as restored_file:
@@ -660,6 +668,70 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
             self.assertNotEqual(threading.current_thread(),
                                 self.thread_dict['thread'])
 
+    def test_restore_abort_delta(self):
+        volume_id = fake.VOLUME_ID
+        count = set()
+
+        def _fake_generate_object_name_prefix(self, backup):
+            az = 'az_fake'
+            backup_name = '%s_backup_%s' % (az, backup['id'])
+            volume = 'volume_%s' % (backup['volume_id'])
+            prefix = volume + '_' + backup_name
+            return prefix
+
+        def my_refresh():
+            # This refresh method will abort the restore after 1 chunk
+            count.add(len(count) + 1)
+            if len(count) == 2:
+                backup.status = objects.fields.BackupStatus.AVAILABLE
+                backup.save()
+            original_refresh()
+
+        self.mock_object(nfs.NFSBackupDriver,
+                         '_generate_object_name_prefix',
+                         _fake_generate_object_name_prefix)
+
+        self.flags(backup_file_size=(1024 * 8))
+        self.flags(backup_sha_block_size_bytes=1024)
+
+        container_name = self.temp_dir.replace(tempfile.gettempdir() + '/',
+                                               '', 1)
+        self._create_backup_db_entry(volume_id=volume_id,
+                                     container=container_name,
+                                     backup_id=fake.BACKUP_ID)
+        service = nfs.NFSBackupDriver(self.ctxt)
+        self.volume_file.seek(0)
+        backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
+        service.backup(backup, self.volume_file)
+
+        # Change file contents so the incremental backup has data to store
+        self.volume_file.seek(16 * 1024)
+        self.volume_file.write(os.urandom(1024))
+        self.volume_file.seek(20 * 1024)
+        self.volume_file.write(os.urandom(1024))
+
+        self._create_backup_db_entry(
+            volume_id=volume_id,
+            status=objects.fields.BackupStatus.RESTORING,
+            container=container_name,
+            backup_id=fake.BACKUP2_ID,
+            parent_id=fake.BACKUP_ID)
+        self.volume_file.seek(0)
+        deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
+        service.backup(deltabackup, self.volume_file, True)
+        deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
+
+        backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
+        original_refresh = backup.refresh
+
+        with tempfile.NamedTemporaryFile() as restored_file, \
+                mock.patch('cinder.objects.Backup.refresh',
+                           side_effect=my_refresh):
+
+            self.assertRaises(exception.BackupRestoreCancel,
+                              service.restore, backup, volume_id,
+                              restored_file)
+
     def test_restore_delta(self):
         volume_id = fake.VOLUME_ID
 
@@ -693,10 +765,12 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
         self.volume_file.seek(20 * 1024)
         self.volume_file.write(os.urandom(1024))
 
-        self._create_backup_db_entry(volume_id=volume_id,
-                                     container=container_name,
-                                     backup_id=fake.BACKUP2_ID,
-                                     parent_id=fake.BACKUP_ID)
+        self._create_backup_db_entry(
+            volume_id=volume_id,
+            status=objects.fields.BackupStatus.RESTORING,
+            container=container_name,
+            backup_id=fake.BACKUP2_ID,
+            parent_id=fake.BACKUP_ID)
         self.volume_file.seek(0)
         deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
         service.backup(deltabackup, self.volume_file, True)
diff --git a/cinder/tests/unit/backup/drivers/test_backup_swift.py b/cinder/tests/unit/backup/drivers/test_backup_swift.py
index d7faf0a4b04..9a1e7075626 100644
--- a/cinder/tests/unit/backup/drivers/test_backup_swift.py
+++ b/cinder/tests/unit/backup/drivers/test_backup_swift.py
@@ -700,6 +700,8 @@ class BackupSwiftTestCase(test.TestCase):
 
         with tempfile.NamedTemporaryFile() as volume_file:
             backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
+            backup.status = objects.fields.BackupStatus.RESTORING
+            backup.save()
             service.restore(backup, volume_id, volume_file)
 
     def test_restore_delta(self):
@@ -748,6 +750,8 @@ class BackupSwiftTestCase(test.TestCase):
 
         with tempfile.NamedTemporaryFile() as restored_file:
             backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
+            backup.status = objects.fields.BackupStatus.RESTORING
+            backup.save()
             service.restore(backup, volume_id, restored_file)
 
             self.assertTrue(filecmp.cmp(self.volume_file.name,
diff --git a/cinder/tests/unit/backup/test_backup.py b/cinder/tests/unit/backup/test_backup.py
index b528d0587ba..0a412072242 100644
--- a/cinder/tests/unit/backup/test_backup.py
+++ b/cinder/tests/unit/backup/test_backup.py
@@ -1011,6 +1011,26 @@ class BackupTestCase(BaseBackupTest):
         self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status'])
         self.assertTrue(mock_run_restore.called)
 
+    def test_restore_backup_with_driver_cancellation(self):
+        """Test error handling when a restore is cancelled."""
+        vol_id = self._create_volume_db_entry(status='restoring-backup',
+                                              size=1)
+        backup = self._create_backup_db_entry(
+            status=fields.BackupStatus.RESTORING, volume_id=vol_id)
+
+        mock_run_restore = self.mock_object(
+            self.backup_mgr,
+            '_run_restore')
+        mock_run_restore.side_effect = exception.BackupRestoreCancel(
+            vol_id=vol_id, back_id=backup.id)
+        # We shouldn't raise an exception on the call; it's OK to cancel
+        self.backup_mgr.restore_backup(self.ctxt, backup, vol_id)
+        vol = objects.Volume.get_by_id(self.ctxt, vol_id)
+        self.assertEqual('error', vol.status)
+        backup.refresh()
+        self.assertEqual(fields.BackupStatus.AVAILABLE, backup.status)
+        self.assertTrue(mock_run_restore.called)
+
     def test_restore_backup_with_bad_service(self):
         """Test error handling.
 
@@ -1065,6 +1085,8 @@ class BackupTestCase(BaseBackupTest):
 
         mock_temporary_chown.assert_called_once_with('/dev/null')
         mock_get_conn.assert_called_once_with()
+        vol.status = 'available'
+        vol.obj_reset_changes()
         mock_secure_enabled.assert_called_once_with(self.ctxt, vol)
         mock_attach_device.assert_called_once_with(self.ctxt, vol,
                                                    properties)
diff --git a/doc/source/admin/blockstorage-volume-backups.rst b/doc/source/admin/blockstorage-volume-backups.rst
index 8cc6a5f4a8b..711638dd722 100644
--- a/doc/source/admin/blockstorage-volume-backups.rst
+++ b/doc/source/admin/blockstorage-volume-backups.rst
@@ -203,3 +203,21 @@ the status of the source resource to see when it stops being "backing-up".
    operation on the snapshot that followed a cancellation could result in an
    error if the snapshot was still mapped.  Polling on the volume to stop
    being "backing-up" prior to the deletion is required to ensure success.
+
+Since Rocky it is also possible to cancel an ongoing restore operation on
+any backup driver that is based on the chunked backup driver.
+
+To issue a backup restoration cancellation we need to alter the backup status
+to anything other than `restoring`. We strongly recommend using the "error"
+state to avoid any confusion as to whether the restore was successful or not.
+
+.. code-block:: console
+
+   $ openstack volume backup set --state error BACKUP_ID
+
+.. warning::
+
+   If a restore operation is cancelled after it has started, the destination
+   volume is useless, as there is no way of knowing how much data, if any,
+   was actually restored, hence our recommendation to use the "error"
+   state.
diff --git a/releasenotes/notes/feature-abort-restore-fe1252288c59e105.yaml b/releasenotes/notes/feature-abort-restore-fe1252288c59e105.yaml
new file mode 100644
index 00000000000..1fa7e592721
--- /dev/null
+++ b/releasenotes/notes/feature-abort-restore-fe1252288c59e105.yaml
@@ -0,0 +1,5 @@
+---
+features:
+  - |
+    Support backup restore cancellation by changing the backup status to
+    anything other than `restoring` using `cinder backup-reset-state`.
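
Note for maintainers of out-of-tree backup drivers: the cancellation
mechanism added here is driver-side polling rather than an RPC interrupt.
To honor a cancellation, a driver mirrors what the chunked driver does
between objects: refresh the backup and raise BackupRestoreCancel as soon
as the status leaves `restoring`. Below is a minimal sketch of that
pattern; MyBackupDriver and its _iter_chunks/_restore_chunk helpers are
hypothetical, while the refresh/status check and the exception come from
this patch:

    from cinder import exception
    from cinder.backup import driver
    from cinder.objects import fields


    class MyBackupDriver(driver.BackupDriver):
        def restore(self, backup, volume_id, volume_file):
            for chunk in self._iter_chunks(backup):  # hypothetical helper
                # Poll the backup status between chunks; any transition
                # away from "restoring" means the user cancelled the
                # restore (e.g. "openstack volume backup set --state error").
                with backup.as_read_deleted():
                    backup.refresh()
                if backup.status != fields.BackupStatus.RESTORING:
                    raise exception.BackupRestoreCancel(back_id=backup.id,
                                                        vol_id=volume_id)
                self._restore_chunk(chunk, volume_file)  # hypothetical helper

The manager side (restore_backup above) already catches
BackupRestoreCancel, returns the backup to `available` and sets the
destination volume to `error`, so a driver only needs to raise the
exception and clean up its own transfer state.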