From 73c0d73aba1855edb774d69ae940a1b53dc975e0 Mon Sep 17 00:00:00 2001
From: Andrew Bogott
Date: Sun, 16 Oct 2022 22:37:36 -0500
Subject: [PATCH] cinder-backup: use the same backup backend host for
 incremental backups

Incremental backups only work if there is a previous backup to base
them on. With the posix driver, this means there needs to be a previous
backup on the same host where the incremental backup is created.

This patch ensures that an incremental backup is scheduled on the host
that contains the base backup being used for the increment.

Previously we were relying on luck for this, and much of the time an
incremental backup would fail for want of a base backup.

Closes-bug: 1952805
Change-Id: Id239b4150b1c8e9f4bf32f2ef867fdffbe84f96d
---
 cinder/backup/api.py                          |  7 ++
 cinder/scheduler/manager.py                   |  7 +-
 cinder/tests/unit/api/contrib/test_backups.py | 94 +++++++++++++++++++
 cinder/tests/unit/api/v2/fakes.py             |  1 +
 .../tests/unit/backup/test_backup_messages.py |  3 +-
 cinder/tests/unit/scheduler/test_scheduler.py | 25 ++++-
 ...ps-on-the-wrong-node-b20b0c137f33ef03.yaml |  7 ++
 7 files changed, 138 insertions(+), 6 deletions(-)
 create mode 100644 releasenotes/notes/bug-1952805-cinder-schedules-incremental-backups-on-the-wrong-node-b20b0c137f33ef03.yaml

diff --git a/cinder/backup/api.py b/cinder/backup/api.py
index c41bb15bace..fb3155cdf50 100644
--- a/cinder/backup/api.py
+++ b/cinder/backup/api.py
@@ -272,6 +272,7 @@ class API(base.Base):
         # Find the latest backup and use it as the parent backup to do an
         # incremental backup.
         latest_backup = None
+        latest_host = None
         if incremental:
             backups = objects.BackupList.get_all_by_volume(
                 context, volume_id, volume['project_id'],
@@ -312,6 +313,11 @@ class API(base.Base):
             if latest_backup:
                 parent = latest_backup
                 parent_id = latest_backup.id
+                if 'posix' in latest_backup.service:
+                    # The posix driver needs to schedule incremental backups
+                    # on the same host as the last backup, otherwise there's
+                    # nothing to base the incremental backup on.
+                    latest_host = latest_backup.host
                 if latest_backup['status'] != fields.BackupStatus.AVAILABLE:
                     QUOTAS.rollback(context, reservations)
                     msg = _('No backups available to do an incremental backup.')
@@ -342,6 +348,7 @@ class API(base.Base):
             'snapshot_id': snapshot_id,
             'data_timestamp': data_timestamp,
             'parent': parent,
+            'host': latest_host,
             'metadata': metadata or {}
         }
         try:
diff --git a/cinder/scheduler/manager.py b/cinder/scheduler/manager.py
index 46b5774205f..c5842844357 100644
--- a/cinder/scheduler/manager.py
+++ b/cinder/scheduler/manager.py
@@ -638,9 +638,10 @@ class SchedulerManager(manager.CleanableManager, manager.Manager):
         volume_id = backup.volume_id
         volume = self.db.volume_get(context, volume_id)
         try:
-            host = self.driver.get_backup_host(volume)
-            backup.host = host
-            backup.save()
+            if not backup.host:
+                host = self.driver.get_backup_host(volume)
+                backup.host = host
+                backup.save()
             self.backup_api.create_backup(context, backup)
         except exception.ServiceNotFound:
             self.db.volume_update(context, volume_id,
diff --git a/cinder/tests/unit/api/contrib/test_backups.py b/cinder/tests/unit/api/contrib/test_backups.py
index bcd3049729f..7189cba0ca4 100644
--- a/cinder/tests/unit/api/contrib/test_backups.py
+++ b/cinder/tests/unit/api/contrib/test_backups.py
@@ -708,6 +708,100 @@ class BackupsAPITestCase(test.TestCase):
         backup.destroy()
         volume.destroy()
 
+    # Test default behavior for backup host
+    def test_create_incremental_backup(self):
+        volume = utils.create_volume(self.context, size=5, status='in-use')
+        parent_backup = utils.create_backup(
+            self.context, volume.id,
+            status=fields.BackupStatus.AVAILABLE,
+            size=1,
+            availability_zone='az1',
+            host='parenthost',
+            parent=None)
+        backup = utils.create_backup(self.context, volume.id,
+                                     status=fields.BackupStatus.AVAILABLE,
+                                     size=1, availability_zone='az1',
+                                     host='testhost')
+        body = {"backup": {"name": "nightly001",
+                           "description":
+                           "Nightly Backup 03-Sep-2012",
+                           "volume_id": volume.id,
+                           "container": "nightlybackups",
+                           "force": True,
+                           "incremental": True,
+                           }
+                }
+        req = webob.Request.blank('/v3/%s/backups' % fake.PROJECT_ID)
+        req.method = 'POST'
+        req.headers['Content-Type'] = 'application/json'
+        req.body = jsonutils.dump_as_bytes(body)
+
+        with mock.patch.object(
+                cinder.objects.backup.Backup,
+                '_from_db_object',
+                wraps=cinder.objects.backup.Backup._from_db_object
+        ) as mocked_from_db_object:
+            res = req.get_response(fakes.wsgi_app(
+                fake_auth_context=self.user_context))
+
+        res_dict = jsonutils.loads(res.body)
+
+        self.assertEqual(HTTPStatus.ACCEPTED, res.status_int)
+        self.assertIn('id', res_dict['backup'])
+
+        args = mocked_from_db_object.call_args.args
+
+        # Host should not be set yet
+        self.assertIsNone(args[1]['host'])
+
+        parent_backup.destroy()
+        backup.destroy()
+        volume.destroy()
+
+    # Test behavior for backup host w/posix backend
+    # (see https://bugs.launchpad.net/cinder/+bug/1952805)
+    def test_create_incremental_backup_posix(self):
+        volume = utils.create_volume(self.context, size=5, status='in-use')
+        parent_backup = utils.create_backup(
+            self.context, volume.id,
+            status=fields.BackupStatus.AVAILABLE,
+            size=1, availability_zone='az1',
+            host='parenthost',
+            service='cinder.backup.drivers.posix.PosixBackupDriver'
+        )
+        body = {"backup": {"name": "nightly001",
+                           "description":
+                           "Nightly Backup 03-Sep-2012",
+                           "volume_id": volume.id,
+                           "container": "nightlybackups",
+                           "force": True,
+                           "incremental": True,
+                           }
+                }
+        req = webob.Request.blank('/v3/%s/backups' % fake.PROJECT_ID)
+        req.method = 'POST'
+        req.headers['Content-Type'] = 'application/json'
+        req.body = jsonutils.dump_as_bytes(body)
+
+        with mock.patch.object(
+                cinder.objects.backup.Backup,
+                '_from_db_object',
+                wraps=cinder.objects.backup.Backup._from_db_object
+        ) as mocked_from_db_object:
+            res = req.get_response(fakes.wsgi_app(
+                fake_auth_context=self.user_context))
+
+        res_dict = jsonutils.loads(res.body)
+
+        self.assertEqual(HTTPStatus.ACCEPTED, res.status_int)
+        self.assertIn('id', res_dict['backup'])
+
+        args = mocked_from_db_object.call_args.args
+        self.assertEqual('parenthost', args[1]['host'])
+
+        parent_backup.destroy()
+        volume.destroy()
+
     def test_create_backup_snapshot_json(self):
         volume = utils.create_volume(self.context, size=5,
                                      status='available')
diff --git a/cinder/tests/unit/api/v2/fakes.py b/cinder/tests/unit/api/v2/fakes.py
index f3248420c15..5bb66072efd 100644
--- a/cinder/tests/unit/api/v2/fakes.py
+++ b/cinder/tests/unit/api/v2/fakes.py
@@ -217,6 +217,7 @@ def fake_backup(id, **kwargs):
               'temp_volume_id': None,
               'temp_snapshot_id': None,
               'snapshot_id': None,
+              'service': 'cinder.fake.backup.service',
               'data_timestamp': None,
               'restore_volume_id': None,
               'backup_metadata': {}}
diff --git a/cinder/tests/unit/backup/test_backup_messages.py b/cinder/tests/unit/backup/test_backup_messages.py
index c1ba0cd3f8e..136f4bfd014 100644
--- a/cinder/tests/unit/backup/test_backup_messages.py
+++ b/cinder/tests/unit/backup/test_backup_messages.py
@@ -256,7 +256,8 @@ class BackupUserMessagesTest(test.TestCase):
         manager = sch_manager.SchedulerManager()
         fake_context = mock.MagicMock()
         fake_backup = mock.MagicMock(id=fake.BACKUP_ID,
-                                     volume_id=fake.VOLUME_ID)
+                                     volume_id=fake.VOLUME_ID,
+                                     host=None)
         mock_get_vol.return_value = mock.MagicMock()
         exception.ServiceNotFound(service_id='cinder-backup')
         mock_get_backup_host.side_effect = exception.ServiceNotFound(
diff --git a/cinder/tests/unit/scheduler/test_scheduler.py b/cinder/tests/unit/scheduler/test_scheduler.py
index 8236e76696c..c2c51a0c1e0 100644
--- a/cinder/tests/unit/scheduler/test_scheduler.py
+++ b/cinder/tests/unit/scheduler/test_scheduler.py
@@ -596,7 +596,7 @@ class SchedulerManagerTestCase(test.TestCase):
         volume = fake_volume.fake_db_volume()
         mock_volume_get.return_value = volume
         mock_host.return_value = 'cinder-backup'
-        backup = fake_backup.fake_backup_obj(self.context)
+        backup = fake_backup.fake_backup_obj(self.context, host=None)
 
         self.manager.create_backup(self.context, backup=backup)
 
@@ -605,6 +605,27 @@ class SchedulerManagerTestCase(test.TestCase):
         mock_volume_get.assert_called_once_with(self.context, backup.volume_id)
         mock_create.assert_called_once_with(self.context, backup)
 
+    @mock.patch('cinder.backup.rpcapi.BackupAPI.create_backup')
+    @mock.patch('cinder.objects.backup.Backup.save')
+    @mock.patch('cinder.scheduler.driver.Scheduler.get_backup_host')
+    @mock.patch('cinder.db.volume_get')
+    def test_create_backup_with_host(self, mock_volume_get,
+                                     mock_host, mock_save, mock_create):
+        volume = fake_volume.fake_db_volume()
+        mock_volume_get.return_value = volume
+        mock_host.return_value = 'cinder-backup'
+        backup = fake_backup.fake_backup_obj(self.context, host='testhost')
+
+        self.manager.create_backup(self.context, backup=backup)
+
+        # With the ready-made host, we should skip
+        # looking up and updating the host:
+        mock_save.assert_not_called()
+        mock_host.assert_not_called()
+
+        mock_volume_get.assert_called_once_with(self.context, backup.volume_id)
+        mock_create.assert_called_once_with(self.context, backup)
+
     @mock.patch('cinder.volume.volume_utils.update_backup_error')
     @mock.patch('cinder.scheduler.driver.Scheduler.get_backup_host')
     @mock.patch('cinder.db.volume_get')
@@ -617,7 +638,7 @@ class SchedulerManagerTestCase(test.TestCase):
         mock_volume_get.return_value = volume
         mock_host.side_effect = exception.ServiceNotFound(
             service_id='cinder-volume')
-        backup = fake_backup.fake_backup_obj(self.context)
+        backup = fake_backup.fake_backup_obj(self.context, host=None)
 
         self.manager.create_backup(self.context, backup=backup)
 
diff --git a/releasenotes/notes/bug-1952805-cinder-schedules-incremental-backups-on-the-wrong-node-b20b0c137f33ef03.yaml b/releasenotes/notes/bug-1952805-cinder-schedules-incremental-backups-on-the-wrong-node-b20b0c137f33ef03.yaml
new file mode 100644
index 00000000000..2ddd65534c5
--- /dev/null
+++ b/releasenotes/notes/bug-1952805-cinder-schedules-incremental-backups-on-the-wrong-node-b20b0c137f33ef03.yaml
@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    `Bug #1952805 <https://bugs.launchpad.net/cinder/+bug/1952805>`_: Fixed
+    the cinder-backup posix driver's behavior with multiple backup
+    hosts. Previously cinder-backup would frequently schedule incremental
+    backups on the wrong host and immediately fail.
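
For reviewers, a condensed sketch of the host-selection behaviour this patch
introduces. This is not part of the patch: the function name
choose_backup_host and its argument list are illustrative only, and the real
logic is split between cinder/backup/api.py (which records the parent
backup's host) and cinder/scheduler/manager.py (which skips scheduling when a
host is already set), as shown in the hunks above.

    # Illustrative sketch, not applied by this patch.
    def choose_backup_host(backup, latest_backup, scheduler, volume):
        """Pick the backup host, mirroring the behaviour added above."""
        if latest_backup is not None and 'posix' in latest_backup.service:
            # Posix incremental backups must run where their base backup
            # lives, otherwise there is nothing to base the increment on.
            return latest_backup.host
        if backup.host:
            # A host was already assigned upstream; keep it.
            return backup.host
        # Otherwise fall back to normal scheduling, as before this patch.
        return scheduler.get_backup_host(volume)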