diff --git a/cinder/coordination.py b/cinder/coordination.py index 7fa6f55ade0..2de82db9948 100644 --- a/cinder/coordination.py +++ b/cinder/coordination.py @@ -15,7 +15,12 @@ """Coordination and locking utilities.""" +import errno +import glob import inspect +import os +import re +import sys import uuid import decorator @@ -55,16 +60,28 @@ class Coordinator(object): self.agent_id = agent_id or str(uuid.uuid4()) self.started = False self.prefix = prefix + self._file_path = None + + def _get_file_path(self, backend_url): + if backend_url.startswith('file://'): + path = backend_url[7:] + # Copied from TooZ's _normalize_path to get the same path they use + if sys.platform == 'win32': + path = re.sub(r'\\(?=\w:\\)', '', os.path.normpath(path)) + return os.path.abspath(os.path.join(path, self.prefix)) + return None def start(self): if self.started: return + backend_url = cfg.CONF.coordination.backend_url + # NOTE(bluex): Tooz expects member_id as a byte string. member_id = (self.prefix + self.agent_id).encode('ascii') - self.coordinator = coordination.get_coordinator( - cfg.CONF.coordination.backend_url, member_id) + self.coordinator = coordination.get_coordinator(backend_url, member_id) self.coordinator.start(start_heart=True) + self._file_path = self._get_file_path(backend_url) self.started = True def stop(self): @@ -87,10 +104,28 @@ class Coordinator(object): else: raise exception.LockCreationFailed(_('Coordinator uninitialized.')) + def remove_lock(self, glob_name): + # Most locks clean up on release, but not the file lock, so we manually + # clean them. + if self._file_path: + files = glob.glob(self._file_path + glob_name) + for file_name in files: + try: + os.remove(file_name) + except Exception as exc: + if not (isinstance(exc, OSError) and + exc.errno == errno.ENOENT): + LOG.warning('Failed to cleanup lock %(name)s: %(exc)s', + {'name': file_name, 'exc': exc}) + COORDINATOR = Coordinator(prefix='cinder-') +def synchronized_remove(glob_name, coordinator=COORDINATOR): + coordinator.remove_lock(glob_name) + + def synchronized(lock_name, blocking=True, coordinator=COORDINATOR): """Synchronization decorator. diff --git a/cinder/tests/unit/api/v3/test_attachments.py b/cinder/tests/unit/api/v3/test_attachments.py index 24934e67b53..3f27cb28a26 100644 --- a/cinder/tests/unit/api/v3/test_attachments.py +++ b/cinder/tests/unit/api/v3/test_attachments.py @@ -216,7 +216,7 @@ class AttachmentsAPITestCase(test.TestCase): def _create_attachment(self, ctxt=None, volume_uuid=None, instance_uuid=None, mountpoint=None, attach_time=None, detach_time=None, - attach_status=None, attach_mode=None, host=None): + attach_status=None, attach_mode=None, host=''): """Create an attachment object.""" ctxt = ctxt or self.ctxt attachment = objects.VolumeAttachment(ctxt) diff --git a/cinder/tests/unit/test_coordination.py b/cinder/tests/unit/test_coordination.py index 365e37aab99..e291c84fde9 100644 --- a/cinder/tests/unit/test_coordination.py +++ b/cinder/tests/unit/test_coordination.py @@ -13,6 +13,7 @@ # License for the specific language governing permissions and limitations # under the License. 
+import errno
 import inspect
 
 from unittest import mock
@@ -43,14 +44,18 @@ class MockToozLock(tooz.locking.Lock):
         self.active_locks.remove(self.name)
 
 
-@mock.patch('tooz.coordination.get_coordinator')
 class CoordinatorTestCase(test.TestCase):
     MOCK_TOOZ = False
 
-    def test_coordinator_start(self, get_coordinator):
+    @mock.patch('cinder.coordination.cfg.CONF.coordination.backend_url')
+    @mock.patch('cinder.coordination.Coordinator._get_file_path')
+    @mock.patch('tooz.coordination.get_coordinator')
+    def test_coordinator_start(self, get_coordinator, mock_get_file_path,
+                               mock_backend_url):
         crd = get_coordinator.return_value
 
         agent = coordination.Coordinator()
+        self.assertIsNone(agent._file_path)
         agent.start()
         self.assertTrue(get_coordinator.called)
         self.assertTrue(crd.start.called)
@@ -58,6 +63,10 @@ class CoordinatorTestCase(test.TestCase):
         agent.start()
         crd.start.assert_called_once_with(start_heart=True)
 
+        mock_get_file_path.assert_called_once_with(mock_backend_url)
+        self.assertEqual(mock_get_file_path.return_value, agent._file_path)
+
+    @mock.patch('tooz.coordination.get_coordinator')
     def test_coordinator_stop(self, get_coordinator):
         crd = get_coordinator.return_value
 
@@ -71,6 +80,7 @@ class CoordinatorTestCase(test.TestCase):
         agent.stop()
         crd.stop.assert_called_once_with()
 
+    @mock.patch('tooz.coordination.get_coordinator')
     def test_coordinator_lock(self, get_coordinator):
         crd = get_coordinator.return_value
         crd.get_lock.side_effect = lambda n: MockToozLock(n)
@@ -90,6 +100,7 @@ class CoordinatorTestCase(test.TestCase):
         self.assertRaises(Locked, agent2.get_lock(lock_name).acquire)
         self.assertNotIn(expected_name, MockToozLock.active_locks)
 
+    @mock.patch('tooz.coordination.get_coordinator')
     def test_coordinator_offline(self, get_coordinator):
         crd = get_coordinator.return_value
         crd.start.side_effect = tooz.coordination.ToozConnectionError('err')
@@ -98,9 +109,82 @@ class CoordinatorTestCase(test.TestCase):
         self.assertRaises(tooz.coordination.ToozError, agent.start)
         self.assertFalse(agent.started)
 
+    def test_get_file_path(self):
+        backend_url = 'file:///opt/stack/data/cinder'
+        res = coordination.COORDINATOR._get_file_path(backend_url)
+        self.assertEqual('/opt/stack/data/cinder/cinder-', res)
+
+    def test_get_file_path_non_file(self):
+        backend_url = 'etcd3+http://192.168.1.95:2379'
+        res = coordination.COORDINATOR._get_file_path(backend_url)
+        self.assertIsNone(res)
+
+    @mock.patch('cinder.coordination.COORDINATOR._file_path', None)
+    @mock.patch('glob.glob')
+    @mock.patch('os.remove')
+    def test_remove_lock_non_file_lock(self, mock_remove, mock_glob):
+        coordination.COORDINATOR.remove_lock('lock-file')
+        mock_glob.assert_not_called()
+        mock_remove.assert_not_called()
+
+    @mock.patch('cinder.coordination.COORDINATOR._file_path', '/data/cinder-')
+    @mock.patch('glob.glob')
+    @mock.patch('os.remove')
+    def test_remove_lock(self, mock_remove, mock_glob):
+        mock_glob.return_value = ['/data/cinder-attachment_update-UUID-1',
+                                  '/data/cinder-attachment_update-UUID-2']
+
+        coordination.COORDINATOR.remove_lock('attachment_update-UUID-*')
+
+        mock_glob.assert_called_once_with(
+            '/data/cinder-attachment_update-UUID-*')
+        self.assertEqual(2, mock_remove.call_count)
+        mock_remove.assert_has_calls(
+            [mock.call('/data/cinder-attachment_update-UUID-1'),
+             mock.call('/data/cinder-attachment_update-UUID-2')])
+
+    @mock.patch('cinder.coordination.COORDINATOR._file_path', '/data/cinder-')
+    @mock.patch('cinder.coordination.LOG.warning')
+    @mock.patch('glob.glob')
+    @mock.patch('os.remove')
+    def test_remove_lock_missing_file(self, mock_remove, mock_glob, mock_log):
+        mock_glob.return_value = ['/data/cinder-attachment_update-UUID-1',
+                                  '/data/cinder-attachment_update-UUID-2']
+        mock_remove.side_effect = [OSError(errno.ENOENT, ''), None]
+
+        coordination.COORDINATOR.remove_lock('attachment_update-UUID-*')
+
+        mock_glob.assert_called_once_with(
+            '/data/cinder-attachment_update-UUID-*')
+        self.assertEqual(2, mock_remove.call_count)
+        mock_remove.assert_has_calls(
+            [mock.call('/data/cinder-attachment_update-UUID-1'),
+             mock.call('/data/cinder-attachment_update-UUID-2')])
+        mock_log.assert_not_called()
+
+    @mock.patch('cinder.coordination.COORDINATOR._file_path', '/data/cinder-')
+    @mock.patch('cinder.coordination.LOG.warning')
+    @mock.patch('glob.glob')
+    @mock.patch('os.remove')
+    def test_remove_lock_unknown_failure(self, mock_remove, mock_glob,
+                                         mock_log):
+        mock_glob.return_value = ['/data/cinder-attachment_update-UUID-1',
+                                  '/data/cinder-attachment_update-UUID-2']
+        mock_remove.side_effect = [ValueError(), None]
+
+        coordination.COORDINATOR.remove_lock('attachment_update-UUID-*')
+
+        mock_glob.assert_called_once_with(
+            '/data/cinder-attachment_update-UUID-*')
+        self.assertEqual(2, mock_remove.call_count)
+        mock_remove.assert_has_calls(
+            [mock.call('/data/cinder-attachment_update-UUID-1'),
+             mock.call('/data/cinder-attachment_update-UUID-2')])
+        self.assertEqual(1, mock_log.call_count)
+
 
-@mock.patch.object(coordination.COORDINATOR, 'get_lock')
 class CoordinationTestCase(test.TestCase):
+    @mock.patch.object(coordination.COORDINATOR, 'get_lock')
     def test_synchronized(self, get_lock):
         @coordination.synchronized('lock-{f_name}-{foo.val}-{bar[val]}')
         def func(foo, bar):
@@ -113,3 +197,15 @@ class CoordinationTestCase(test.TestCase):
             func(foo, bar)
         get_lock.assert_called_with('lock-func-7-8')
         self.assertEqual(['foo', 'bar'], inspect.getfullargspec(func)[0])
+
+    @mock.patch('cinder.coordination.COORDINATOR.remove_lock')
+    def test_synchronized_remove(self, mock_remove):
+        coordination.synchronized_remove(mock.sentinel.glob_name)
+        mock_remove.assert_called_once_with(mock.sentinel.glob_name)
+
+    @mock.patch('cinder.coordination.COORDINATOR.remove_lock')
+    def test_synchronized_remove_custom_coordinator(self, mock_remove):
+        coordinator = mock.Mock()
+        coordination.synchronized_remove(mock.sentinel.glob_name, coordinator)
+        coordinator.remove_lock.assert_called_once_with(
+            mock.sentinel.glob_name)
diff --git a/cinder/tests/unit/test_utils.py b/cinder/tests/unit/test_utils.py
index b97c5c533a6..e4184179275 100644
--- a/cinder/tests/unit/test_utils.py
+++ b/cinder/tests/unit/test_utils.py
@@ -1443,3 +1443,65 @@ class TestKeystoneProjectGet(test.TestCase):
         project = api_utils.get_project(
             self.context, self.context.project_id)
         self.assertEqual(expected_project.__dict__, project.__dict__)
+
+
+class TestCleanFileLocks(test.TestCase):
+
+    @mock.patch('cinder.utils.LOG.warning')
+    @mock.patch('cinder.utils.synchronized_remove')
+    def test_clean_volume_file_locks(self, mock_remove, mock_log):
+        driver = mock.Mock()
+
+        utils.clean_volume_file_locks('UUID', driver)
+
+        self.assertEqual(3, mock_remove.call_count)
+        mock_remove.assert_has_calls([mock.call('UUID-delete_volume'),
+                                      mock.call('UUID'),
+                                      mock.call('UUID-detach_volume')])
+        driver.clean_volume_file_locks.assert_called_once_with('UUID')
+        mock_log.assert_not_called()
+
+    @mock.patch('cinder.utils.LOG.warning')
+    @mock.patch('cinder.utils.synchronized_remove')
+    def test_clean_volume_file_locks_errors(self, mock_remove, mock_log):
+        driver = mock.Mock()
+        driver.clean_volume_file_locks.side_effect = Exception
+        mock_remove.side_effect = [None, Exception, None]
+
+        utils.clean_volume_file_locks('UUID', driver)
+
+        self.assertEqual(3, mock_remove.call_count)
+        mock_remove.assert_has_calls([mock.call('UUID-delete_volume'),
+                                      mock.call('UUID'),
+                                      mock.call('UUID-detach_volume')])
+        driver.clean_volume_file_locks.assert_called_once_with('UUID')
+        self.assertEqual(2, mock_log.call_count)
+
+    @mock.patch('cinder.utils.LOG.warning')
+    @mock.patch('cinder.utils.synchronized_remove')
+    def test_clean_snapshot_file_locks(self, mock_remove, mock_log):
+        driver = mock.Mock()
+
+        utils.clean_snapshot_file_locks('UUID', driver)
+
+        mock_remove.assert_called_once_with('UUID-delete_snapshot')
+        driver.clean_snapshot_file_locks.assert_called_once_with('UUID')
+        mock_log.assert_not_called()
+
+    @mock.patch('cinder.utils.LOG.warning')
+    @mock.patch('cinder.utils.synchronized_remove')
+    def test_clean_snapshot_file_locks_failures(self, mock_remove, mock_log):
+        driver = mock.Mock()
+        driver.clean_snapshot_file_locks.side_effect = Exception
+        mock_remove.side_effect = Exception
+
+        utils.clean_snapshot_file_locks('UUID', driver)
+
+        mock_remove.assert_called_once_with('UUID-delete_snapshot')
+        driver.clean_snapshot_file_locks.assert_called_once_with('UUID')
+        self.assertEqual(2, mock_log.call_count)
+
+    @mock.patch('cinder.coordination.synchronized_remove')
+    def test_api_clean_volume_file_locks(self, mock_remove):
+        utils.api_clean_volume_file_locks('UUID')
+        mock_remove.assert_called_once_with('attachment_update-UUID-*')
diff --git a/cinder/tests/unit/volume/__init__.py b/cinder/tests/unit/volume/__init__.py
index 8f8d9de70fd..a8f8f9a5eda 100644
--- a/cinder/tests/unit/volume/__init__.py
+++ b/cinder/tests/unit/volume/__init__.py
@@ -81,6 +81,10 @@ class BaseVolumeTestCase(test.TestCase):
         self.called = []
         self.volume_api = volume_api.API()
 
+        # Don't accidentally make a call to delete a file from the system
+        self.mock_lock_remove = self.patch('cinder.utils.synchronized_remove')
+        self.mock_dlm_lock_remove = self.patch('cinder.coordination.os.remove')
+
     def _cleanup(self):
         try:
             shutil.rmtree(CONF.volumes_dir)
diff --git a/cinder/tests/unit/volume/test_snapshot.py b/cinder/tests/unit/volume/test_snapshot.py
index 672a4566b40..9f6129ad915 100644
--- a/cinder/tests/unit/volume/test_snapshot.py
+++ b/cinder/tests/unit/volume/test_snapshot.py
@@ -76,13 +76,15 @@ class SnapshotTestCase(base.BaseVolumeTestCase):
         self.vol_type = db.volume_type_get_by_name(self.context,
                                                    'vol_type_name')
 
-    def test_delete_snapshot_frozen(self):
+    @mock.patch('cinder.utils.clean_snapshot_file_locks')
+    def test_delete_snapshot_frozen(self, mock_clean):
         service = tests_utils.create_service(self.context, {'frozen': True})
         volume = tests_utils.create_volume(self.context, host=service.host)
         snapshot = tests_utils.create_snapshot(self.context, volume.id)
         self.assertRaises(exception.InvalidInput,
                           self.volume_api.delete_snapshot, self.context,
                           snapshot)
+        mock_clean.assert_not_called()
 
     @ddt.data('create_snapshot', 'create_snapshot_force')
     def test_create_snapshot_frozen(self, method):
@@ -114,8 +116,9 @@ class SnapshotTestCase(base.BaseVolumeTestCase):
             self.volume.delete_snapshot(self.context, snapshot_obj)
             self.volume.delete_volume(self.context, volume_src)
 
+    @mock.patch('cinder.utils.clean_snapshot_file_locks')
     @mock.patch('cinder.tests.unit.fake_notifier.FakeNotifier._notify')
-    def test_create_delete_snapshot(self, mock_notify):
+    def test_create_delete_snapshot(self,
mock_notify, mock_clean): """Test snapshot can be created and deleted.""" volume = tests_utils.create_volume( self.context, @@ -146,6 +149,7 @@ class SnapshotTestCase(base.BaseVolumeTestCase): any_order=True) self.volume.delete_snapshot(self.context, snapshot) + mock_clean.assert_called_once_with(snapshot.id, self.volume.driver) self.assert_notify_called(mock_notify, (['INFO', 'volume.create.start'], ['INFO', 'volume.create.end'], @@ -514,7 +518,8 @@ class SnapshotTestCase(base.BaseVolumeTestCase): snapshot.destroy() db.volume_destroy(self.context, volume_id) - def test_delete_busy_snapshot(self): + @mock.patch('cinder.utils.clean_snapshot_file_locks') + def test_delete_busy_snapshot(self, mock_clean): """Test snapshot can be created and deleted.""" self.volume.driver.vg = fake_lvm.FakeBrickLVM('cinder-volumes', @@ -540,9 +545,11 @@ class SnapshotTestCase(base.BaseVolumeTestCase): self.assertEqual(fields.SnapshotStatus.AVAILABLE, snapshot_ref.status) mock_del_snap.assert_called_once_with(snapshot) + mock_clean.assert_not_called() + @mock.patch('cinder.utils.clean_snapshot_file_locks') @test.testtools.skipIf(sys.platform == "darwin", "SKIP on OSX") - def test_delete_no_dev_fails(self): + def test_delete_no_dev_fails(self, mock_clean): """Test delete snapshot with no dev file fails.""" self.mock_object(os.path, 'exists', lambda x: False) self.volume.driver.vg = fake_lvm.FakeBrickLVM('cinder-volumes', @@ -567,6 +574,7 @@ class SnapshotTestCase(base.BaseVolumeTestCase): self.assertEqual(fields.SnapshotStatus.AVAILABLE, snapshot_ref.status) mock_del_snap.assert_called_once_with(snapshot) + mock_clean.assert_not_called() def test_force_delete_snapshot(self): """Test snapshot can be forced to delete.""" @@ -618,7 +626,8 @@ class SnapshotTestCase(base.BaseVolumeTestCase): self.context, snap) - def test_delete_snapshot_driver_not_initialized(self): + @mock.patch('cinder.utils.clean_snapshot_file_locks') + def test_delete_snapshot_driver_not_initialized(self, mock_clean): volume = tests_utils.create_volume(self.context, **self.volume_params) snapshot = tests_utils.create_snapshot(self.context, volume.id) @@ -630,6 +639,7 @@ class SnapshotTestCase(base.BaseVolumeTestCase): snapshot.refresh() self.assertEqual(fields.SnapshotStatus.ERROR_DELETING, snapshot.status) + mock_clean.assert_not_called() @ddt.data({'all_tenants': '1', 'name': 'snap1'}, {'all_tenants': 'true', 'name': 'snap1'}, diff --git a/cinder/tests/unit/volume/test_volume.py b/cinder/tests/unit/volume/test_volume.py index 3b5108b98eb..6a4cc5ccefd 100644 --- a/cinder/tests/unit/volume/test_volume.py +++ b/cinder/tests/unit/volume/test_volume.py @@ -346,11 +346,13 @@ class VolumeTestCase(base.BaseVolumeTestCase): self.assertEqual("error_deleting", volume.status) volume.destroy() + @mock.patch('cinder.utils.clean_volume_file_locks') @mock.patch('cinder.tests.unit.fake_notifier.FakeNotifier._notify') @mock.patch('cinder.quota.QUOTAS.rollback', new=mock.Mock()) @mock.patch('cinder.quota.QUOTAS.commit', new=mock.Mock()) @mock.patch('cinder.quota.QUOTAS.reserve', return_value=['RESERVATION']) - def test_create_delete_volume(self, _mock_reserve, mock_notify): + def test_create_delete_volume(self, _mock_reserve, mock_notify, + mock_clean): """Test volume can be created and deleted.""" volume = tests_utils.create_volume( self.context, @@ -387,6 +389,7 @@ class VolumeTestCase(base.BaseVolumeTestCase): db.volume_get, self.context, volume_id) + mock_clean.assert_called_once_with(volume_id, self.volume.driver) def 
test_create_delete_volume_with_metadata(self): """Test volume can be created with metadata and deleted.""" @@ -403,11 +406,13 @@ class VolumeTestCase(base.BaseVolumeTestCase): self.context, volume_id) - def test_delete_volume_frozen(self): + @mock.patch('cinder.utils.clean_volume_file_locks') + def test_delete_volume_frozen(self, mock_clean): service = tests_utils.create_service(self.context, {'frozen': True}) volume = tests_utils.create_volume(self.context, host=service.host) self.assertRaises(exception.InvalidInput, self.volume_api.delete, self.context, volume) + mock_clean.assert_not_called() def test_delete_volume_another_cluster_fails(self): """Test delete of volume from another cluster fails.""" @@ -564,7 +569,8 @@ class VolumeTestCase(base.BaseVolumeTestCase): 'fake_key1', FAKE_METADATA_TYPE.fake_type) - def test_delete_volume_metadata_maintenance(self): + @mock.patch('cinder.utils.clean_volume_file_locks') + def test_delete_volume_metadata_maintenance(self, mock_clean): """Test delete volume metadata in maintenance.""" FAKE_METADATA_TYPE = enum.Enum('METADATA_TYPES', 'fake_type') test_meta1 = {'fake_key1': 'fake_value1', 'fake_key2': 'fake_value2'} @@ -577,6 +583,7 @@ class VolumeTestCase(base.BaseVolumeTestCase): volume, 'fake_key1', FAKE_METADATA_TYPE.fake_type) + mock_clean.assert_not_called() def test_accept_transfer_maintenance(self): """Test accept transfer in maintenance.""" @@ -974,7 +981,8 @@ class VolumeTestCase(base.BaseVolumeTestCase): self.assertEqual("deleting", volume.status) volume.destroy() - def test_delete_busy_volume(self): + @mock.patch('cinder.utils.clean_volume_file_locks') + def test_delete_busy_volume(self, mock_clean): """Test volume survives deletion if driver reports it as busy.""" volume = tests_utils.create_volume(self.context, **self.volume_params) volume_id = volume['id'] @@ -989,8 +997,10 @@ class VolumeTestCase(base.BaseVolumeTestCase): self.assertEqual(volume_id, volume_ref.id) self.assertEqual("available", volume_ref.status) mock_del_vol.assert_called_once_with(volume) + mock_clean.assert_not_called() - def test_unmanage_encrypted_volume_fails(self): + @mock.patch('cinder.utils.clean_volume_file_locks') + def test_unmanage_encrypted_volume_fails(self, mock_clean): volume = tests_utils.create_volume( self.context, encryption_key_id=fake.ENCRYPTION_KEY_ID, @@ -1002,6 +1012,7 @@ class VolumeTestCase(base.BaseVolumeTestCase): self.context, volume, unmanage_only=True) + mock_clean.assert_not_called() self.volume.delete_volume(self.context, volume) def test_unmanage_cascade_delete_fails(self): @@ -1082,7 +1093,8 @@ class VolumeTestCase(base.BaseVolumeTestCase): volume_api.get_all(self.context, filters={'all_tenants': '1'}) self.assertTrue(get_all.called) - def test_delete_volume_in_error_extending(self): + @mock.patch('cinder.utils.clean_volume_file_locks') + def test_delete_volume_in_error_extending(self, mock_clean): """Test volume can be deleted in error_extending stats.""" # create a volume volume = tests_utils.create_volume(self.context, **self.volume_params) @@ -1094,16 +1106,19 @@ class VolumeTestCase(base.BaseVolumeTestCase): self.volume.delete_volume(self.context, volume) self.assertRaises(exception.NotFound, db.volume_get, self.context, volume['id']) + mock_clean.assert_called_once_with(volume.id, self.volume.driver) + @mock.patch('cinder.utils.clean_volume_file_locks') @mock.patch.object(db.sqlalchemy.api, 'volume_get', side_effect=exception.VolumeNotFound( volume_id='12345678-1234-5678-1234-567812345678')) - def 
test_delete_volume_not_found(self, mock_get_volume): + def test_delete_volume_not_found(self, mock_get_volume, mock_clean): """Test delete volume moves on if the volume does not exist.""" volume_id = '12345678-1234-5678-1234-567812345678' volume = objects.Volume(self.context, status='available', id=volume_id) self.volume.delete_volume(self.context, volume) self.assertTrue(mock_get_volume.called) + mock_clean.assert_called_once_with(volume_id, self.volume.driver) @mock.patch('cinder.volume.drivers.lvm.LVMVolumeDriver.' 'create_volume_from_snapshot') @@ -2119,7 +2134,8 @@ class VolumeTestCase(base.BaseVolumeTestCase): """Test volume can't be deleted in maintenance status.""" self._test_cannot_delete_volume('maintenance') - def _test_cannot_delete_volume(self, status): + @mock.patch('cinder.utils.clean_volume_file_locks') + def _test_cannot_delete_volume(self, status, mock_clean): """Test volume can't be deleted in invalid stats.""" # create a volume and assign to host volume = tests_utils.create_volume(self.context, CONF.host, @@ -2130,6 +2146,7 @@ class VolumeTestCase(base.BaseVolumeTestCase): self.volume_api.delete, self.context, volume) + mock_clean.assert_not_called() # clean up self.volume.delete_volume(self.context, volume) @@ -2172,7 +2189,8 @@ class VolumeTestCase(base.BaseVolumeTestCase): db.volume_destroy(self.context, volume.id) - def test__revert_to_snapshot_generic_failed(self): + @mock.patch('cinder.utils.clean_volume_file_locks') + def test__revert_to_snapshot_generic_failed(self, mock_clean): fake_volume = tests_utils.create_volume(self.context, status='available') fake_snapshot = tests_utils.create_snapshot(self.context, @@ -2197,8 +2215,11 @@ class VolumeTestCase(base.BaseVolumeTestCase): mock_copy.assert_called_once_with( self.context, temp_volume, fake_volume) mock_driver_delete.assert_called_once_with(temp_volume) + mock_clean.assert_called_once_with(temp_volume.id, + self.volume.driver) - def test__revert_to_snapshot_generic(self): + @mock.patch('cinder.utils.clean_volume_file_locks') + def test__revert_to_snapshot_generic(self, mock_clean): fake_volume = tests_utils.create_volume(self.context, status='available') fake_snapshot = tests_utils.create_snapshot(self.context, @@ -2218,6 +2239,8 @@ class VolumeTestCase(base.BaseVolumeTestCase): mock_copy.assert_called_once_with( self.context, temp_volume, fake_volume) mock_driver_delete.assert_called_once_with(temp_volume) + mock_clean.assert_called_once_with(temp_volume.id, + self.volume.driver) @ddt.data({'driver_error': True}, {'driver_error': False}) @@ -3133,14 +3156,16 @@ class VolumeTestCase(base.BaseVolumeTestCase): self.assertRaises(exception.VolumeNotFound, self.volume.create_volume, self.context, test_vol, - {'volume_properties': self.volume_params}, + {'volume_properties': self.volume_params, + 'source_volid': fake.VOLUME_ID}, {'retry': {'num_attempts': 1, 'host': []}}) volume = db.volume_get(context.get_admin_context(), test_vol_id) self.assertEqual('error', volume['status']) self.assertEqual({'_pool0': {'allocated_capacity_gb': 1}}, self.volume.stats['pools']) - def test_cascade_delete_volume_with_snapshots(self): + @mock.patch('cinder.utils.api_clean_volume_file_locks') + def test_cascade_delete_volume_with_snapshots(self, mock_api_clean): """Test volume deletion with dependent snapshots.""" volume = tests_utils.create_volume(self.context, **self.volume_params) self.volume.create_volume(self.context, volume) @@ -3158,8 +3183,10 @@ class VolumeTestCase(base.BaseVolumeTestCase): volume_api.delete(self.context, 
volume, cascade=True) + mock_api_clean.assert_called_once_with(volume.id) - def test_cascade_delete_volume_with_snapshots_error(self): + @mock.patch('cinder.utils.api_clean_volume_file_locks') + def test_cascade_delete_volume_with_snapshots_error(self, mock_api_clean): """Test volume deletion with dependent snapshots.""" volume = tests_utils.create_volume(self.context, **self.volume_params) self.volume.create_volume(self.context, volume) @@ -3182,8 +3209,11 @@ class VolumeTestCase(base.BaseVolumeTestCase): self.context, volume, cascade=True) + mock_api_clean.assert_not_called() - def test_cascade_force_delete_volume_with_snapshots_error(self): + @mock.patch('cinder.utils.api_clean_volume_file_locks') + def test_cascade_force_delete_volume_with_snapshots_error(self, + mock_api_clean): """Test volume force deletion with errored dependent snapshots.""" volume = tests_utils.create_volume(self.context, host='fakehost') @@ -3202,6 +3232,7 @@ class VolumeTestCase(base.BaseVolumeTestCase): volume = objects.Volume.get_by_id(self.context, volume.id) self.assertEqual('deleting', volume.status) + mock_api_clean.assert_called_once_with(volume.id) def test_cascade_delete_volume_with_snapshots_in_other_project(self): """Test volume deletion with dependent snapshots in other project.""" diff --git a/cinder/utils.py b/cinder/utils.py index fcbb9f6af31..b96df875f4b 100644 --- a/cinder/utils.py +++ b/cinder/utils.py @@ -57,6 +57,7 @@ from oslo_utils import strutils from oslo_utils import timeutils import tenacity +from cinder import coordination from cinder import exception from cinder.i18n import _ @@ -69,6 +70,48 @@ INFINITE_UNKNOWN_VALUES = ('infinite', 'unknown') synchronized = lockutils.synchronized_with_prefix('cinder-') +synchronized_remove = lockutils.remove_external_lock_file_with_prefix( + 'cinder-') + + +def clean_volume_file_locks(volume_id, driver): + """Remove file locks used by Cinder. + + This doesn't take care of driver locks, those should be handled in driver's + delete_volume method. 
+ """ + for name in (volume_id + '-delete_volume', volume_id, + volume_id + '-detach_volume'): + try: + synchronized_remove(name) + except Exception as exc: + LOG.warning('Failed to cleanup volume lock %(name)s: %(exc)s', + {'name': name, 'exc': exc}) + + try: + driver.clean_volume_file_locks(volume_id) + except Exception as exc: + LOG.warning('Failed to cleanup driver locks for volume %(id)s: ' + '%(exc)s', {'id': volume_id, 'exc': exc}) + + +def api_clean_volume_file_locks(volume_id): + coordination.synchronized_remove('attachment_update-' + volume_id + '-*') + + +def clean_snapshot_file_locks(snapshot_id, driver): + try: + name = snapshot_id + '-delete_snapshot' + synchronized_remove(name) + except Exception as exc: + LOG.warning('Failed to cleanup snapshot lock %(name)s: %(exc)s', + {'name': name, 'exc': exc}) + + try: + driver.clean_snapshot_file_locks(snapshot_id) + except Exception as exc: + LOG.warning('Failed to cleanup driver locks for snapshot %(id)s: ' + '%(exc)s', {'id': snapshot_id, 'exc': exc}) def as_int(obj: Union[int, float, str], quiet: bool = True) -> int: diff --git a/cinder/volume/api.py b/cinder/volume/api.py index 8c5df3f50b7..fdd0c5f46bc 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -425,6 +425,7 @@ class API(base.Base): LOG.info("Delete volume request issued successfully.", resource={'type': 'volume', 'id': volume.id}) + utils.api_clean_volume_file_locks(volume.id) return if not unmanage_only: @@ -533,6 +534,7 @@ class API(base.Base): volume, unmanage_only, cascade) + utils.api_clean_volume_file_locks(volume.id) LOG.info("Delete volume request issued successfully.", resource=volume) @@ -2297,6 +2299,7 @@ class API(base.Base): volume.status = status_updates['status'] volume.attach_status = status_updates['attach_status'] volume.save() + return remaining_attachments diff --git a/cinder/volume/driver.py b/cinder/volume/driver.py index e5d434e0ef2..1433d211db1 100644 --- a/cinder/volume/driver.py +++ b/cinder/volume/driver.py @@ -620,6 +620,9 @@ class BaseVD(object, metaclass=abc.ABCMeta): It is imperative that this operation ensures that the data from the deleted volume cannot leak into new volumes when they are created, as new volumes are likely to belong to a different tenant/project. + + If the driver uses custom file locks they should be cleaned on success + using cinder.utils.synchronized_remove """ return @@ -1953,6 +1956,50 @@ class BaseVD(object, metaclass=abc.ABCMeta): return [CONF.backend_defaults._group._opts[cfg_name]['opt'] for cfg_name in cfg_names] + @classmethod + def clean_volume_file_locks(cls, volume_id): + """Clean up driver specific volume locks. + + This method will be called when a volume has been removed from Cinder + or when we detect that the volume doesn't exist. + + There are 3 types of locks in Cinder: + + - Process locks: Don't need cleanup + - Node locks: Must use cinder.utils.synchronized_remove + - Global locks: Must use cinder.coordination.synchronized_remove + + When using method cinder.utils.synchronized_remove we must pass the + exact lock name, whereas method cinder.coordination.synchronized_remove + accepts a glob. + + Refer to clean_volume_file_locks, api_clean_volume_file_locks, and + clean_snapshot_file_locks in cinder.utils for examples. + """ + pass + + @classmethod + def clean_snapshot_file_locks(self, snapshot_id): + """Clean up driver specific snapshot locks. + + This method will be called when a snapshot has been removed from cinder + or when we detect that the snapshot doesn't exist. 
+ + There are 3 types of locks in Cinder: + + - Process locks: Don't need cleanup + - Node locks: Must use cinder.utils.synchronized_remove + - Global locks: Must use cinder.coordination.synchronized_remove + + When using method cinder.utils.synchronized_remove we must pass the + exact lock name, whereas method cinder.coordination.synchronized_remove + accepts a glob. + + Refer to clean_volume_file_locks, api_clean_volume_file_locks, and + clean_snapshot_file_locks in cinder.utils for examples. + """ + pass + class CloneableImageVD(object, metaclass=abc.ABCMeta): @abc.abstractmethod @@ -2217,7 +2264,11 @@ class VolumeDriver(ManageableVD, CloneableImageVD, ManageableSnapshotsVD, raise NotImplementedError() def delete_snapshot(self, snapshot): - """Deletes a snapshot.""" + """Deletes a snapshot. + + If the driver uses custom file locks they should be cleaned on success + using cinder.utils.synchronized_remove + """ raise NotImplementedError() def local_path(self, volume): diff --git a/cinder/volume/drivers/hpe/hpe_3par_base.py b/cinder/volume/drivers/hpe/hpe_3par_base.py index b6eb1d9ec2e..67ecd4d3336 100644 --- a/cinder/volume/drivers/hpe/hpe_3par_base.py +++ b/cinder/volume/drivers/hpe/hpe_3par_base.py @@ -32,6 +32,7 @@ except ImportError: from oslo_log import log as logging +from cinder import coordination from cinder import exception from cinder.i18n import _ from cinder.volume import driver @@ -668,3 +669,7 @@ class HPE3PARDriverBase(driver.ManageableVD, default="normal") return properties, 'HPE:3PAR' + + @classmethod + def clean_volume_file_locks(cls, volume_id): + coordination.synchronized_remove('3par-' + volume_id) diff --git a/cinder/volume/flows/manager/create_volume.py b/cinder/volume/flows/manager/create_volume.py index 7775e82b716..7ad0313d131 100644 --- a/cinder/volume/flows/manager/create_volume.py +++ b/cinder/volume/flows/manager/create_volume.py @@ -75,7 +75,7 @@ class OnFailureRescheduleTask(flow_utils.CinderTask): this volume elsewhere. """ - def __init__(self, reschedule_context, db, driver, scheduler_rpcapi, + def __init__(self, reschedule_context, db, manager, scheduler_rpcapi, do_reschedule): requires = ['filter_properties', 'request_spec', 'volume', 'context'] @@ -84,7 +84,7 @@ class OnFailureRescheduleTask(flow_utils.CinderTask): self.do_reschedule = do_reschedule self.scheduler_rpcapi = scheduler_rpcapi self.db = db - self.driver = driver + self.manager = manager self.reschedule_context = reschedule_context # These exception types will trigger the volume to be set into error # status rather than being rescheduled. @@ -175,7 +175,7 @@ class OnFailureRescheduleTask(flow_utils.CinderTask): # host field will be erased. Just in case volume was already created at # the backend, we attempt to delete it. try: - self.driver.delete_volume(volume) + self.manager.driver_delete_volume(volume) except Exception: # Most likely the volume weren't created at the backend. We can # safely ignore this. @@ -1257,7 +1257,7 @@ def get_flow(context, manager, db, driver, scheduler_rpcapi, host, volume, # status when reverting the flow. Meanwhile, no need to revert process of # ExtractVolumeRefTask. 
do_reschedule = allow_reschedule and request_spec and retry - volume_flow.add(OnFailureRescheduleTask(reschedule_context, db, driver, + volume_flow.add(OnFailureRescheduleTask(reschedule_context, db, manager, scheduler_rpcapi, do_reschedule)) LOG.debug("Volume reschedule parameters: %(allow)s " diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index f40149db55b..800d94c4c64 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -35,6 +35,7 @@ intact. """ +import functools import time import typing as ty @@ -74,6 +75,7 @@ from cinder.objects import cgsnapshot from cinder.objects import consistencygroup from cinder.objects import fields from cinder import quota +from cinder import utils from cinder import volume as cinder_volume from cinder.volume import configuration as config from cinder.volume.flows.manager import create_volume @@ -184,6 +186,37 @@ MAPPING = { } +def clean_volume_locks(func): + @functools.wraps(func) + def wrapper(self, context, volume, *args, **kwargs): + try: + skip_clean = func(self, context, volume, *args, **kwargs) + except Exception: + # On quota failure volume will have been deleted from the DB + skip_clean = not volume.deleted + raise + finally: + if not skip_clean: + # Most TooZ drivers clean after themselves (like etcd3), so + # we clean TooZ file locks that are the same as oslo's. + utils.clean_volume_file_locks(volume.id, self.driver) + return wrapper + + +def clean_snapshot_locks(func): + @functools.wraps(func) + def wrapper(self, context, snapshot, *args, **kwargs): + try: + skip_clean = func(self, context, snapshot, *args, **kwargs) + except Exception: + skip_clean = not snapshot.deleted + raise + finally: + if not skip_clean: + utils.clean_snapshot_file_locks(snapshot.id, self.driver) + return wrapper + + class VolumeManager(manager.CleanableManager, manager.SchedulerDependentManager): """Manages attachable block storage devices.""" @@ -772,6 +805,12 @@ class VolumeManager(manager.CleanableManager, else: with coordination.COORDINATOR.get_lock(locked_action): _run_flow() + except exception.VolumeNotFound: + with excutils.save_and_reraise_exception(): + utils.clean_volume_file_locks(source_volid, self.driver) + except exception.SnapshotNotFound: + with excutils.save_and_reraise_exception(): + utils.clean_snapshot_file_locks(snapshot_id, self.driver) finally: try: flow_engine.storage.fetch('refreshed') @@ -818,7 +857,19 @@ class VolumeManager(manager.CleanableManager, 'backend': backend}) raise exception.Invalid(msg) - @coordination.synchronized('{volume.id}-{f_name}') + def driver_delete_volume(self, volume): + self.driver.delete_volume(volume) + # Most TooZ drivers clean after themselves (like etcd3), so we don't + # worry about those locks, only about TooZ file locks that are the same + # as oslo's. + utils.clean_volume_file_locks(volume.id, self.driver) + + def driver_delete_snapshot(self, snapshot): + self.driver.delete_snapshot(snapshot) + utils.clean_snapshot_file_locks(snapshot.id, self.driver) + + @clean_volume_locks + @coordination.synchronized('{volume.id}-delete_volume') @objects.Volume.set_workers def delete_volume(self, context: context.RequestContext, @@ -926,7 +977,7 @@ class VolumeManager(manager.CleanableManager, # If this is a destination volume, we have to clear the database # record to avoid user confusion. 
             self._clear_db(is_migrating_dest, volume, 'available')
-            return
+            return True  # Let caller know we skipped deletion
         except Exception:
             with excutils.save_and_reraise_exception():
                 # If this is a destination volume, we have to clear the
@@ -1014,7 +1065,7 @@ class VolumeManager(manager.CleanableManager,
                 temp_vol = self.driver._create_temp_volume_from_snapshot(
                     ctxt, volume, snapshot, volume_options=v_options)
                 self._copy_volume_data(ctxt, temp_vol, volume)
-                self.driver.delete_volume(temp_vol)
+                self.driver_delete_volume(temp_vol)
                 temp_vol.destroy()
             except Exception:
                 with excutils.save_and_reraise_exception():
@@ -1025,7 +1076,7 @@ class VolumeManager(manager.CleanableManager,
                               {'snapshot': snapshot.id,
                                'volume': volume.id})
                     if temp_vol and temp_vol.status == 'available':
-                        self.driver.delete_volume(temp_vol)
+                        self.driver_delete_volume(temp_vol)
                         temp_vol.destroy()
 
     def _revert_to_snapshot(self, context, volume, snapshot) -> None:
@@ -1220,7 +1271,8 @@ class VolumeManager(manager.CleanableManager,
                      resource=snapshot)
         return snapshot.id
 
-    @coordination.synchronized('{snapshot.id}-{f_name}')
+    @clean_snapshot_locks
+    @coordination.synchronized('{snapshot.id}-delete_snapshot')
     def delete_snapshot(self,
                         context: context.RequestContext,
                         snapshot: objects.Snapshot,
@@ -1257,7 +1309,7 @@ class VolumeManager(manager.CleanableManager,
                     resource_type=message_field.Resource.VOLUME_SNAPSHOT,
                     resource_uuid=snapshot['id'],
                     exception=busy_error)
-                return
+                return True  # Let caller know we skipped deletion
             except Exception as delete_error:
                 with excutils.save_and_reraise_exception():
                     snapshot.status = fields.SnapshotStatus.ERROR_DELETING
@@ -3787,7 +3839,7 @@ class VolumeManager(manager.CleanableManager,
             volume_model_update = {'id': volume_ref.id}
             try:
                 self.driver.remove_export(context, volume_ref)
-                self.driver.delete_volume(volume_ref)
+                self.driver_delete_volume(volume_ref)
                 volume_model_update['status'] = 'deleted'
             except exception.VolumeIsBusy:
                 volume_model_update['status'] = 'available'
@@ -4102,7 +4154,7 @@ class VolumeManager(manager.CleanableManager,
         for snapshot in snapshots:
             snapshot_model_update = {'id': snapshot.id}
             try:
-                self.driver.delete_snapshot(snapshot)
+                self.driver_delete_snapshot(snapshot)
                 snapshot_model_update['status'] = (
                     fields.SnapshotStatus.DELETED)
             except exception.SnapshotIsBusy:
diff --git a/doc/source/contributor/high_availability.rst b/doc/source/contributor/high_availability.rst
index 3d59ba0dabf..a75d2c085b4 100644
--- a/doc/source/contributor/high_availability.rst
+++ b/doc/source/contributor/high_availability.rst
@@ -607,6 +607,27 @@ Example from `cinder/backup/manager.py`:
    choose a generic lock name for all your locks and try to create a unique
    name for each locking domain.
 
+Drivers that use node locks based on volumes should implement the
+``clean_volume_file_locks`` method, and if they use locks based on snapshots
+they should also implement ``clean_snapshot_file_locks``. Both should use the
+``synchronized_remove`` method from ``cinder.utils``.
+
+Example for a driver that used ``cinder.utils.synchronized``:
+
+.. code-block:: python
+
+   def my_operation(self, volume):
+       @utils.synchronized('my-driver-lock-' + volume.id)
+       def method():
+           pass
+
+       method()
+
+   @classmethod
+   def clean_volume_file_locks(cls, volume_id):
+       utils.synchronized_remove('my-driver-lock-' + volume_id)
+
+
 Global locks
 ~~~~~~~~~~~~
 
@@ -664,6 +685,19 @@
 For a detailed description of the requirement for global locks in cinder
 please refer to the `replacing local locks with Tooz`_ and `manager local
 locks`_ specs.
 
+Drivers that use global locks based on volumes should implement the
+``clean_volume_file_locks`` method, and if they use locks based on snapshots
+they should also implement ``clean_snapshot_file_locks``. Both should use the
+``synchronized_remove`` method from ``cinder.coordination``.
+
+Example for the 3PAR driver:
+
+.. code-block:: python
+
+   @classmethod
+   def clean_volume_file_locks(cls, volume_id):
+       coordination.synchronized_remove('3par-' + volume_id)
+
 
 Cinder locking
 ~~~~~~~~~~~~~~
diff --git a/releasenotes/notes/clean-file-locks-on-remove-e5898012f4114d3c.yaml b/releasenotes/notes/clean-file-locks-on-remove-e5898012f4114d3c.yaml
new file mode 100644
index 00000000000..732e35713aa
--- /dev/null
+++ b/releasenotes/notes/clean-file-locks-on-remove-e5898012f4114d3c.yaml
@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    `Bug #1432387 <https://bugs.launchpad.net/cinder/+bug/1432387>`_: Try
+    to automatically clean up file locks after a resource (volume,
+    snapshot) is deleted. This alleviates the issue of the locks directory
+    continually growing in the number of files.
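
Reviewer note, not part of the patch: the two ``BaseVD`` hooks added above are intentionally no-ops, so backends that take custom locks must override them. Below is a minimal sketch of what that looks like for a driver holding both node and global locks. The driver class, method, and every "acme" lock name are hypothetical; only `utils.synchronized`, `utils.synchronized_remove`, and `coordination.synchronized_remove` come from Cinder/oslo.concurrency and this change.

```python
# Hypothetical driver sketch: a backend that takes its own node lock and
# global lock, and cleans both up via the new BaseVD hooks.
from cinder import coordination
from cinder import utils
from cinder.volume import driver


class AcmeISCSIDriver(driver.VolumeDriver):  # hypothetical driver
    def extend_volume(self, volume, new_size):
        # Node lock: external=True makes oslo.concurrency back it with a
        # file under lock_path, named with the 'cinder-' prefix plus this
        # string, which is what leaks if nobody removes it.
        @utils.synchronized('acme-extend-' + volume.id, external=True)
        def _do_extend():
            pass  # the backend API call would go here

        _do_extend()

    @classmethod
    def clean_volume_file_locks(cls, volume_id):
        # Node locks require the exact lock name...
        utils.synchronized_remove('acme-extend-' + volume_id)
        # ...while the coordination helper accepts a glob, covering any
        # file-backed TooZ locks taken with @coordination.synchronized.
        coordination.synchronized_remove('acme-' + volume_id + '-*')
```

The exact-name vs. glob asymmetry mirrors the docstrings in the patch: oslo's `remove_external_lock_file` deletes one named file, whereas the new `Coordinator.remove_lock` expands a glob against the file backend's lock directory.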
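For reviewers checking the coordination change itself, the path handling is easiest to see outside the class. The following standalone sketch mirrors (rather than imports) the patched `_get_file_path()` and `remove_lock()` logic for a `file://` TooZ backend; the backend URL and UUID are example values, and the Windows `re.sub` branch is skipped.

```python
# Standalone illustration of the file:// lock-path computation and the
# glob-based removal; example values only.
import errno
import glob
import os

backend_url = 'file:///opt/stack/data/cinder'  # sample cinder.conf setting
prefix = 'cinder-'

# _get_file_path(): strip the 'file://' scheme and append the prefix.
file_path = os.path.abspath(os.path.join(backend_url[7:], prefix))
assert file_path == '/opt/stack/data/cinder/cinder-'

# api_clean_volume_file_locks() passes a glob like this one (example UUID):
glob_name = 'attachment_update-f47ac10b-58cc-4372-a567-0e02b2c3d479-*'

for file_name in glob.glob(file_path + glob_name):
    try:
        os.remove(file_name)
    except OSError as exc:
        if exc.errno != errno.ENOENT:  # a lock that is already gone is fine
            print('Failed to cleanup lock %s: %s' % (file_name, exc))
```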