Remove file locks once we delete a resource

File locks are never removed from the system, so they keep accumulating in
the locks directory, which can become problematic.

In this patch we start trying to delete these lock files when we delete
a volume or a snapshot.

This affects the two types of file locks we currently have, sketched below:
- Using oslo lockutils synchronized with external=True
- Using coordination.synchronized when deployed in Active-Passive mode
  without a DLM
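
A minimal sketch of both lock types (lock names here are made up; the
decorators follow the patterns used in this patch):

    from oslo_concurrency import lockutils

    from cinder import coordination

    synchronized = lockutils.synchronized_with_prefix('cinder-')

    @synchronized('example-volume-delete_volume', external=True)
    def oslo_locked_delete():
        # Leaves 'cinder-example-volume-delete_volume' in lock_path even
        # after the lock is released.
        pass

    @coordination.synchronized('example-volume-delete_volume')
    def tooz_locked_delete():
        # With a file:// backend_url this also leaves a lock file behind.
        pass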

This will alleviate the ever-increasing number of files in the locks
directory. Deployment tools should implement a service that runs when the
host boots and cleans up the locks directory before the OpenStack services
are started.
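
A minimal boot-time cleanup sketch (the locks directory path is an
assumption; use the configured lock_path or file:// backend directory):

    import glob
    import os

    LOCKS_DIR = '/var/lib/cinder/locks'  # assumed location

    # Runs before the OpenStack services start, so no lock is in use.
    for lock_file in glob.glob(os.path.join(LOCKS_DIR, 'cinder-*')):
        try:
            os.remove(lock_file)
        except OSError:
            pass  # already gone; ignore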

Partial-Bug: #1432387
Change-Id: Ic73ee64257aeb024383c6cb79f2e8c04810aaf69
Gorka Eguileor 2019-10-18 15:51:13 +02:00 committed by Eric Harney
parent b06a300eac
commit d2f6ec5569
15 changed files with 471 additions and 38 deletions

View File

@ -15,7 +15,12 @@
"""Coordination and locking utilities."""
import errno
import glob
import inspect
import os
import re
import sys
import uuid
import decorator
@ -55,16 +60,28 @@ class Coordinator(object):
self.agent_id = agent_id or str(uuid.uuid4())
self.started = False
self.prefix = prefix
self._file_path = None
def _get_file_path(self, backend_url):
if backend_url.startswith('file://'):
path = backend_url[7:]
# Copied from TooZ's _normalize_path to get the same path they use
if sys.platform == 'win32':
path = re.sub(r'\\(?=\w:\\)', '', os.path.normpath(path))
return os.path.abspath(os.path.join(path, self.prefix))
return None
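# Illustration (values taken from the unit tests below): with backend_url
# 'file:///opt/stack/data/cinder' and prefix 'cinder-', this returns
# '/opt/stack/data/cinder/cinder-', so lock files are then matched as
# '/opt/stack/data/cinder/cinder-<lock_name>'.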
def start(self):
if self.started:
return
backend_url = cfg.CONF.coordination.backend_url
# NOTE(bluex): Tooz expects member_id as a byte string.
member_id = (self.prefix + self.agent_id).encode('ascii')
self.coordinator = coordination.get_coordinator(
cfg.CONF.coordination.backend_url, member_id)
self.coordinator = coordination.get_coordinator(backend_url, member_id)
self.coordinator.start(start_heart=True)
self._file_path = self._get_file_path(backend_url)
self.started = True
def stop(self):
@ -87,10 +104,28 @@ class Coordinator(object):
else:
raise exception.LockCreationFailed(_('Coordinator uninitialized.'))
def remove_lock(self, glob_name):
# Most locks clean up on release, but not the file lock, so we manually
# clean them.
if self._file_path:
files = glob.glob(self._file_path + glob_name)
for file_name in files:
try:
os.remove(file_name)
except Exception as exc:
if not (isinstance(exc, OSError) and
exc.errno == errno.ENOENT):
LOG.warning('Failed to cleanup lock %(name)s: %(exc)s',
{'name': file_name, 'exc': exc})
COORDINATOR = Coordinator(prefix='cinder-')
def synchronized_remove(glob_name, coordinator=COORDINATOR):
coordinator.remove_lock(glob_name)
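# Usage sketch (hypothetical lock name): a backend that takes its locks with
# coordination.synchronized('3par-' + volume.id) could drop the corresponding
# file lock on deletion with:
#
#     synchronized_remove('3par-' + volume_id)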
def synchronized(lock_name, blocking=True, coordinator=COORDINATOR):
"""Synchronization decorator.

View File

@ -216,7 +216,7 @@ class AttachmentsAPITestCase(test.TestCase):
def _create_attachment(self, ctxt=None, volume_uuid=None,
instance_uuid=None, mountpoint=None,
attach_time=None, detach_time=None,
attach_status=None, attach_mode=None, host=None):
attach_status=None, attach_mode=None, host=''):
"""Create an attachment object."""
ctxt = ctxt or self.ctxt
attachment = objects.VolumeAttachment(ctxt)

View File

@ -13,6 +13,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import errno
import inspect
from unittest import mock
@ -43,14 +44,18 @@ class MockToozLock(tooz.locking.Lock):
self.active_locks.remove(self.name)
@mock.patch('tooz.coordination.get_coordinator')
class CoordinatorTestCase(test.TestCase):
MOCK_TOOZ = False
def test_coordinator_start(self, get_coordinator):
@mock.patch('cinder.coordination.cfg.CONF.coordination.backend_url')
@mock.patch('cinder.coordination.Coordinator._get_file_path')
@mock.patch('tooz.coordination.get_coordinator')
def test_coordinator_start(self, get_coordinator, mock_get_file_path,
mock_backend_url):
crd = get_coordinator.return_value
agent = coordination.Coordinator()
self.assertIsNone(agent._file_path)
agent.start()
self.assertTrue(get_coordinator.called)
self.assertTrue(crd.start.called)
@ -58,6 +63,10 @@ class CoordinatorTestCase(test.TestCase):
agent.start()
crd.start.assert_called_once_with(start_heart=True)
mock_get_file_path.assert_called_once_with(mock_backend_url)
self.assertEqual(mock_get_file_path.return_value, agent._file_path)
@mock.patch('tooz.coordination.get_coordinator')
def test_coordinator_stop(self, get_coordinator):
crd = get_coordinator.return_value
@ -71,6 +80,7 @@ class CoordinatorTestCase(test.TestCase):
agent.stop()
crd.stop.assert_called_once_with()
@mock.patch('tooz.coordination.get_coordinator')
def test_coordinator_lock(self, get_coordinator):
crd = get_coordinator.return_value
crd.get_lock.side_effect = lambda n: MockToozLock(n)
@ -90,6 +100,7 @@ class CoordinatorTestCase(test.TestCase):
self.assertRaises(Locked, agent2.get_lock(lock_name).acquire)
self.assertNotIn(expected_name, MockToozLock.active_locks)
@mock.patch('tooz.coordination.get_coordinator')
def test_coordinator_offline(self, get_coordinator):
crd = get_coordinator.return_value
crd.start.side_effect = tooz.coordination.ToozConnectionError('err')
@ -98,9 +109,82 @@ class CoordinatorTestCase(test.TestCase):
self.assertRaises(tooz.coordination.ToozError, agent.start)
self.assertFalse(agent.started)
def test_get_file_path(self):
backend_url = 'file:///opt/stack/data/cinder'
res = coordination.COORDINATOR._get_file_path(backend_url)
self.assertEqual('/opt/stack/data/cinder/cinder-', res)
def test_get_file_path_non_file(self):
backend_url = 'etcd3+http://192.168.1.95:2379'
res = coordination.COORDINATOR._get_file_path(backend_url)
self.assertIsNone(res)
@mock.patch('cinder.coordination.COORDINATOR._file_path', None)
@mock.patch('glob.glob')
@mock.patch('os.remove')
def test_remove_lock_non_file_lock(self, mock_remove, mock_glob):
coordination.COORDINATOR.remove_lock('lock-file')
mock_glob.assert_not_called()
mock_remove.assert_not_called()
@mock.patch('cinder.coordination.COORDINATOR._file_path', '/data/cinder-')
@mock.patch('glob.glob')
@mock.patch('os.remove')
def test_remove_lock(self, mock_remove, mock_glob):
mock_glob.return_value = ['/data/cinder-attachment_update-UUID-1',
'/data/cinder-attachment_update-UUID-2']
coordination.COORDINATOR.remove_lock('attachment_update-UUID-*')
mock_glob.assert_called_once_with(
'/data/cinder-attachment_update-UUID-*')
self.assertEqual(2, mock_remove.call_count)
mock_remove.assert_has_calls(
[mock.call('/data/cinder-attachment_update-UUID-1'),
mock.call('/data/cinder-attachment_update-UUID-2')])
@mock.patch('cinder.coordination.COORDINATOR._file_path', '/data/cinder-')
@mock.patch('cinder.coordination.LOG.warning')
@mock.patch('glob.glob')
@mock.patch('os.remove')
def test_remove_lock_missing_file(self, mock_remove, mock_glob, mock_log):
mock_glob.return_value = ['/data/cinder-attachment_update-UUID-1',
'/data/cinder-attachment_update-UUID-2']
mock_remove.side_effect = [OSError(errno.ENOENT, ''), None]
coordination.COORDINATOR.remove_lock('attachment_update-UUID-*')
mock_glob.assert_called_once_with(
'/data/cinder-attachment_update-UUID-*')
self.assertEqual(2, mock_remove.call_count)
mock_remove.assert_has_calls(
[mock.call('/data/cinder-attachment_update-UUID-1'),
mock.call('/data/cinder-attachment_update-UUID-2')])
mock_log.assert_not_called()
@mock.patch('cinder.coordination.COORDINATOR._file_path', '/data/cinder-')
@mock.patch('cinder.coordination.LOG.warning')
@mock.patch('glob.glob')
@mock.patch('os.remove')
def test_remove_lock_unknown_failure(self, mock_remove, mock_glob,
mock_log):
mock_glob.return_value = ['/data/cinder-attachment_update-UUID-1',
'/data/cinder-attachment_update-UUID-2']
mock_remove.side_effect = [ValueError(), None]
coordination.COORDINATOR.remove_lock('attachment_update-UUID-*')
mock_glob.assert_called_once_with(
'/data/cinder-attachment_update-UUID-*')
self.assertEqual(2, mock_remove.call_count)
mock_remove.assert_has_calls(
[mock.call('/data/cinder-attachment_update-UUID-1'),
mock.call('/data/cinder-attachment_update-UUID-2')])
self.assertEqual(1, mock_log.call_count)
@mock.patch.object(coordination.COORDINATOR, 'get_lock')
class CoordinationTestCase(test.TestCase):
@mock.patch.object(coordination.COORDINATOR, 'get_lock')
def test_synchronized(self, get_lock):
@coordination.synchronized('lock-{f_name}-{foo.val}-{bar[val]}')
def func(foo, bar):
@ -113,3 +197,15 @@ class CoordinationTestCase(test.TestCase):
func(foo, bar)
get_lock.assert_called_with('lock-func-7-8')
self.assertEqual(['foo', 'bar'], inspect.getfullargspec(func)[0])
@mock.patch('cinder.coordination.COORDINATOR.remove_lock')
def test_synchronized_remove(self, mock_remove):
coordination.synchronized_remove(mock.sentinel.glob_name)
mock_remove.assert_called_once_with(mock.sentinel.glob_name)
@mock.patch('cinder.coordination.COORDINATOR.remove_lock')
def test_synchronized_remove_custom_coordinator(self, mock_remove):
coordinator = mock.Mock()
coordination.synchronized_remove(mock.sentinel.glob_name, coordinator)
coordinator.remove_lock.assert_called_once_with(
mock.sentinel.glob_name)

View File

@ -1443,3 +1443,65 @@ class TestKeystoneProjectGet(test.TestCase):
project = api_utils.get_project(
self.context, self.context.project_id)
self.assertEqual(expected_project.__dict__, project.__dict__)
class TestCleanFileLocks(test.TestCase):
@mock.patch('cinder.utils.LOG.warning')
@mock.patch('cinder.utils.synchronized_remove')
def test_clean_volume_file_locks(self, mock_remove, mock_log):
driver = mock.Mock()
utils.clean_volume_file_locks('UUID', driver)
self.assertEqual(3, mock_remove.call_count)
mock_remove.assert_has_calls([mock.call('UUID-delete_volume'),
mock.call('UUID'),
mock.call('UUID-detach_volume')])
driver.clean_volume_file_locks.assert_called_once_with('UUID')
mock_log.assert_not_called()
@mock.patch('cinder.utils.LOG.warning')
@mock.patch('cinder.utils.synchronized_remove')
def test_clean_volume_file_locks_errors(self, mock_remove, mock_log):
driver = mock.Mock()
driver.clean_volume_file_locks.side_effect = Exception
mock_remove.side_effect = [None, Exception, None]
utils.clean_volume_file_locks('UUID', driver)
self.assertEqual(3, mock_remove.call_count)
mock_remove.assert_has_calls([mock.call('UUID-delete_volume'),
mock.call('UUID'),
mock.call('UUID-detach_volume')])
driver.clean_volume_file_locks.assert_called_once_with('UUID')
self.assertEqual(2, mock_log.call_count)
@mock.patch('cinder.utils.LOG.warning')
@mock.patch('cinder.utils.synchronized_remove')
def test_clean_snapshot_file_locks(self, mock_remove, mock_log):
driver = mock.Mock()
utils.clean_snapshot_file_locks('UUID', driver)
mock_remove.assert_called_once_with('UUID-delete_snapshot')
driver.clean_snapshot_file_locks.assert_called_once_with('UUID')
mock_log.assert_not_called()
@mock.patch('cinder.utils.LOG.warning')
@mock.patch('cinder.utils.synchronized_remove')
def test_clean_snapshot_file_locks_failures(self, mock_remove, mock_log):
driver = mock.Mock()
driver.clean_snapshot_file_locks.side_effect = Exception
mock_remove.side_effect = Exception
utils.clean_snapshot_file_locks('UUID', driver)
mock_remove.assert_called_once_with('UUID-delete_snapshot')
driver.clean_snapshot_file_locks.assert_called_once_with('UUID')
self.assertEqual(2, mock_log.call_count)
@mock.patch('cinder.coordination.synchronized_remove')
def test_api_clean_volume_file_locks(self, mock_remove):
utils.api_clean_volume_file_locks('UUID')
mock_remove.assert_called_once_with('attachment_update-UUID-*')

View File

@ -81,6 +81,10 @@ class BaseVolumeTestCase(test.TestCase):
self.called = []
self.volume_api = volume_api.API()
# Don't accidentally make a call to delete a file from the system
self.mock_lock_remove = self.patch('cinder.utils.synchronized_remove')
self.mock_dlm_lock_remove = self.patch('cinder.coordination.os.remove')
def _cleanup(self):
try:
shutil.rmtree(CONF.volumes_dir)

View File

@ -76,13 +76,15 @@ class SnapshotTestCase(base.BaseVolumeTestCase):
self.vol_type = db.volume_type_get_by_name(self.context,
'vol_type_name')
def test_delete_snapshot_frozen(self):
@mock.patch('cinder.utils.clean_snapshot_file_locks')
def test_delete_snapshot_frozen(self, mock_clean):
service = tests_utils.create_service(self.context, {'frozen': True})
volume = tests_utils.create_volume(self.context, host=service.host)
snapshot = tests_utils.create_snapshot(self.context, volume.id)
self.assertRaises(exception.InvalidInput,
self.volume_api.delete_snapshot, self.context,
snapshot)
mock_clean.assert_not_called()
@ddt.data('create_snapshot', 'create_snapshot_force')
def test_create_snapshot_frozen(self, method):
@ -114,8 +116,9 @@ class SnapshotTestCase(base.BaseVolumeTestCase):
self.volume.delete_snapshot(self.context, snapshot_obj)
self.volume.delete_volume(self.context, volume_src)
@mock.patch('cinder.utils.clean_snapshot_file_locks')
@mock.patch('cinder.tests.unit.fake_notifier.FakeNotifier._notify')
def test_create_delete_snapshot(self, mock_notify):
def test_create_delete_snapshot(self, mock_notify, mock_clean):
"""Test snapshot can be created and deleted."""
volume = tests_utils.create_volume(
self.context,
@ -146,6 +149,7 @@ class SnapshotTestCase(base.BaseVolumeTestCase):
any_order=True)
self.volume.delete_snapshot(self.context, snapshot)
mock_clean.assert_called_once_with(snapshot.id, self.volume.driver)
self.assert_notify_called(mock_notify,
(['INFO', 'volume.create.start'],
['INFO', 'volume.create.end'],
@ -514,7 +518,8 @@ class SnapshotTestCase(base.BaseVolumeTestCase):
snapshot.destroy()
db.volume_destroy(self.context, volume_id)
def test_delete_busy_snapshot(self):
@mock.patch('cinder.utils.clean_snapshot_file_locks')
def test_delete_busy_snapshot(self, mock_clean):
"""Test snapshot can be created and deleted."""
self.volume.driver.vg = fake_lvm.FakeBrickLVM('cinder-volumes',
@ -540,9 +545,11 @@ class SnapshotTestCase(base.BaseVolumeTestCase):
self.assertEqual(fields.SnapshotStatus.AVAILABLE,
snapshot_ref.status)
mock_del_snap.assert_called_once_with(snapshot)
mock_clean.assert_not_called()
@mock.patch('cinder.utils.clean_snapshot_file_locks')
@test.testtools.skipIf(sys.platform == "darwin", "SKIP on OSX")
def test_delete_no_dev_fails(self):
def test_delete_no_dev_fails(self, mock_clean):
"""Test delete snapshot with no dev file fails."""
self.mock_object(os.path, 'exists', lambda x: False)
self.volume.driver.vg = fake_lvm.FakeBrickLVM('cinder-volumes',
@ -567,6 +574,7 @@ class SnapshotTestCase(base.BaseVolumeTestCase):
self.assertEqual(fields.SnapshotStatus.AVAILABLE,
snapshot_ref.status)
mock_del_snap.assert_called_once_with(snapshot)
mock_clean.assert_not_called()
def test_force_delete_snapshot(self):
"""Test snapshot can be forced to delete."""
@ -618,7 +626,8 @@ class SnapshotTestCase(base.BaseVolumeTestCase):
self.context,
snap)
def test_delete_snapshot_driver_not_initialized(self):
@mock.patch('cinder.utils.clean_snapshot_file_locks')
def test_delete_snapshot_driver_not_initialized(self, mock_clean):
volume = tests_utils.create_volume(self.context, **self.volume_params)
snapshot = tests_utils.create_snapshot(self.context, volume.id)
@ -630,6 +639,7 @@ class SnapshotTestCase(base.BaseVolumeTestCase):
snapshot.refresh()
self.assertEqual(fields.SnapshotStatus.ERROR_DELETING,
snapshot.status)
mock_clean.assert_not_called()
@ddt.data({'all_tenants': '1', 'name': 'snap1'},
{'all_tenants': 'true', 'name': 'snap1'},

View File

@ -346,11 +346,13 @@ class VolumeTestCase(base.BaseVolumeTestCase):
self.assertEqual("error_deleting", volume.status)
volume.destroy()
@mock.patch('cinder.utils.clean_volume_file_locks')
@mock.patch('cinder.tests.unit.fake_notifier.FakeNotifier._notify')
@mock.patch('cinder.quota.QUOTAS.rollback', new=mock.Mock())
@mock.patch('cinder.quota.QUOTAS.commit', new=mock.Mock())
@mock.patch('cinder.quota.QUOTAS.reserve', return_value=['RESERVATION'])
def test_create_delete_volume(self, _mock_reserve, mock_notify):
def test_create_delete_volume(self, _mock_reserve, mock_notify,
mock_clean):
"""Test volume can be created and deleted."""
volume = tests_utils.create_volume(
self.context,
@ -387,6 +389,7 @@ class VolumeTestCase(base.BaseVolumeTestCase):
db.volume_get,
self.context,
volume_id)
mock_clean.assert_called_once_with(volume_id, self.volume.driver)
def test_create_delete_volume_with_metadata(self):
"""Test volume can be created with metadata and deleted."""
@ -403,11 +406,13 @@ class VolumeTestCase(base.BaseVolumeTestCase):
self.context,
volume_id)
def test_delete_volume_frozen(self):
@mock.patch('cinder.utils.clean_volume_file_locks')
def test_delete_volume_frozen(self, mock_clean):
service = tests_utils.create_service(self.context, {'frozen': True})
volume = tests_utils.create_volume(self.context, host=service.host)
self.assertRaises(exception.InvalidInput,
self.volume_api.delete, self.context, volume)
mock_clean.assert_not_called()
def test_delete_volume_another_cluster_fails(self):
"""Test delete of volume from another cluster fails."""
@ -564,7 +569,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
'fake_key1',
FAKE_METADATA_TYPE.fake_type)
def test_delete_volume_metadata_maintenance(self):
@mock.patch('cinder.utils.clean_volume_file_locks')
def test_delete_volume_metadata_maintenance(self, mock_clean):
"""Test delete volume metadata in maintenance."""
FAKE_METADATA_TYPE = enum.Enum('METADATA_TYPES', 'fake_type')
test_meta1 = {'fake_key1': 'fake_value1', 'fake_key2': 'fake_value2'}
@ -577,6 +583,7 @@ class VolumeTestCase(base.BaseVolumeTestCase):
volume,
'fake_key1',
FAKE_METADATA_TYPE.fake_type)
mock_clean.assert_not_called()
def test_accept_transfer_maintenance(self):
"""Test accept transfer in maintenance."""
@ -974,7 +981,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
self.assertEqual("deleting", volume.status)
volume.destroy()
def test_delete_busy_volume(self):
@mock.patch('cinder.utils.clean_volume_file_locks')
def test_delete_busy_volume(self, mock_clean):
"""Test volume survives deletion if driver reports it as busy."""
volume = tests_utils.create_volume(self.context, **self.volume_params)
volume_id = volume['id']
@ -989,8 +997,10 @@ class VolumeTestCase(base.BaseVolumeTestCase):
self.assertEqual(volume_id, volume_ref.id)
self.assertEqual("available", volume_ref.status)
mock_del_vol.assert_called_once_with(volume)
mock_clean.assert_not_called()
def test_unmanage_encrypted_volume_fails(self):
@mock.patch('cinder.utils.clean_volume_file_locks')
def test_unmanage_encrypted_volume_fails(self, mock_clean):
volume = tests_utils.create_volume(
self.context,
encryption_key_id=fake.ENCRYPTION_KEY_ID,
@ -1002,6 +1012,7 @@ class VolumeTestCase(base.BaseVolumeTestCase):
self.context,
volume,
unmanage_only=True)
mock_clean.assert_not_called()
self.volume.delete_volume(self.context, volume)
def test_unmanage_cascade_delete_fails(self):
@ -1082,7 +1093,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
volume_api.get_all(self.context, filters={'all_tenants': '1'})
self.assertTrue(get_all.called)
def test_delete_volume_in_error_extending(self):
@mock.patch('cinder.utils.clean_volume_file_locks')
def test_delete_volume_in_error_extending(self, mock_clean):
"""Test volume can be deleted in error_extending stats."""
# create a volume
volume = tests_utils.create_volume(self.context, **self.volume_params)
@ -1094,16 +1106,19 @@ class VolumeTestCase(base.BaseVolumeTestCase):
self.volume.delete_volume(self.context, volume)
self.assertRaises(exception.NotFound, db.volume_get,
self.context, volume['id'])
mock_clean.assert_called_once_with(volume.id, self.volume.driver)
@mock.patch('cinder.utils.clean_volume_file_locks')
@mock.patch.object(db.sqlalchemy.api, 'volume_get',
side_effect=exception.VolumeNotFound(
volume_id='12345678-1234-5678-1234-567812345678'))
def test_delete_volume_not_found(self, mock_get_volume):
def test_delete_volume_not_found(self, mock_get_volume, mock_clean):
"""Test delete volume moves on if the volume does not exist."""
volume_id = '12345678-1234-5678-1234-567812345678'
volume = objects.Volume(self.context, status='available', id=volume_id)
self.volume.delete_volume(self.context, volume)
self.assertTrue(mock_get_volume.called)
mock_clean.assert_called_once_with(volume_id, self.volume.driver)
@mock.patch('cinder.volume.drivers.lvm.LVMVolumeDriver.'
'create_volume_from_snapshot')
@ -2119,7 +2134,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
"""Test volume can't be deleted in maintenance status."""
self._test_cannot_delete_volume('maintenance')
def _test_cannot_delete_volume(self, status):
@mock.patch('cinder.utils.clean_volume_file_locks')
def _test_cannot_delete_volume(self, status, mock_clean):
"""Test volume can't be deleted in invalid stats."""
# create a volume and assign to host
volume = tests_utils.create_volume(self.context, CONF.host,
@ -2130,6 +2146,7 @@ class VolumeTestCase(base.BaseVolumeTestCase):
self.volume_api.delete,
self.context,
volume)
mock_clean.assert_not_called()
# clean up
self.volume.delete_volume(self.context, volume)
@ -2172,7 +2189,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
db.volume_destroy(self.context, volume.id)
def test__revert_to_snapshot_generic_failed(self):
@mock.patch('cinder.utils.clean_volume_file_locks')
def test__revert_to_snapshot_generic_failed(self, mock_clean):
fake_volume = tests_utils.create_volume(self.context,
status='available')
fake_snapshot = tests_utils.create_snapshot(self.context,
@ -2197,8 +2215,11 @@ class VolumeTestCase(base.BaseVolumeTestCase):
mock_copy.assert_called_once_with(
self.context, temp_volume, fake_volume)
mock_driver_delete.assert_called_once_with(temp_volume)
mock_clean.assert_called_once_with(temp_volume.id,
self.volume.driver)
def test__revert_to_snapshot_generic(self):
@mock.patch('cinder.utils.clean_volume_file_locks')
def test__revert_to_snapshot_generic(self, mock_clean):
fake_volume = tests_utils.create_volume(self.context,
status='available')
fake_snapshot = tests_utils.create_snapshot(self.context,
@ -2218,6 +2239,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
mock_copy.assert_called_once_with(
self.context, temp_volume, fake_volume)
mock_driver_delete.assert_called_once_with(temp_volume)
mock_clean.assert_called_once_with(temp_volume.id,
self.volume.driver)
@ddt.data({'driver_error': True},
{'driver_error': False})
@ -3133,14 +3156,16 @@ class VolumeTestCase(base.BaseVolumeTestCase):
self.assertRaises(exception.VolumeNotFound,
self.volume.create_volume,
self.context, test_vol,
{'volume_properties': self.volume_params},
{'volume_properties': self.volume_params,
'source_volid': fake.VOLUME_ID},
{'retry': {'num_attempts': 1, 'host': []}})
volume = db.volume_get(context.get_admin_context(), test_vol_id)
self.assertEqual('error', volume['status'])
self.assertEqual({'_pool0': {'allocated_capacity_gb': 1}},
self.volume.stats['pools'])
def test_cascade_delete_volume_with_snapshots(self):
@mock.patch('cinder.utils.api_clean_volume_file_locks')
def test_cascade_delete_volume_with_snapshots(self, mock_api_clean):
"""Test volume deletion with dependent snapshots."""
volume = tests_utils.create_volume(self.context, **self.volume_params)
self.volume.create_volume(self.context, volume)
@ -3158,8 +3183,10 @@ class VolumeTestCase(base.BaseVolumeTestCase):
volume_api.delete(self.context,
volume,
cascade=True)
mock_api_clean.assert_called_once_with(volume.id)
def test_cascade_delete_volume_with_snapshots_error(self):
@mock.patch('cinder.utils.api_clean_volume_file_locks')
def test_cascade_delete_volume_with_snapshots_error(self, mock_api_clean):
"""Test volume deletion with dependent snapshots."""
volume = tests_utils.create_volume(self.context, **self.volume_params)
self.volume.create_volume(self.context, volume)
@ -3182,8 +3209,11 @@ class VolumeTestCase(base.BaseVolumeTestCase):
self.context,
volume,
cascade=True)
mock_api_clean.assert_not_called()
def test_cascade_force_delete_volume_with_snapshots_error(self):
@mock.patch('cinder.utils.api_clean_volume_file_locks')
def test_cascade_force_delete_volume_with_snapshots_error(self,
mock_api_clean):
"""Test volume force deletion with errored dependent snapshots."""
volume = tests_utils.create_volume(self.context,
host='fakehost')
@ -3202,6 +3232,7 @@ class VolumeTestCase(base.BaseVolumeTestCase):
volume = objects.Volume.get_by_id(self.context, volume.id)
self.assertEqual('deleting', volume.status)
mock_api_clean.assert_called_once_with(volume.id)
def test_cascade_delete_volume_with_snapshots_in_other_project(self):
"""Test volume deletion with dependent snapshots in other project."""

View File

@ -57,6 +57,7 @@ from oslo_utils import strutils
from oslo_utils import timeutils
import tenacity
from cinder import coordination
from cinder import exception
from cinder.i18n import _
@ -69,6 +70,48 @@ INFINITE_UNKNOWN_VALUES = ('infinite', 'unknown')
synchronized = lockutils.synchronized_with_prefix('cinder-')
synchronized_remove = lockutils.remove_external_lock_file_with_prefix(
'cinder-')
def clean_volume_file_locks(volume_id, driver):
"""Remove file locks used by Cinder.
This doesn't take care of driver locks; those should be handled in the
driver's delete_volume method.
"""
for name in (volume_id + '-delete_volume', volume_id,
volume_id + '-detach_volume'):
try:
synchronized_remove(name)
except Exception as exc:
LOG.warning('Failed to cleanup volume lock %(name)s: %(exc)s',
{'name': name, 'exc': exc})
try:
driver.clean_volume_file_locks(volume_id)
except Exception as exc:
LOG.warning('Failed to cleanup driver locks for volume %(id)s: '
'%(exc)s', {'id': volume_id, 'exc': exc})
def api_clean_volume_file_locks(volume_id):
coordination.synchronized_remove('attachment_update-' + volume_id + '-*')
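# Sketch of what this matches (pattern inferred from the unit tests): file
# locks such as '<lock_dir>/cinder-attachment_update-<volume_id>-<uuid>'.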
def clean_snapshot_file_locks(snapshot_id, driver):
try:
name = snapshot_id + '-delete_snapshot'
synchronized_remove(name)
except Exception as exc:
LOG.warning('Failed to cleanup snapshot lock %(name)s: %(exc)s',
{'name': name, 'exc': exc})
try:
driver.clean_snapshot_file_locks(snapshot_id)
except Exception as exc:
LOG.warning('Failed to cleanup driver locks for snapshot %(id)s: '
'%(exc)s', {'id': snapshot_id, 'exc': exc})
def as_int(obj: Union[int, float, str], quiet: bool = True) -> int:

View File

@ -425,6 +425,7 @@ class API(base.Base):
LOG.info("Delete volume request issued successfully.",
resource={'type': 'volume',
'id': volume.id})
utils.api_clean_volume_file_locks(volume.id)
return
if not unmanage_only:
@ -533,6 +534,7 @@ class API(base.Base):
volume,
unmanage_only,
cascade)
utils.api_clean_volume_file_locks(volume.id)
LOG.info("Delete volume request issued successfully.",
resource=volume)
@ -2297,6 +2299,7 @@ class API(base.Base):
volume.status = status_updates['status']
volume.attach_status = status_updates['attach_status']
volume.save()
return remaining_attachments

View File

@ -620,6 +620,9 @@ class BaseVD(object, metaclass=abc.ABCMeta):
It is imperative that this operation ensures that the data from the
deleted volume cannot leak into new volumes when they are created, as
new volumes are likely to belong to a different tenant/project.
If the driver uses custom file locks, they should be cleaned up on success
using cinder.utils.synchronized_remove.
"""
return
@ -1953,6 +1956,50 @@ class BaseVD(object, metaclass=abc.ABCMeta):
return [CONF.backend_defaults._group._opts[cfg_name]['opt']
for cfg_name in cfg_names]
@classmethod
def clean_volume_file_locks(cls, volume_id):
"""Clean up driver specific volume locks.
This method will be called when a volume has been removed from Cinder
or when we detect that the volume doesn't exist.
There are 3 types of locks in Cinder:
- Process locks: Don't need cleanup
- Node locks: Must use cinder.utils.synchronized_remove
- Global locks: Must use cinder.coordination.synchronized_remove
When using method cinder.utils.synchronized_remove we must pass the
exact lock name, whereas method cinder.coordination.synchronized_remove
accepts a glob.
Refer to clean_volume_file_locks, api_clean_volume_file_locks, and
clean_snapshot_file_locks in cinder.utils for examples.
"""
pass
@classmethod
def clean_snapshot_file_locks(cls, snapshot_id):
"""Clean up driver specific snapshot locks.
This method will be called when a snapshot has been removed from Cinder
or when we detect that the snapshot doesn't exist.
There are 3 types of locks in Cinder:
- Process locks: Don't need cleanup
- Node locks: Must use cinder.utils.synchronized_remove
- Global locks: Must use cinder.coordination.synchronized_remove
When using method cinder.utils.synchronized_remove we must pass the
exact lock name, whereas method cinder.coordination.synchronized_remove
accepts a glob.
Refer to clean_volume_file_locks, api_clean_volume_file_locks, and
clean_snapshot_file_locks in cinder.utils for examples.
"""
pass
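# Hypothetical override for a driver that takes node locks per snapshot
# (the lock name is made up):
#
#     @classmethod
#     def clean_snapshot_file_locks(cls, snapshot_id):
#         utils.synchronized_remove(snapshot_id + '-my_driver_action')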
class CloneableImageVD(object, metaclass=abc.ABCMeta):
@abc.abstractmethod
@ -2217,7 +2264,11 @@ class VolumeDriver(ManageableVD, CloneableImageVD, ManageableSnapshotsVD,
raise NotImplementedError()
def delete_snapshot(self, snapshot):
"""Deletes a snapshot."""
"""Deletes a snapshot.
If the driver uses custom file locks, they should be cleaned up on success
using cinder.utils.synchronized_remove.
"""
raise NotImplementedError()
def local_path(self, volume):

View File

@ -32,6 +32,7 @@ except ImportError:
from oslo_log import log as logging
from cinder import coordination
from cinder import exception
from cinder.i18n import _
from cinder.volume import driver
@ -668,3 +669,7 @@ class HPE3PARDriverBase(driver.ManageableVD,
default="normal")
return properties, 'HPE:3PAR'
@classmethod
def clean_volume_file_locks(cls, volume_id):
coordination.synchronized_remove('3par-' + volume_id)
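# Assumption: the rest of the driver takes its volume locks as
# coordination.synchronized('3par-' + volume.id), so this name matches
# them exactly without needing a glob.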

View File

@ -75,7 +75,7 @@ class OnFailureRescheduleTask(flow_utils.CinderTask):
this volume elsewhere.
"""
def __init__(self, reschedule_context, db, driver, scheduler_rpcapi,
def __init__(self, reschedule_context, db, manager, scheduler_rpcapi,
do_reschedule):
requires = ['filter_properties', 'request_spec', 'volume',
'context']
@ -84,7 +84,7 @@ class OnFailureRescheduleTask(flow_utils.CinderTask):
self.do_reschedule = do_reschedule
self.scheduler_rpcapi = scheduler_rpcapi
self.db = db
self.driver = driver
self.manager = manager
self.reschedule_context = reschedule_context
# These exception types will trigger the volume to be set into error
# status rather than being rescheduled.
@ -175,7 +175,7 @@ class OnFailureRescheduleTask(flow_utils.CinderTask):
# host field will be erased. Just in case volume was already created at
# the backend, we attempt to delete it.
try:
self.driver.delete_volume(volume)
self.manager.driver_delete_volume(volume)
except Exception:
# Most likely the volume weren't created at the backend. We can
# safely ignore this.
@ -1257,7 +1257,7 @@ def get_flow(context, manager, db, driver, scheduler_rpcapi, host, volume,
# status when reverting the flow. Meanwhile, no need to revert process of
# ExtractVolumeRefTask.
do_reschedule = allow_reschedule and request_spec and retry
volume_flow.add(OnFailureRescheduleTask(reschedule_context, db, driver,
volume_flow.add(OnFailureRescheduleTask(reschedule_context, db, manager,
scheduler_rpcapi, do_reschedule))
LOG.debug("Volume reschedule parameters: %(allow)s "

View File

@ -35,6 +35,7 @@ intact.
"""
import functools
import time
import typing as ty
@ -74,6 +75,7 @@ from cinder.objects import cgsnapshot
from cinder.objects import consistencygroup
from cinder.objects import fields
from cinder import quota
from cinder import utils
from cinder import volume as cinder_volume
from cinder.volume import configuration as config
from cinder.volume.flows.manager import create_volume
@ -184,6 +186,37 @@ MAPPING = {
}
def clean_volume_locks(func):
@functools.wraps(func)
def wrapper(self, context, volume, *args, **kwargs):
try:
skip_clean = func(self, context, volume, *args, **kwargs)
except Exception:
# On quota failure the volume will already have been deleted from the DB
skip_clean = not volume.deleted
raise
finally:
if not skip_clean:
# Most TooZ drivers clean after themselves (like etcd3), so
# we clean TooZ file locks that are the same as oslo's.
utils.clean_volume_file_locks(volume.id, self.driver)
return wrapper
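# Note: the decorated delete methods return True when they skip the actual
# deletion (e.g. a busy volume or snapshot), so these wrappers keep the lock
# files in those cases.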
def clean_snapshot_locks(func):
@functools.wraps(func)
def wrapper(self, context, snapshot, *args, **kwargs):
try:
skip_clean = func(self, context, snapshot, *args, **kwargs)
except Exception:
skip_clean = not snapshot.deleted
raise
finally:
if not skip_clean:
utils.clean_snapshot_file_locks(snapshot.id, self.driver)
return wrapper
class VolumeManager(manager.CleanableManager,
manager.SchedulerDependentManager):
"""Manages attachable block storage devices."""
@ -772,6 +805,12 @@ class VolumeManager(manager.CleanableManager,
else:
with coordination.COORDINATOR.get_lock(locked_action):
_run_flow()
except exception.VolumeNotFound:
with excutils.save_and_reraise_exception():
utils.clean_volume_file_locks(source_volid, self.driver)
except exception.SnapshotNotFound:
with excutils.save_and_reraise_exception():
utils.clean_snapshot_file_locks(snapshot_id, self.driver)
finally:
try:
flow_engine.storage.fetch('refreshed')
@ -818,7 +857,19 @@ class VolumeManager(manager.CleanableManager,
'backend': backend})
raise exception.Invalid(msg)
@coordination.synchronized('{volume.id}-{f_name}')
def driver_delete_volume(self, volume):
self.driver.delete_volume(volume)
# Most TooZ drivers clean after themselves (like etcd3), so we don't
# worry about those locks, only about TooZ file locks that are the same
# as oslo's.
utils.clean_volume_file_locks(volume.id, self.driver)
def driver_delete_snapshot(self, snapshot):
self.driver.delete_snapshot(snapshot)
utils.clean_snapshot_file_locks(snapshot.id, self.driver)
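# These wrappers are used below instead of calling the driver directly, so
# temporary volumes (e.g. in revert-to-snapshot) also get their file locks
# cleaned.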
@clean_volume_locks
@coordination.synchronized('{volume.id}-delete_volume')
@objects.Volume.set_workers
def delete_volume(self,
context: context.RequestContext,
@ -926,7 +977,7 @@ class VolumeManager(manager.CleanableManager,
# If this is a destination volume, we have to clear the database
# record to avoid user confusion.
self._clear_db(is_migrating_dest, volume, 'available')
return
return True # Let caller know we skipped deletion
except Exception:
with excutils.save_and_reraise_exception():
# If this is a destination volume, we have to clear the
@ -1014,7 +1065,7 @@ class VolumeManager(manager.CleanableManager,
temp_vol = self.driver._create_temp_volume_from_snapshot(
ctxt, volume, snapshot, volume_options=v_options)
self._copy_volume_data(ctxt, temp_vol, volume)
self.driver.delete_volume(temp_vol)
self.driver_delete_volume(temp_vol)
temp_vol.destroy()
except Exception:
with excutils.save_and_reraise_exception():
@ -1025,7 +1076,7 @@ class VolumeManager(manager.CleanableManager,
{'snapshot': snapshot.id,
'volume': volume.id})
if temp_vol and temp_vol.status == 'available':
self.driver.delete_volume(temp_vol)
self.driver_delete_volume(temp_vol)
temp_vol.destroy()
def _revert_to_snapshot(self, context, volume, snapshot) -> None:
@ -1220,7 +1271,8 @@ class VolumeManager(manager.CleanableManager,
resource=snapshot)
return snapshot.id
@coordination.synchronized('{snapshot.id}-{f_name}')
@clean_snapshot_locks
@coordination.synchronized('{snapshot.id}-delete_snapshot')
def delete_snapshot(self,
context: context.RequestContext,
snapshot: objects.Snapshot,
@ -1257,7 +1309,7 @@ class VolumeManager(manager.CleanableManager,
resource_type=message_field.Resource.VOLUME_SNAPSHOT,
resource_uuid=snapshot['id'],
exception=busy_error)
return
return True # Let caller know we skipped deletion
except Exception as delete_error:
with excutils.save_and_reraise_exception():
snapshot.status = fields.SnapshotStatus.ERROR_DELETING
@ -3787,7 +3839,7 @@ class VolumeManager(manager.CleanableManager,
volume_model_update = {'id': volume_ref.id}
try:
self.driver.remove_export(context, volume_ref)
self.driver.delete_volume(volume_ref)
self.driver_delete_volume(volume_ref)
volume_model_update['status'] = 'deleted'
except exception.VolumeIsBusy:
volume_model_update['status'] = 'available'
@ -4102,7 +4154,7 @@ class VolumeManager(manager.CleanableManager,
for snapshot in snapshots:
snapshot_model_update = {'id': snapshot.id}
try:
self.driver.delete_snapshot(snapshot)
self.driver_delete_snapshot(snapshot)
snapshot_model_update['status'] = (
fields.SnapshotStatus.DELETED)
except exception.SnapshotIsBusy:

View File

@ -607,6 +607,27 @@ Example from `cinder/backup/manager.py`:
choose a generic lock name for all your locks and try to create a unique
name for each locking domain.
Drivers that use node locks based on volumes should implement the method
``clean_volume_file_locks``, and if they use locks based on snapshots they
should also implement ``clean_snapshot_file_locks``, using the method
``synchronized_remove`` from ``cinder.utils``.
Example for a driver that uses ``cinder.utils.synchronized``:
.. code-block:: python
def my_operation(self, volume):
@utils.synchronized('my-driver-lock-' + volume.id)
def method():
pass
method()
@classmethod
def clean_volume_file_locks(cls, volume_id):
utils.synchronized_remove('my-driver-lock-' + volume_id)
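The name passed to ``cinder.utils.synchronized_remove`` must exactly match
the name used when the node lock was acquired; unlike
``cinder.coordination.synchronized_remove`` it does not accept a glob.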
Global locks
~~~~~~~~~~~~
@ -664,6 +685,19 @@ For a detailed description of the requirement for global locks in cinder please
refer to the `replacing local locks with Tooz`_ and `manager local locks`_
specs.
Drivers that use global locks based on volumes should implement the method
``clean_volume_file_locks``, and if they use locks based on snapshots they
should also implement ``clean_snapshot_file_locks``, using the method
``synchronized_remove`` from ``cinder.coordination``.
Example for the 3PAR driver:
.. code-block:: python
@classmethod
def clean_volume_file_locks(cls, volume_id):
coordination.synchronized_remove('3par-' + volume_id)
Cinder locking
~~~~~~~~~~~~~~

View File

@ -0,0 +1,7 @@
---
fixes:
- |
`Bug #1432387 <https://bugs.launchpad.net/cinder/+bug/1432387>`_: Try
to automatically clean up file locks after a resource (volume, snapshot) is
deleted. This alleviates the issue of the ever-increasing number of files
in the locks directory.