Merge "Add cinder-manage command to remove file locks"
This commit is contained in:
commit
d689b6b521
@ -52,7 +52,12 @@
|
||||
|
||||
import collections
|
||||
import collections.abc as collections_abc
|
||||
import errno
|
||||
import glob
|
||||
import itertools
|
||||
import logging as python_logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
@ -830,6 +835,112 @@ class ConsistencyGroupCommands(object):
|
||||
gr.save()
|
||||
|
||||
|
||||
class UtilCommands(object):
    """Generic utils."""

    @staticmethod
    def _get_resources_locks():
        """Get all vol/snap/backup file lock paths.

        Scans the oslo.concurrency lock directory and, when the tooz
        coordination backend is file based, the tooz lock directory as well.

        :returns: 3-tuple of defaultdicts mapping volume ids, snapshot ids,
                  and backup process group ids (PGRPs) to the lists of lock
                  file paths that belong to them.
        """
        backup_locks_prefix = 'cinder-cleanup_incomplete_backups_'
        oslo_dir = os.path.abspath(cfg.CONF.oslo_concurrency.lock_path)
        filenames = glob.glob(os.path.join(oslo_dir, 'cinder-*'))

        # If tooz is using file locks include them as well, unless it's the
        # same directory oslo.concurrency is already using.
        backend_url = cfg.CONF.coordination.backend_url
        if backend_url.startswith('file://'):
            tooz_dir = os.path.abspath(backend_url[len('file://'):])
            if tooz_dir != oslo_dir:
                filenames += glob.glob(os.path.join(tooz_dir, 'cinder-*'))

        volumes = collections.defaultdict(list)
        snapshots = collections.defaultdict(list)
        backups = collections.defaultdict(list)
        # A UUID anywhere in the file name identifies the resource the lock
        # belongs to; 'snapshot' in the name distinguishes snapshot locks.
        matcher = re.compile('.*?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-'
                             '[0-9a-f]{4}-[0-9a-f]{12}).*?', re.IGNORECASE)
        for filename in filenames:
            basename = os.path.basename(filename)
            match = matcher.match(basename)
            if match:
                dest = snapshots if 'snapshot' in basename else volumes
                res_id = match.group(1)
                dest[res_id].append(filename)
            elif basename.startswith(backup_locks_prefix):
                # The suffix after the prefix is the PGRP of the backup
                # service that created the lock.  Using len() of the prefix
                # instead of a magic 34 so the two can't drift apart.
                pgrp = basename[len(backup_locks_prefix):]
                backups[pgrp].append(filename)

        return volumes, snapshots, backups

    def _exclude_running_backups(self, backups):
        """Remove backup entries from the dict for running backup services.

        Mutates ``backups`` in place: entries whose PGRP belongs to a running
        cinder-backup process -- or whose state could not be determined -- are
        removed, so their lock files are left untouched by the caller.
        """
        for backup_pgrp in list(backups.keys()):
            # The PGRP is the same as the PID of the parent process, so we know
            # the lock could be in use if the process is running and it's the
            # cinder-backup command (the PID could have been reused).
            cmdline_file = os.path.join('/proc', backup_pgrp, 'cmdline')
            try:
                with open(cmdline_file, 'r') as f:
                    if 'cinder-backup' in f.read():
                        del backups[backup_pgrp]
            except FileNotFoundError:
                # Process is gone, so its lock is safe to remove.
                continue
            except Exception:
                # Unexpected error, leaving the lock file just in case
                del backups[backup_pgrp]

    @args('--services-offline', dest='online',
          action='store_false', default=True,
          help='All locks can be deleted as Cinder services are not running.')
    def clean_locks(self, online):
        """Clean file locks for vols, snaps, and backups on the current host.

        Should be run on any host where we are running a Cinder service (API,
        Scheduler, Volume, Backup) and can be run with the Cinder services
        running or stopped.

        If the services are running it will check existing resources in the
        Cinder database in order to know which resources are still available
        (it's not safe to remove their file locks) and will only remove the
        file locks for the resources that are no longer present. Deleting
        locks while the services are offline is faster as there's no need to
        check the database.

        For backups, the way to know if we can remove the startup lock is by
        checking if the PGRP in the file name is currently running
        cinder-backup.

        Default assumes that services are online, must pass
        ``--services-offline`` to specify that they are offline.

        Doesn't clean DLM locks (except when using file locks), as those don't
        leave lock leftovers.
        """
        self.ctxt = context.get_admin_context()
        # Find volume and snapshots ids, and backups PGRP based on the existing
        # file locks
        volumes, snapshots, backups = self._get_resources_locks()

        # If services are online we cannot delete locks for existing resources
        if online:
            # We don't want to delete file locks for existing resources
            volumes = {vol_id: files for vol_id, files in volumes.items()
                       if not objects.Volume.exists(vol_id)}
            snapshots = {snap_id: files for snap_id, files in snapshots.items()
                         if not objects.Snapshot.exists(snap_id)}
            self._exclude_running_backups(backups)

        # Now clean
        for filenames in itertools.chain(volumes.values(),
                                         snapshots.values(),
                                         backups.values()):
            for filename in filenames:
                try:
                    os.remove(filename)
                except Exception as exc:
                    # An already-gone file is fine; report anything else.
                    if not (isinstance(exc, OSError) and
                            exc.errno == errno.ENOENT):
                        # Bug fix: interpolate the mapping instead of passing
                        # it as a second positional argument to print(), which
                        # printed the raw format string and the dict.
                        print('Failed to cleanup lock %(name)s: %(exc)s'
                              % {'name': filename, 'exc': exc})
|
||||
|
||||
|
||||
CATEGORIES = {
|
||||
'backup': BackupCommands,
|
||||
'config': ConfigCommands,
|
||||
@ -841,6 +952,7 @@ CATEGORIES = {
|
||||
'service': ServiceCommands,
|
||||
'version': VersionCommands,
|
||||
'volume': VolumeCommands,
|
||||
'util': UtilCommands,
|
||||
}
|
||||
|
||||
|
||||
|
@ -10,7 +10,9 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import collections
|
||||
import datetime
|
||||
import errno
|
||||
import io
|
||||
import re
|
||||
import sys
|
||||
@ -1242,6 +1244,151 @@ class TestCinderManageCmd(test.TestCase):
|
||||
service_commands = cinder_manage.ServiceCommands()
|
||||
self.assertIsNone(service_commands.remove('abinary', 'ahost'))
|
||||
|
||||
@mock.patch('glob.glob')
def test_util__get_resources_locks(self, mock_glob):
    """_get_resources_locks buckets lock files into vols/snaps/backups."""
    # Use different directories for oslo.concurrency locks and the tooz
    # file backend so both directories are globbed.
    cinder_manage.cfg.CONF.set_override('lock_path', '/locks',
                                        group='oslo_concurrency')
    cinder_manage.cfg.CONF.set_override('backend_url', 'file:///dlm',
                                        group='coordination')

    vol1 = fake.VOLUME_ID
    vol2 = fake.VOLUME2_ID
    snap = fake.SNAPSHOT_ID
    attach = fake.ATTACHMENT_ID

    files = [
        'cinder-something',  # Non UUID files are ignored
        f'/locks/cinder-{vol1}-delete_volume',
        f'/locks/cinder-{vol2}-delete_volume',
        f'/locks/cinder-{vol2}',
        f'/locks/cinder-{vol2}-detach_volume',
        f'/locks/cinder-{snap}-delete_snapshot',
        '/locks/cinder-cleanup_incomplete_backups_12345',
        '/locks/cinder-unrelated-backup-named-file',
    ]
    dlm_locks = [
        f'/dlm/cinder-attachment_update-{vol2}-{attach}',
    ]
    # First glob call serves the oslo dir, second the tooz dir.
    mock_glob.side_effect = [files, dlm_locks]

    commands = cinder_manage.UtilCommands()
    res = commands._get_resources_locks()

    self.assertEqual(2, mock_glob.call_count)
    mock_glob.assert_has_calls([
        mock.call('/locks/cinder-*'),
        mock.call('/dlm/cinder-*')
    ])

    # vol2 collects locks from both directories; the non-UUID files and
    # the non-matching backup-named file must be ignored.
    expected_vols = {
        vol1: [f'/locks/cinder-{vol1}-delete_volume'],
        vol2: [f'/locks/cinder-{vol2}-delete_volume',
               f'/locks/cinder-{vol2}',
               f'/locks/cinder-{vol2}-detach_volume',
               f'/dlm/cinder-attachment_update-{vol2}-{attach}'],
    }
    expected_snaps = {
        snap: [f'/locks/cinder-{snap}-delete_snapshot']
    }
    expected_backups = {
        '12345': ['/locks/cinder-cleanup_incomplete_backups_12345']
    }
    expected = (expected_vols, expected_snaps, expected_backups)
    self.assertEqual(expected, res)
|
||||
|
||||
@mock.patch.object(cinder_manage, 'open')
def test__exclude_running_backups(self, mock_open):
    """_exclude_running_backups drops entries it must not clean up.

    Entries whose PGRP belongs to a running cinder-backup process, or
    whose /proc read fails unexpectedly, are removed from the dict;
    the rest remain so their lock files get deleted.
    """
    # cmdline contents for a live cinder-backup process.
    mock_running = mock.mock_open(read_data='cinder-backup --config-file '
                                  '/etc/cinder/cinder.conf')
    file_running = mock_running.return_value.__enter__.return_value
    # cmdline for an unrelated process that reused the PGRP.
    mock_other = mock.mock_open(read_data='python')
    file_other = mock_other.return_value.__enter__.return_value

    # 12341: process gone; 12342: running cinder-backup; 12343: another
    # process reused the PGRP; 12344: unexpected error reading cmdline.
    mock_open.side_effect = (FileNotFoundError, mock_running.return_value,
                             mock_other.return_value,
                             ValueError)

    backups = {'12341': '/locks/cinder-cleanup_incomplete_backups_12341',
               '12342': '/locks/cinder-cleanup_incomplete_backups_12342',
               '12343': '/locks/cinder-cleanup_incomplete_backups_12343',
               '12344': '/locks/cinder-cleanup_incomplete_backups_12344'}

    # 12342 (still running) and 12344 (undetermined) must be excluded.
    expected = {'12341': '/locks/cinder-cleanup_incomplete_backups_12341',
                '12343': '/locks/cinder-cleanup_incomplete_backups_12343'}

    commands = cinder_manage.UtilCommands()
    res = commands._exclude_running_backups(backups)

    # The method mutates the dict in place and returns nothing.
    self.assertIsNone(res)
    self.assertEqual(expected, backups)

    self.assertEqual(4, mock_open.call_count)
    mock_open.assert_has_calls([mock.call('/proc/12341/cmdline', 'r'),
                                mock.call('/proc/12342/cmdline', 'r'),
                                mock.call('/proc/12343/cmdline', 'r'),
                                mock.call('/proc/12344/cmdline', 'r')])
    file_running.read.assert_called_once_with()
    file_other.read.assert_called_once_with()
|
||||
|
||||
@ddt.data(True, False)
@mock.patch.object(cinder_manage, 'print')
@mock.patch.object(cinder_manage.os, 'remove')
@mock.patch.object(cinder_manage.UtilCommands, '_exclude_running_backups')
@mock.patch('cinder.objects.Snapshot.exists')
@mock.patch('cinder.objects.Volume.exists')
@mock.patch.object(cinder_manage.UtilCommands, '_get_resources_locks')
@mock.patch.object(cinder_manage.context, 'get_admin_context')
def test_clean_locks(self, online, mock_ctxt, mock_get_locks,
                     mock_vol_exists, mock_snap_exists, mock_exclude_backs,
                     mock_remove, mock_print):
    """clean_locks removes stale lock files, honoring the online flag."""
    vol1_files = [f'/locks/cinder-{fake.VOLUME_ID}-delete_volume']
    vol2_files = [f'/locks/cinder-{fake.VOLUME2_ID}-delete_volume',
                  f'/locks/cinder-{fake.VOLUME2_ID}',
                  f'/locks/cinder-{fake.VOLUME2_ID}-detach_volume',
                  f'/dlm/cinder-attachment_update-{fake.VOLUME2_ID}-'
                  f'{fake.ATTACHMENT_ID}']
    # OrderedDict so Volume.exists' side_effect lines up with vol1/vol2.
    vols = collections.OrderedDict(((fake.VOLUME_ID, vol1_files),
                                    (fake.VOLUME2_ID, vol2_files)))
    snap_files = [f'/locks/cinder-{fake.SNAPSHOT_ID}-delete_snapshot']
    snaps = {fake.SNAPSHOT_ID: snap_files}
    back_files = ['/locks/cinder-cleanup_incomplete_backups_12345']
    backs = {'12345': back_files}
    mock_get_locks.return_value = (vols, snaps, backs)
    # First volume still exists (its locks are kept when online), the
    # second one is gone.
    mock_vol_exists.side_effect = (True, False)
    mock_snap_exists.return_value = False
    # Only the raised ValueError should be reported; the non-exception
    # values are just returned by the mocked os.remove.
    mock_remove.side_effect = [None, errno.ENOENT, None, None,
                               errno.ENOENT, ValueError, None]

    commands = cinder_manage.UtilCommands()
    commands.clean_locks(online=online)

    mock_ctxt.assert_called_once_with()
    mock_get_locks.assert_called_once_with()
    expected_calls = ([mock.call(v) for v in vol1_files] +
                      [mock.call(v) for v in vol2_files] +
                      [mock.call(s) for s in snap_files] +
                      [mock.call(b) for b in back_files])
    if online:
        self.assertEqual(2, mock_vol_exists.call_count)
        mock_vol_exists.assert_has_calls((mock.call(fake.VOLUME_ID),
                                          mock.call(fake.VOLUME2_ID)))
        mock_snap_exists.assert_called_once_with(fake.SNAPSHOT_ID)
        mock_exclude_backs.assert_called_once_with(backs)
        # If services are online we'll check resources that still exist
        # and then we won't delete those that do. In this case the files
        # for the first volume.
        del expected_calls[0]
    else:
        mock_vol_exists.assert_not_called()
        mock_snap_exists.assert_not_called()
        mock_exclude_backs.assert_not_called()

    self.assertEqual(len(expected_calls), mock_remove.call_count)
    mock_remove.assert_has_calls(expected_calls)

    # Only the ValueError exception should be logged
    self.assertEqual(1, mock_print.call_count)
|
||||
|
||||
|
||||
@test.testtools.skipIf(sys.platform == 'darwin', 'Not supported on macOS')
|
||||
class TestCinderRtstoolCmd(test.TestCase):
|
||||
|
@ -243,6 +243,48 @@ Displays the current configuration parameters (options) for Cinder. The
|
||||
optional flag parameter may be used to display the configuration of one
|
||||
parameter.
|
||||
|
||||
Cinder Util
|
||||
~~~~~~~~~~~
|
||||
|
||||
``cinder-manage util clean_locks [-h] [--services-offline]``
|
||||
|
||||
Clean file locks on the current host that were created and are used by drivers
|
||||
and cinder services for volumes, snapshots, and the backup service on the
|
||||
current host.
|
||||
|
||||
Should be run on any host where we are running a Cinder service (API,
|
||||
Scheduler, Volume, Backup) and can be run with the Cinder services running or
|
||||
stopped.
|
||||
|
||||
If the services are running, the command will check which resources still
exist in the Cinder database and will only remove the lock files of resources
that are no longer present, since those are safe to delete.
|
||||
|
||||
For backups, the way to know if we can remove the startup lock is by checking
|
||||
if the PGRP in the file name is currently running cinder-backup.
|
||||
|
||||
Deleting locks while the services are offline is faster as there's no need to
|
||||
check the database or the running processes.
|
||||
|
||||
Default assumes that services are online, must pass ``--services-offline`` to
|
||||
specify that they are offline.
|
||||
|
||||
The common use case for running the command with ``--services-offline`` is to
|
||||
be called on startup as a service unit before any cinder service is started.
|
||||
Command will be usually called without the ``--services-offline`` parameter
|
||||
manually or from a cron job.
|
||||
|
||||
.. warning::
|
||||
|
||||
Passing ``--services-offline`` when the Cinder services are still running
|
||||
breaks the locking mechanism and can lead to undesired behavior in ongoing
|
||||
Cinder operations.
|
||||
|
||||
.. note::
|
||||
|
||||
This command doesn't clean DLM locks (except when using file locks), as
|
||||
those don't leave lock leftovers.
|
||||
|
||||
FILES
|
||||
=====
|
||||
|
||||
|
@ -0,0 +1,10 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
`Bug #1432387 <https://bugs.launchpad.net/cinder/+bug/1432387>`_: Add a
|
||||
command to cinder-manage to clean up file locks existing in hosts where
|
||||
there is a Cinder service running (API, Scheduler, Volume, Backup).
|
||||
Command works with the Cinder services running, useful to be called as a
|
||||
cron job, as well as stopped, to be called on host startup. Command
|
||||
invocation ``cinder-manage util clean_locks`` with optional parameter
|
||||
``--services-offline``.
|
Loading…
Reference in New Issue
Block a user