Merge "Add cinder-manage command to remove file locks"
This commit is contained in:
commit
d689b6b521
@ -52,7 +52,12 @@
|
|||||||
|
|
||||||
import collections
|
import collections
|
||||||
import collections.abc as collections_abc
|
import collections.abc as collections_abc
|
||||||
|
import errno
|
||||||
|
import glob
|
||||||
|
import itertools
|
||||||
import logging as python_logging
|
import logging as python_logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@ -830,6 +835,112 @@ class ConsistencyGroupCommands(object):
|
|||||||
gr.save()
|
gr.save()
|
||||||
|
|
||||||
|
|
||||||
|
class UtilCommands(object):
|
||||||
|
"""Generic utils."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_resources_locks():
|
||||||
|
"""Get all vol/snap/backup file lock paths."""
|
||||||
|
backup_locks_prefix = 'cinder-cleanup_incomplete_backups_'
|
||||||
|
oslo_dir = os.path.abspath(cfg.CONF.oslo_concurrency.lock_path)
|
||||||
|
filenames = glob.glob(os.path.join(oslo_dir, 'cinder-*'))
|
||||||
|
|
||||||
|
backend_url = cfg.CONF.coordination.backend_url
|
||||||
|
if backend_url.startswith('file://'):
|
||||||
|
tooz_dir = os.path.abspath(backend_url[7:])
|
||||||
|
if tooz_dir != oslo_dir:
|
||||||
|
filenames += glob.glob(os.path.join(tooz_dir, 'cinder-*'))
|
||||||
|
|
||||||
|
volumes = collections.defaultdict(list)
|
||||||
|
snapshots = collections.defaultdict(list)
|
||||||
|
backups = collections.defaultdict(list)
|
||||||
|
matcher = re.compile('.*?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-'
|
||||||
|
'[0-9a-f]{4}-[0-9a-f]{12}).*?', re.IGNORECASE)
|
||||||
|
for filename in filenames:
|
||||||
|
basename = os.path.basename(filename)
|
||||||
|
match = matcher.match(basename)
|
||||||
|
if match:
|
||||||
|
dest = snapshots if 'snapshot' in basename else volumes
|
||||||
|
res_id = match.group(1)
|
||||||
|
dest[res_id].append(filename)
|
||||||
|
elif basename.startswith(backup_locks_prefix):
|
||||||
|
pgrp = basename[34:]
|
||||||
|
backups[pgrp].append(filename)
|
||||||
|
|
||||||
|
return volumes, snapshots, backups
|
||||||
|
|
||||||
|
def _exclude_running_backups(self, backups):
|
||||||
|
"""Remove backup entries from the dict for running backup services."""
|
||||||
|
for backup_pgrp in list(backups.keys()):
|
||||||
|
# The PGRP is the same as the PID of the parent process, so we know
|
||||||
|
# the lock could be in use if the process is running and it's the
|
||||||
|
# cinder-backup command (the PID could have been reused).
|
||||||
|
cmdline_file = os.path.join('/proc', backup_pgrp, 'cmdline')
|
||||||
|
try:
|
||||||
|
with open(cmdline_file, 'r') as f:
|
||||||
|
if 'cinder-backup' in f.read():
|
||||||
|
del backups[backup_pgrp]
|
||||||
|
except FileNotFoundError:
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
# Unexpected error, leaving the lock file just in case
|
||||||
|
del backups[backup_pgrp]
|
||||||
|
|
||||||
|
@args('--services-offline', dest='online',
|
||||||
|
action='store_false', default=True,
|
||||||
|
help='All locks can be deleted as Cinder services are not running.')
|
||||||
|
def clean_locks(self, online):
|
||||||
|
"""Clean file locks for vols, snaps, and backups on the current host.
|
||||||
|
|
||||||
|
Should be run on any host where we are running a Cinder service (API,
|
||||||
|
Scheduler, Volume, Backup) and can be run with the Cinder services
|
||||||
|
running or stopped.
|
||||||
|
|
||||||
|
If the services are running it will check existing resources in the
|
||||||
|
Cinder database in order to know which resources are still available
|
||||||
|
(it's not safe to remove their file locks) and will only remove the
|
||||||
|
file locks for the resources that are no longer present. Deleting
|
||||||
|
locks while the services are offline is faster as there's no need to
|
||||||
|
check the database.
|
||||||
|
|
||||||
|
For backups, the way to know if we can remove the startup lock is by
|
||||||
|
checking if the PGRP in the file name is currently running
|
||||||
|
cinder-backup.
|
||||||
|
|
||||||
|
Default assumes that services are online, must pass
|
||||||
|
``--services-offline`` to specify that they are offline.
|
||||||
|
|
||||||
|
Doesn't clean DLM locks (except when using file locks), as those don't
|
||||||
|
leave lock leftovers.
|
||||||
|
"""
|
||||||
|
self.ctxt = context.get_admin_context()
|
||||||
|
# Find volume and snapshots ids, and backups PGRP based on the existing
|
||||||
|
# file locks
|
||||||
|
volumes, snapshots, backups = self._get_resources_locks()
|
||||||
|
|
||||||
|
# If services are online we cannot delete locks for existing resources
|
||||||
|
if online:
|
||||||
|
# We don't want to delete file locks for existing resources
|
||||||
|
volumes = {vol_id: files for vol_id, files in volumes.items()
|
||||||
|
if not objects.Volume.exists(vol_id)}
|
||||||
|
snapshots = {snap_id: files for snap_id, files in snapshots.items()
|
||||||
|
if not objects.Snapshot.exists(snap_id)}
|
||||||
|
self._exclude_running_backups(backups)
|
||||||
|
|
||||||
|
# Now clean
|
||||||
|
for filenames in itertools.chain(volumes.values(),
|
||||||
|
snapshots.values(),
|
||||||
|
backups.values()):
|
||||||
|
for filename in filenames:
|
||||||
|
try:
|
||||||
|
os.remove(filename)
|
||||||
|
except Exception as exc:
|
||||||
|
if not (isinstance(exc, OSError) and
|
||||||
|
exc.errno == errno.ENOENT):
|
||||||
|
print('Failed to cleanup lock %(name)s: %(exc)s',
|
||||||
|
{'name': filename, 'exc': exc})
|
||||||
|
|
||||||
|
|
||||||
CATEGORIES = {
|
CATEGORIES = {
|
||||||
'backup': BackupCommands,
|
'backup': BackupCommands,
|
||||||
'config': ConfigCommands,
|
'config': ConfigCommands,
|
||||||
@ -841,6 +952,7 @@ CATEGORIES = {
|
|||||||
'service': ServiceCommands,
|
'service': ServiceCommands,
|
||||||
'version': VersionCommands,
|
'version': VersionCommands,
|
||||||
'volume': VolumeCommands,
|
'volume': VolumeCommands,
|
||||||
|
'util': UtilCommands,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -10,7 +10,9 @@
|
|||||||
# License for the specific language governing permissions and limitations
|
# License for the specific language governing permissions and limitations
|
||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
|
import collections
|
||||||
import datetime
|
import datetime
|
||||||
|
import errno
|
||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
@ -1242,6 +1244,151 @@ class TestCinderManageCmd(test.TestCase):
|
|||||||
service_commands = cinder_manage.ServiceCommands()
|
service_commands = cinder_manage.ServiceCommands()
|
||||||
self.assertIsNone(service_commands.remove('abinary', 'ahost'))
|
self.assertIsNone(service_commands.remove('abinary', 'ahost'))
|
||||||
|
|
||||||
|
@mock.patch('glob.glob')
|
||||||
|
def test_util__get_resources_locks(self, mock_glob):
|
||||||
|
cinder_manage.cfg.CONF.set_override('lock_path', '/locks',
|
||||||
|
group='oslo_concurrency')
|
||||||
|
cinder_manage.cfg.CONF.set_override('backend_url', 'file:///dlm',
|
||||||
|
group='coordination')
|
||||||
|
|
||||||
|
vol1 = fake.VOLUME_ID
|
||||||
|
vol2 = fake.VOLUME2_ID
|
||||||
|
snap = fake.SNAPSHOT_ID
|
||||||
|
attach = fake.ATTACHMENT_ID
|
||||||
|
|
||||||
|
files = [
|
||||||
|
'cinder-something', # Non UUID files are ignored
|
||||||
|
f'/locks/cinder-{vol1}-delete_volume',
|
||||||
|
f'/locks/cinder-{vol2}-delete_volume',
|
||||||
|
f'/locks/cinder-{vol2}',
|
||||||
|
f'/locks/cinder-{vol2}-detach_volume',
|
||||||
|
f'/locks/cinder-{snap}-delete_snapshot',
|
||||||
|
'/locks/cinder-cleanup_incomplete_backups_12345',
|
||||||
|
'/locks/cinder-unrelated-backup-named-file',
|
||||||
|
]
|
||||||
|
dlm_locks = [
|
||||||
|
f'/dlm/cinder-attachment_update-{vol2}-{attach}',
|
||||||
|
]
|
||||||
|
mock_glob.side_effect = [files, dlm_locks]
|
||||||
|
|
||||||
|
commands = cinder_manage.UtilCommands()
|
||||||
|
res = commands._get_resources_locks()
|
||||||
|
|
||||||
|
self.assertEqual(2, mock_glob.call_count)
|
||||||
|
mock_glob.assert_has_calls([
|
||||||
|
mock.call('/locks/cinder-*'),
|
||||||
|
mock.call('/dlm/cinder-*')
|
||||||
|
])
|
||||||
|
|
||||||
|
expected_vols = {
|
||||||
|
vol1: [f'/locks/cinder-{vol1}-delete_volume'],
|
||||||
|
vol2: [f'/locks/cinder-{vol2}-delete_volume',
|
||||||
|
f'/locks/cinder-{vol2}',
|
||||||
|
f'/locks/cinder-{vol2}-detach_volume',
|
||||||
|
f'/dlm/cinder-attachment_update-{vol2}-{attach}'],
|
||||||
|
}
|
||||||
|
expected_snaps = {
|
||||||
|
snap: [f'/locks/cinder-{snap}-delete_snapshot']
|
||||||
|
}
|
||||||
|
expected_backups = {
|
||||||
|
'12345': ['/locks/cinder-cleanup_incomplete_backups_12345']
|
||||||
|
}
|
||||||
|
expected = (expected_vols, expected_snaps, expected_backups)
|
||||||
|
self.assertEqual(expected, res)
|
||||||
|
|
||||||
|
@mock.patch.object(cinder_manage, 'open')
|
||||||
|
def test__exclude_running_backups(self, mock_open):
|
||||||
|
mock_running = mock.mock_open(read_data='cinder-backup --config-file '
|
||||||
|
'/etc/cinder/cinder.conf')
|
||||||
|
file_running = mock_running.return_value.__enter__.return_value
|
||||||
|
mock_other = mock.mock_open(read_data='python')
|
||||||
|
file_other = mock_other.return_value.__enter__.return_value
|
||||||
|
|
||||||
|
mock_open.side_effect = (FileNotFoundError, mock_running.return_value,
|
||||||
|
mock_other.return_value,
|
||||||
|
ValueError)
|
||||||
|
|
||||||
|
backups = {'12341': '/locks/cinder-cleanup_incomplete_backups_12341',
|
||||||
|
'12342': '/locks/cinder-cleanup_incomplete_backups_12342',
|
||||||
|
'12343': '/locks/cinder-cleanup_incomplete_backups_12343',
|
||||||
|
'12344': '/locks/cinder-cleanup_incomplete_backups_12344'}
|
||||||
|
|
||||||
|
expected = {'12341': '/locks/cinder-cleanup_incomplete_backups_12341',
|
||||||
|
'12343': '/locks/cinder-cleanup_incomplete_backups_12343'}
|
||||||
|
|
||||||
|
commands = cinder_manage.UtilCommands()
|
||||||
|
res = commands._exclude_running_backups(backups)
|
||||||
|
|
||||||
|
self.assertIsNone(res)
|
||||||
|
self.assertEqual(expected, backups)
|
||||||
|
|
||||||
|
self.assertEqual(4, mock_open.call_count)
|
||||||
|
mock_open.assert_has_calls([mock.call('/proc/12341/cmdline', 'r'),
|
||||||
|
mock.call('/proc/12342/cmdline', 'r'),
|
||||||
|
mock.call('/proc/12343/cmdline', 'r'),
|
||||||
|
mock.call('/proc/12344/cmdline', 'r')])
|
||||||
|
file_running.read.assert_called_once_with()
|
||||||
|
file_other.read.assert_called_once_with()
|
||||||
|
|
||||||
|
@ddt.data(True, False)
|
||||||
|
@mock.patch.object(cinder_manage, 'print')
|
||||||
|
@mock.patch.object(cinder_manage.os, 'remove')
|
||||||
|
@mock.patch.object(cinder_manage.UtilCommands, '_exclude_running_backups')
|
||||||
|
@mock.patch('cinder.objects.Snapshot.exists')
|
||||||
|
@mock.patch('cinder.objects.Volume.exists')
|
||||||
|
@mock.patch.object(cinder_manage.UtilCommands, '_get_resources_locks')
|
||||||
|
@mock.patch.object(cinder_manage.context, 'get_admin_context')
|
||||||
|
def test_clean_locks(self, online, mock_ctxt, mock_get_locks,
|
||||||
|
mock_vol_exists, mock_snap_exists, mock_exclude_backs,
|
||||||
|
mock_remove, mock_print):
|
||||||
|
vol1_files = [f'/locks/cinder-{fake.VOLUME_ID}-delete_volume']
|
||||||
|
vol2_files = [f'/locks/cinder-{fake.VOLUME2_ID}-delete_volume',
|
||||||
|
f'/locks/cinder-{fake.VOLUME2_ID}',
|
||||||
|
f'/locks/cinder-{fake.VOLUME2_ID}-detach_volume',
|
||||||
|
f'/dlm/cinder-attachment_update-{fake.VOLUME2_ID}-'
|
||||||
|
f'{fake.ATTACHMENT_ID}']
|
||||||
|
vols = collections.OrderedDict(((fake.VOLUME_ID, vol1_files),
|
||||||
|
(fake.VOLUME2_ID, vol2_files)))
|
||||||
|
snap_files = [f'/locks/cinder-{fake.SNAPSHOT_ID}-delete_snapshot']
|
||||||
|
snaps = {fake.SNAPSHOT_ID: snap_files}
|
||||||
|
back_files = ['/locks/cinder-cleanup_incomplete_backups_12345']
|
||||||
|
backs = {'12345': back_files}
|
||||||
|
mock_get_locks.return_value = (vols, snaps, backs)
|
||||||
|
mock_vol_exists.side_effect = (True, False)
|
||||||
|
mock_snap_exists.return_value = False
|
||||||
|
mock_remove.side_effect = [None, errno.ENOENT, None, None,
|
||||||
|
errno.ENOENT, ValueError, None]
|
||||||
|
|
||||||
|
commands = cinder_manage.UtilCommands()
|
||||||
|
commands.clean_locks(online=online)
|
||||||
|
|
||||||
|
mock_ctxt.assert_called_once_with()
|
||||||
|
mock_get_locks.assert_called_once_with()
|
||||||
|
expected_calls = ([mock.call(v) for v in vol1_files] +
|
||||||
|
[mock.call(v) for v in vol2_files] +
|
||||||
|
[mock.call(s) for s in snap_files] +
|
||||||
|
[mock.call(b) for b in back_files])
|
||||||
|
if online:
|
||||||
|
self.assertEqual(2, mock_vol_exists.call_count)
|
||||||
|
mock_vol_exists.assert_has_calls((mock.call(fake.VOLUME_ID),
|
||||||
|
mock.call(fake.VOLUME2_ID)))
|
||||||
|
mock_snap_exists.assert_called_once_with(fake.SNAPSHOT_ID)
|
||||||
|
mock_exclude_backs.assert_called_once_with(backs)
|
||||||
|
# If services are online we'll check resources that still exist
|
||||||
|
# and then we won't delete those that do. In this case the files
|
||||||
|
# for the first volume.
|
||||||
|
del expected_calls[0]
|
||||||
|
else:
|
||||||
|
mock_vol_exists.assert_not_called()
|
||||||
|
mock_snap_exists.assert_not_called()
|
||||||
|
mock_exclude_backs.assert_not_called()
|
||||||
|
|
||||||
|
self.assertEqual(len(expected_calls), mock_remove.call_count)
|
||||||
|
mock_remove.assert_has_calls(expected_calls)
|
||||||
|
|
||||||
|
# Only the ValueError exception should be logged
|
||||||
|
self.assertEqual(1, mock_print.call_count)
|
||||||
|
|
||||||
|
|
||||||
@test.testtools.skipIf(sys.platform == 'darwin', 'Not supported on macOS')
|
@test.testtools.skipIf(sys.platform == 'darwin', 'Not supported on macOS')
|
||||||
class TestCinderRtstoolCmd(test.TestCase):
|
class TestCinderRtstoolCmd(test.TestCase):
|
||||||
|
@ -243,6 +243,48 @@ Displays the current configuration parameters (options) for Cinder. The
|
|||||||
optional flag parameter may be used to display the configuration of one
|
optional flag parameter may be used to display the configuration of one
|
||||||
parameter.
|
parameter.
|
||||||
|
|
||||||
|
Cinder Util
|
||||||
|
~~~~~~~~~~~
|
||||||
|
|
||||||
|
``cinder-manage util clean_locks [-h] [--services-offline]``
|
||||||
|
|
||||||
|
Clean file locks on the current host that were created and are used by drivers
|
||||||
|
and cinder services for volumes, snapshots, and the backup service on the
|
||||||
|
current host.
|
||||||
|
|
||||||
|
Should be run on any host where we are running a Cinder service (API,
|
||||||
|
Scheduler, Volume, Backup) and can be run with the Cinder services running or
|
||||||
|
stopped.
|
||||||
|
|
||||||
|
If the services are running it will check existing resources in the Cinder
|
||||||
|
database in order to only remove resources that are no longer present (it's
|
||||||
|
safe to delete the files).
|
||||||
|
|
||||||
|
For backups, the way to know if we can remove the startup lock is by checking
|
||||||
|
if the PGRP in the file name is currently running cinder-backup.
|
||||||
|
|
||||||
|
Deleting locks while the services are offline is faster as there's no need to
|
||||||
|
check the database or the running processes.
|
||||||
|
|
||||||
|
Default assumes that services are online, must pass ``--services-offline`` to
|
||||||
|
specify that they are offline.
|
||||||
|
|
||||||
|
The common use case for running the command with ``--services-offline`` is to
|
||||||
|
be called on startup as a service unit before any cinder service is started.
|
||||||
|
Command will be usually called without the ``--services-offline`` parameter
|
||||||
|
manually or from a cron job.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
|
Passing ``--services-offline`` when the Cinder services are still running
|
||||||
|
breaks the locking mechanism and can lead to undesired behavior in ongoing
|
||||||
|
Cinder operations.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
This command doesn't clean DLM locks (except when using file locks), as
|
||||||
|
those don't leave lock leftovers.
|
||||||
|
|
||||||
FILES
|
FILES
|
||||||
=====
|
=====
|
||||||
|
|
||||||
|
@ -0,0 +1,10 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
`Bug #1432387 <https://bugs.launchpad.net/cinder/+bug/1432387>`_: Add a
|
||||||
|
command to cinder-manage to clean up file locks existing in hosts where
|
||||||
|
there is a Cinder service running (API, Scheduler, Volume, Backup).
|
||||||
|
Command works with the Cinder services running, useful to be called as a
|
||||||
|
cron job, as well as stopped, to be called on host startup. Command
|
||||||
|
invocation ``cinder-manage util clean_locks`` with optional parameter
|
||||||
|
``--services-offline``.
|
Loading…
Reference in New Issue
Block a user