Fix inactive md devices removal in fuel-agent
Provision will fail to create new md device if any of disks belongs to inactive md device from time to time. Change-Id: I9abea747e21963b830c1fc27699cc0d756a8c58c Closes-Bug: #1390492
This commit is contained in:
parent
da81b8f681
commit
724124f8a9
@ -87,6 +87,10 @@ class MDWrongSpecError(BaseError):
|
||||
pass
|
||||
|
||||
|
||||
class MDRemovingError(BaseError):
|
||||
pass
|
||||
|
||||
|
||||
class WrongConfigDriveDataError(BaseError):
|
||||
pass
|
||||
|
||||
|
@ -119,6 +119,10 @@ localhost.localdomain)
|
||||
{'device': '/dev/fake2'}]
|
||||
|
||||
mu.mdcreate('/dev/md0', 'mirror', '/dev/fake1', '/dev/fake2')
|
||||
mock_mdclean_expected_calls = [mock.call('/dev/fake1'),
|
||||
mock.call('/dev/fake2')]
|
||||
self.assertEqual(mock_mdclean_expected_calls,
|
||||
mock_mdclean.call_args_list)
|
||||
mock_exec.assert_called_once_with(
|
||||
'mdadm', '--create', '--force', '/dev/md0', '-e0.90',
|
||||
'--level=mirror',
|
||||
@ -182,6 +186,32 @@ localhost.localdomain)
|
||||
expected_calls = [mock.call('/dev/fake1'), mock.call('/dev/fake2')]
|
||||
self.assertEqual(mock_mdclean.call_args_list, expected_calls)
|
||||
|
||||
@mock.patch.object(mu, 'mdclean')
|
||||
@mock.patch.object(mu, 'mdremove')
|
||||
@mock.patch.object(mu, 'mddisplay')
|
||||
def test_mdclean_all(self, mock_mddisplay, mock_mdremove, mock_mdclean):
|
||||
mock_mddisplay.side_effect = [
|
||||
[{'name': '/dev/md10', 'devices': ['/dev/fake10']},
|
||||
{'name': '/dev/md11'}],
|
||||
[{'name': '/dev/md11'}],
|
||||
[]
|
||||
]
|
||||
mu.mdclean_all()
|
||||
mock_mdremove_expected_calls = [
|
||||
mock.call('/dev/md10'), mock.call('/dev/md11'),
|
||||
mock.call('/dev/md11')]
|
||||
mock_mdclean.assert_called_once_with('/dev/fake10')
|
||||
self.assertEqual(mock_mdremove.call_args_list,
|
||||
mock_mdremove_expected_calls)
|
||||
|
||||
@mock.patch.object(mu, 'mdclean')
|
||||
@mock.patch.object(mu, 'mdremove')
|
||||
@mock.patch.object(mu, 'mddisplay')
|
||||
def test_mdclean_all_fail(self, mock_mddisplay, mock_mdremove,
|
||||
mock_mdclean):
|
||||
mock_mddisplay.return_value = [{'name': '/dev/md11'}]
|
||||
self.assertRaises(errors.MDRemovingError, mu.mdclean_all)
|
||||
|
||||
@mock.patch.object(utils, 'execute')
|
||||
@mock.patch.object(mu, 'get_mdnames')
|
||||
def test_mdremove_ok(self, mock_get_mdn, mock_exec):
|
||||
@ -189,6 +219,7 @@ localhost.localdomain)
|
||||
# should run mdadm command to remove md device
|
||||
mock_get_mdn.return_value = ['/dev/md0']
|
||||
expected_calls = [
|
||||
mock.call('udevadm', 'settle', '--quiet', check_exit_code=[0]),
|
||||
mock.call('mdadm', '--stop', '/dev/md0', check_exit_code=[0]),
|
||||
mock.call('mdadm', '--remove', '/dev/md0', check_exit_code=[0, 1])
|
||||
]
|
||||
|
@ -100,7 +100,8 @@ def mdcreate(mdname, level, device, *args):
|
||||
'Error while creating md: at least one of devices is '
|
||||
'already in belongs to some md')
|
||||
|
||||
# cleaning md metadata from devices
|
||||
#FIXME: mdadm will ask user to continue creating if any device appears to
|
||||
# be a part of raid array. Superblock zeroing helps to avoid that.
|
||||
map(mdclean, devices)
|
||||
utils.execute('mdadm', '--create', '--force', mdname, '-e0.90',
|
||||
'--level=%s' % level,
|
||||
@ -113,6 +114,31 @@ def mdremove(mdname):
|
||||
if mdname not in get_mdnames():
|
||||
raise errors.MDNotFoundError(
|
||||
'Error while removing md: md %s not found' % mdname)
|
||||
#FIXME: The issue faced was quiet hard to reproduce and to figure out the
|
||||
# root cause. For unknown reason already removed md device is
|
||||
# unexpectedly returning back after a while from time to time making
|
||||
# new md device creation to fail.
|
||||
# Still the actual reason of its failure is unknown, but after a
|
||||
# searching on a web a mention was found about a race in udev
|
||||
# http://dev.bizo.com/2012/07/mdadm-device-or-resource-busy.html
|
||||
# The article recommends to disable udev's queue entirely during md
|
||||
# device manipulation which sounds rather unappropriate for our case.
|
||||
# And the link to original post on mailing list suggests to execute
|
||||
# `udevadm settle` before removing the md device.
|
||||
# here -> http://permalink.gmane.org/gmane.linux.raid/34027
|
||||
# So, what was done. `udevadm settle` calls were placed just
|
||||
# before any of `mdadm` calls and the analizyng the logs was started.
|
||||
# According to the manual `settle` is an option that "Watches the
|
||||
# udev event queue, and exits if all current events are handled".
|
||||
# That means it will wait for udev's finishing of processing the
|
||||
# events. According to the logs noticeable delay had been recognized
|
||||
# between `udevadm settle` and the next `mdadm` call.
|
||||
# The delay was about 150-200ms or even bigger. It was appeared
|
||||
# right before the `mdadm --stop` call. That just means that udev was
|
||||
# too busy with events when we start to modifiy md devices hard.
|
||||
# Thus `udevadm settle` is helping to avoid the later failure and
|
||||
# to prevent strange behaviour of md device.
|
||||
utils.execute('udevadm', 'settle', '--quiet', check_exit_code=[0])
|
||||
utils.execute('mdadm', '--stop', mdname, check_exit_code=[0])
|
||||
utils.execute('mdadm', '--remove', mdname, check_exit_code=[0, 1])
|
||||
|
||||
@ -129,3 +155,10 @@ def mdclean_all():
|
||||
mdremove(md['name'])
|
||||
for dev in md.get('devices', []):
|
||||
mdclean(dev)
|
||||
# second attempt, remove stale inactive devices
|
||||
for md in mddisplay():
|
||||
mdremove(md['name'])
|
||||
mds = mddisplay()
|
||||
if len(mds) > 0:
|
||||
raise errors.MDRemovingError(
|
||||
'Error while removing mds: few devices still presented %s' % mds)
|
||||
|
Loading…
Reference in New Issue
Block a user