add quiet cleanup option

This commit:
- Introduces a new configuration variable that can suppress
  the python exceptions during disk metadata cleanup.

Closes-Bug: #2061362
Signed-off-by: Adam Rozman <adam.rozman@est.tech>
Change-Id: I581fe092e2a703188374ba90d6bcc2cab0934585
This commit is contained in:
Adam Rozman 2024-04-08 11:08:16 +03:00
parent af907322f6
commit 915fd7a38a
No known key found for this signature in database
GPG Key ID: 7F1877295730C9F9
9 changed files with 370 additions and 40 deletions

View File

@ -355,6 +355,12 @@ cli_opts = [
'cleaning from inadvertently destroying a running '
'cluster which may be visible over a storage fabric '
'such as FibreChannel.'),
cfg.BoolOpt('quiet_cleanup',
default=APARAMS.get('ipa-quiet-cleanup', False),
help='When this option is set to True, the disk cleanup'
'failure exceptions will be suppressed and the error'
'will be just logged instead of causing a cleanup'
'failure.'),
cfg.BoolOpt('md5_enabled',
default=True,
help='If the MD5 algorithm is enabled for file checksums. '

View File

@ -438,7 +438,7 @@ def get_dev_sector_size(dev):
return int(sect_sz)
def destroy_disk_metadata(dev, node_uuid):
def destroy_disk_metadata(dev, node_uuid, quiet_mode=False):
"""Destroy metadata structures on node's disk.
Ensure that node's disk magic strings are wiped without zeroing the
@ -456,12 +456,31 @@ def destroy_disk_metadata(dev, node_uuid):
use_standard_locale=True)
except processutils.ProcessExecutionError as e:
with excutils.save_and_reraise_exception() as ctxt:
ctxt.reraise = not quiet_mode
# NOTE(zhenguo): Check if --force option is supported for wipefs,
# if not, we should try without it.
if '--force' in str(e):
# Note(adam): Won't reraise even in non quiet mode
ctxt.reraise = False
utils.execute('wipefs', '--all', dev,
use_standard_locale=True)
try:
utils.execute('wipefs', '--all', dev,
use_standard_locale=True)
except Exception as er:
with excutils.save_and_reraise_exception() as ctxti:
ctxti.reraise = not quiet_mode
LOG.error("wipefs --force device %(device)s Error "
"%(error)s", {'device': dev, 'error': er})
if quiet_mode:
return
LOG.error("wipefs --force --all device %(device)s Error "
"%(error)s", {'device': dev, 'error': e})
return
except Exception as e:
with excutils.save_and_reraise_exception() as ctxt:
ctxt.reraise = not quiet_mode
LOG.error("wipefs on device %(device)s Error %(error)s",
{'device': dev, 'error': e})
return
# NOTE(TheJulia): sgdisk attempts to load and make sense of the
# partition tables in advance of wiping the partition data.
# This means when a CRC error is found, sgdisk fails before
@ -469,7 +488,14 @@ def destroy_disk_metadata(dev, node_uuid):
# This is the same bug as
# https://bugs.launchpad.net/ironic-python-agent/+bug/1737556
sector_size = get_dev_sector_size(dev)
try:
sector_size = get_dev_sector_size(dev)
except Exception as e:
with excutils.save_and_reraise_exception() as ctxt:
ctxt.reraise = not quiet_mode
LOG.error("Getting sector size on %(device)s failed Error "
"%(error)s", {'device': dev, 'error': e})
return
# https://uefi.org/specs/UEFI/2.10/05_GUID_Partition_Table_Format.html If
# the block size is 512, the First Usable LBA must be greater than or equal
# to 34 [...] if the logical block size is 4096, the First Usable LBA must
@ -480,29 +506,66 @@ def destroy_disk_metadata(dev, node_uuid):
gpt_sectors = 5
# Overwrite the Primary GPT, catch very small partitions (like EBRs)
try:
dev_size = get_dev_byte_size(dev)
except Exception as e:
with excutils.save_and_reraise_exception() as ctxt:
ctxt.reraise = not quiet_mode
LOG.error("Getting device size (byte) on %(device)s failed Error "
"%(error)s", {'device': dev, 'error': e})
return
dd_bs = 'bs=%s' % sector_size
dd_device = 'of=%s' % dev
dd_count = 'count=%s' % gpt_sectors
dev_size = get_dev_byte_size(dev)
if dev_size < gpt_sectors * sector_size:
dd_count = 'count=%s' % int(dev_size / sector_size)
utils.execute('dd', dd_bs, 'if=/dev/zero', dd_device, dd_count,
'oflag=direct', use_standard_locale=True)
try:
utils.execute('dd', dd_bs, 'if=/dev/zero', dd_device, dd_count,
'oflag=direct', use_standard_locale=True)
except Exception as e:
with excutils.save_and_reraise_exception() as ctxt:
ctxt.reraise = not quiet_mode
LOG.error("Destroying metadata %(device)s Error %(error)s "
"Destruction with dd dev_size < GPT_SIZE_SECTORS.",
{'device': dev, 'error': e})
return
# Overwrite the Secondary GPT, do this only if there could be one
if dev_size > gpt_sectors * sector_size:
gpt_backup = int(dev_size / sector_size - gpt_sectors)
dd_seek = 'seek=%i' % gpt_backup
dd_count = 'count=%s' % gpt_sectors
utils.execute('dd', dd_bs, 'if=/dev/zero', dd_device, dd_count,
'oflag=direct', dd_seek, use_standard_locale=True)
try:
utils.execute('dd', dd_bs, 'if=/dev/zero', dd_device, dd_count,
'oflag=direct', dd_seek, use_standard_locale=True)
except Exception as e:
with excutils.save_and_reraise_exception() as ctxt:
ctxt.reraise = not quiet_mode
LOG.error("Destroying metadata %(device)s Error %(error)s "
"Destruction with dd dev_size > GPT_SIZE_SECTORS.",
{'device': dev, 'error': e})
return
# Go ahead and let sgdisk run as well.
utils.execute('sgdisk', '-Z', dev, use_standard_locale=True)
try:
utils.execute('sgdisk', '-Z', dev, use_standard_locale=True)
except Exception as e:
with excutils.save_and_reraise_exception() as ctxt:
ctxt.reraise = not quiet_mode
LOG.error("Destroying metadata on device \" %(device)s \" "
": Error \" %(error)s \" Destruction with sgdisk.",
{'device': dev, 'error': e})
return
try:
wait_for_disk_to_become_available(dev)
except exception.IronicException as e:
if quiet_mode:
LOG.error("Destroying metadata on %(dev)s for node "
"%(node)s has failed! Error: disk is not available!",
{'dev': dev, 'node': node_uuid})
return
raise exception.InstanceDeployFailure(
_('Destroying metadata failed on device %(device)s. '
'Error: %(error)s')

View File

@ -342,7 +342,7 @@ def _write_whole_disk_image(image, image_info, device):
error.
"""
# FIXME(dtantsur): pass the real node UUID for logging
disk_utils.destroy_disk_metadata(device, '')
disk_utils.destroy_disk_metadata(device, '', CONF.quiet_cleanup)
disk_utils.udev_settle()
command = ['qemu-img', 'convert',

View File

@ -1836,7 +1836,8 @@ class GenericHardwareManager(HardwareManager):
for dev in self._list_erasable_devices(node):
safety_check_block_device(node, dev.name)
try:
disk_utils.destroy_disk_metadata(dev.name, node['uuid'])
disk_utils.destroy_disk_metadata(dev.name, node['uuid'],
CONF.quiet_cleanup)
except processutils.ProcessExecutionError as e:
LOG.error('Failed to erase the metadata on device "%(dev)s". '
'Error: %(error)s', {'dev': dev.name, 'error': e})
@ -1882,7 +1883,8 @@ class GenericHardwareManager(HardwareManager):
'clean', {'dev': dev.name, 'error': e})
secure_erase_error = e
try:
disk_utils.destroy_disk_metadata(dev.name, node['uuid'])
disk_utils.destroy_disk_metadata(dev.name, node['uuid'],
CONF.quiet_cleanup)
except processutils.ProcessExecutionError as e:
LOG.error('Failed to erase the metadata on device '
'"%(dev)s". Error: %(error)s',

View File

@ -231,8 +231,7 @@ def work_on_disk(dev, root_mb, swap_mb, ephemeral_mb, ephemeral_format,
commit = not preserve_ephemeral
# now if we are committing the changes to disk clean first.
if commit:
disk_utils.destroy_disk_metadata(dev, node_uuid)
disk_utils.destroy_disk_metadata(dev, node_uuid, CONF.quiet_cleanup)
try:
# If requested, get the configdrive file and determine the size
# of the configdrive partition

View File

@ -300,7 +300,7 @@ class TestStandbyExtension(base.IronicAgentTest):
cache='directsync',
out_of_order=True,
sparse_size='0')
wipe_mock.assert_called_once_with(device, '')
wipe_mock.assert_called_once_with(device, '', False)
udev_mock.assert_called_once_with()
rescan_mock.assert_called_once_with(device)
fix_gpt_mock.assert_called_once_with(device, node_uuid=None)

View File

@ -367,6 +367,33 @@ class DestroyMetaDataTestCase(base.IronicLibTestCase):
disk_utils.destroy_disk_metadata(self.dev, self.node_uuid)
mock_exec.assert_has_calls(expected_calls)
def test_destroy_disk_metadata_quiet(self, mock_exec):
mock_exec.side_effect = iter([
(None, None),
('512\n', None),
('524288\n', None),
(None, None),
(None, None),
(None, None),
(None, None)])
expected_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('blockdev', '--getss', 'fake-dev'),
mock.call('blockdev', '--getsize64', 'fake-dev'),
mock.call('dd', 'bs=512', 'if=/dev/zero',
'of=fake-dev', 'count=33', 'oflag=direct',
use_standard_locale=True),
mock.call('dd', 'bs=512', 'if=/dev/zero',
'of=fake-dev', 'count=33', 'oflag=direct',
'seek=991', use_standard_locale=True),
mock.call('sgdisk', '-Z', 'fake-dev',
use_standard_locale=True),
mock.call('fuser', self.dev, check_exit_code=[0, 1])]
disk_utils.destroy_disk_metadata(self.dev, self.node_uuid, True)
mock_exec.assert_has_calls(expected_calls)
def test_destroy_disk_metadata_wipefs_fail(self, mock_exec):
mock_exec.side_effect = processutils.ProcessExecutionError
@ -378,6 +405,218 @@ class DestroyMetaDataTestCase(base.IronicLibTestCase):
self.node_uuid)
mock_exec.assert_has_calls(expected_call)
@mock.patch.object(disk_utils.LOG, 'error', autospec=True)
def test_destroy_disk_metadata_wipefs_faili_quiet(self, mock_log,
mock_exec):
err = processutils.ProcessExecutionError(None, None, None, None, None)
mock_exec.side_effect = err
mock_exec_expected_call = [mock.call('wipefs', '--force', '--all',
'fake-dev', use_standard_locale=True)]
mock_log_expected_call = [mock.call("wipefs --force --all device"
" %(device)s Error %(error)s",
{'device': 'fake-dev',
'error': err})]
disk_utils.destroy_disk_metadata(self.dev, self.node_uuid, True)
mock_exec.assert_has_calls(mock_exec_expected_call)
mock_log.assert_has_calls(mock_log_expected_call)
def test_destroy_disk_metadata_wipefs_not_support_force(self, mock_exec):
"""This has no quiet version, this should be quiet by default"""
mock_exec.side_effect = iter([
processutils.ProcessExecutionError(description='--force'),
(None, None),
('512\n', None),
(None, None),
(None, None),
(None, None),
(None, None)])
expected_call = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('wipefs', '--all', 'fake-dev',
use_standard_locale=True)]
disk_utils.destroy_disk_metadata(self.dev, self.node_uuid)
mock_exec.assert_has_calls(expected_call)
@mock.patch.object(disk_utils.LOG, 'error', autospec=True)
def test_destory_disk_metadata_sector_size_fail_quiet(self, mock_log,
mock_exec):
err = processutils.ProcessExecutionError()
mock_exec.side_effect = iter([(None, None), err])
mock_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('blockdev', '--getss', 'fake-dev')]
log_calls = [mock.call("Getting sector size on %(device)s failed "
"Error %(error)s", {'device': 'fake-dev',
'error': err})]
disk_utils.destroy_disk_metadata(self.dev, self.node_uuid, True)
mock_exec.assert_has_calls(mock_calls)
mock_log.assert_has_calls(log_calls)
@mock.patch.object(disk_utils.LOG, 'error', autospec=True)
def test_destory_disk_metadata_sector_size_fail(self, mock_log, mock_exec):
err = processutils.ProcessExecutionError()
mock_exec.side_effect = iter([(None, None), err])
mock_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('blockdev', '--getss', 'fake-dev')]
log_calls = [mock.call("Getting sector size on %(device)s failed "
"Error %(error)s", {'device': 'fake-dev',
'error': err})]
self.assertRaises(processutils.ProcessExecutionError,
disk_utils.destroy_disk_metadata,
self.dev,
self.node_uuid)
mock_exec.assert_has_calls(mock_calls)
mock_log.assert_has_calls(log_calls)
@mock.patch.object(disk_utils.LOG, 'error', autospec=True)
def test_destory_disk_metadata_device_size_fail_quiet(self, mock_log,
mock_exec):
err = processutils.ProcessExecutionError()
mock_exec.side_effect = iter([(None, None), ('512\n', None), err])
mock_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('blockdev', '--getss', 'fake-dev'),
mock.call('blockdev', '--getsize64', 'fake-dev')]
log_calls = [mock.call("Getting device size (byte) on %(device)s "
"failed Error %(error)s", {'device': 'fake-dev',
'error': err})]
disk_utils.destroy_disk_metadata(self.dev, self.node_uuid, True)
mock_exec.assert_has_calls(mock_calls)
mock_log.assert_has_calls(log_calls)
@mock.patch.object(disk_utils.LOG, 'error', autospec=True)
def test_destory_disk_metadata_device_size_fail(self, mock_log, mock_exec):
err = processutils.ProcessExecutionError()
mock_exec.side_effect = iter([(None, None), ('512\n', None), err])
mock_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('blockdev', '--getss', 'fake-dev'),
mock.call('blockdev', '--getsize64', 'fake-dev')]
log_calls = [mock.call("Getting device size (byte) on %(device)s "
"failed Error %(error)s", {'device': 'fake-dev',
'error': err})]
self.assertRaises(processutils.ProcessExecutionError,
disk_utils.destroy_disk_metadata,
self.dev,
self.node_uuid)
mock_exec.assert_has_calls(mock_calls)
mock_log.assert_has_calls(log_calls)
@mock.patch.object(disk_utils.LOG, 'error', autospec=True)
def test_destory_disk_metadata_dd_primary_fail_quiet(self, mock_log,
mock_exec):
err = processutils.ProcessExecutionError()
mock_exec.side_effect = iter([
(None, None), ('512\n', None),
('524288\n', None),
err,
(None, None)])
mock_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('blockdev', '--getss', 'fake-dev'),
mock.call('blockdev', '--getsize64', 'fake-dev'),
mock.call('dd', 'bs=512', 'if=/dev/zero',
'of=fake-dev', 'count=33', 'oflag=direct',
use_standard_locale=True)]
log_calls = [mock.call("Destroying metadata %(device)s Error "
"%(error)s Destruction with dd dev_size < "
"GPT_SIZE_SECTORS.", {'device': 'fake-dev',
'error': err})]
disk_utils.destroy_disk_metadata(self.dev, self.node_uuid, True)
mock_exec.assert_has_calls(mock_calls)
mock_log.assert_has_calls(log_calls)
def test_destroy_disk_metadata_dd_primary_fail(self, mock_exec):
expected_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('blockdev', '--getss', 'fake-dev'),
mock.call('blockdev', '--getsize64', 'fake-dev'),
mock.call('dd', 'bs=512', 'if=/dev/zero',
'of=fake-dev', 'count=33', 'oflag=direct',
use_standard_locale=True)]
mock_exec.side_effect = iter([(None, None),
('512\n', None),
('524288\n', None),
processutils.ProcessExecutionError()])
self.assertRaises(processutils.ProcessExecutionError,
disk_utils.destroy_disk_metadata,
self.dev,
self.node_uuid)
mock_exec.assert_has_calls(expected_calls)
def test_destroy_disk_metadata_dd_secondary_fail(self, mock_exec):
expected_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('blockdev', '--getss', 'fake-dev'),
mock.call('blockdev', '--getsize64', 'fake-dev'),
mock.call('dd', 'bs=512', 'if=/dev/zero',
'of=fake-dev', 'count=33', 'oflag=direct',
use_standard_locale=True),
mock.call('dd', 'bs=512', 'if=/dev/zero',
'of=fake-dev', 'count=33', 'oflag=direct',
'seek=991', use_standard_locale=True)]
mock_exec.side_effect = iter([(None, None),
('512\n', None),
('524288\n', None),
(None, None),
processutils.ProcessExecutionError()])
self.assertRaises(processutils.ProcessExecutionError,
disk_utils.destroy_disk_metadata,
self.dev,
self.node_uuid)
mock_exec.assert_has_calls(expected_calls)
@mock.patch.object(disk_utils.LOG, 'error', autospec=True)
def test_destroy_disk_metadata_dd_secondary_fail_quiet(self, mock_log,
mock_exec):
err = processutils.ProcessExecutionError()
mock_exec.side_effect = iter([(None, None),
('512\n', None),
('524288\n', None),
(None, None),
err])
exec_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('blockdev', '--getss', 'fake-dev'),
mock.call('blockdev', '--getsize64', 'fake-dev'),
mock.call('dd', 'bs=512', 'if=/dev/zero',
'of=fake-dev', 'count=33', 'oflag=direct',
use_standard_locale=True),
mock.call('dd', 'bs=512', 'if=/dev/zero',
'of=fake-dev', 'count=33', 'oflag=direct',
'seek=991', use_standard_locale=True)]
log_calls = [mock.call("Destroying metadata %(device)s "
"Error %(error)s Destruction with "
"dd dev_size > GPT_SIZE_SECTORS.",
{'device': 'fake-dev', 'error': err})]
disk_utils.destroy_disk_metadata(self.dev, self.node_uuid, True)
mock_exec.assert_has_calls(exec_calls)
mock_log.assert_has_calls(log_calls)
def test_destroy_disk_metadata_sgdisk_fail(self, mock_exec):
expected_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
@ -404,23 +643,37 @@ class DestroyMetaDataTestCase(base.IronicLibTestCase):
self.node_uuid)
mock_exec.assert_has_calls(expected_calls)
def test_destroy_disk_metadata_wipefs_not_support_force(self, mock_exec):
mock_exec.side_effect = iter([
processutils.ProcessExecutionError(description='--force'),
(None, None),
('512\n', None),
('524288\n', None),
(None, None),
(None, None),
(None, None),
(None, None)])
@mock.patch.object(disk_utils.LOG, 'error', autospec=True)
def test_destroy_disk_metadata_sgdisk_fail_quiet(self, mock_log,
mock_exec):
err = processutils.ProcessExecutionError()
mock_exec.side_effect = iter([(None, None),
('512\n', None),
('524288\n', None),
(None, None),
(None, None),
err])
expected_call = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('wipefs', '--all', 'fake-dev',
use_standard_locale=True)]
disk_utils.destroy_disk_metadata(self.dev, self.node_uuid)
mock_exec.assert_has_calls(expected_call)
exec_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',
use_standard_locale=True),
mock.call('blockdev', '--getss', 'fake-dev'),
mock.call('blockdev', '--getsize64', 'fake-dev'),
mock.call('dd', 'bs=512', 'if=/dev/zero',
'of=fake-dev', 'count=33', 'oflag=direct',
use_standard_locale=True),
mock.call('dd', 'bs=512', 'if=/dev/zero',
'of=fake-dev', 'count=33', 'oflag=direct',
'seek=991', use_standard_locale=True),
mock.call('sgdisk', '-Z', 'fake-dev',
use_standard_locale=True)]
log_calls = [mock.call("Destroying metadata on device \" %(device)s \""
" : Error \" %(error)s \" Destruction with "
"sgdisk.",
{'device': 'fake-dev', 'error': err})]
disk_utils.destroy_disk_metadata(self.dev, self.node_uuid, True)
mock_exec.assert_has_calls(exec_calls)
mock_log.assert_has_calls(log_calls)
def test_destroy_disk_metadata_ebr(self, mock_exec):
expected_calls = [mock.call('wipefs', '--force', '--all', 'fake-dev',

View File

@ -2595,8 +2595,8 @@ class TestGenericHardwareManager(base.IronicAgentTest):
self.assertEqual([mock.call(self.hardware, block_devices[2]),
mock.call(self.hardware, block_devices[3])],
mock_nvme_erase.call_args_list)
self.assertEqual([mock.call('/dev/sda', self.node['uuid']),
mock.call('/dev/md0', self.node['uuid'])],
self.assertEqual([mock.call('/dev/sda', self.node['uuid'], False),
mock.call('/dev/md0', self.node['uuid'], False)],
mock_destroy_disk_metadata.call_args_list)
mock_list_erasable_devices.assert_called_with(self.hardware,
self.node)
@ -2675,9 +2675,9 @@ class TestGenericHardwareManager(base.IronicAgentTest):
self.hardware.erase_devices_metadata(self.node, [])
self.assertEqual([mock.call('/dev/sda1', self.node['uuid']),
mock.call('/dev/sda', self.node['uuid']),
mock.call('/dev/md0', self.node['uuid'])],
self.assertEqual([mock.call('/dev/sda1', self.node['uuid'], False),
mock.call('/dev/sda', self.node['uuid'], False),
mock.call('/dev/md0', self.node['uuid'], False)],
mock_metadata.call_args_list)
mock_list_devs.assert_called_with(self.hardware,
include_partitions=True,
@ -2737,7 +2737,7 @@ class TestGenericHardwareManager(base.IronicAgentTest):
self.hardware.erase_devices_metadata,
self.node, [])
self.assertEqual([mock.call('/dev/sda1', self.node['uuid'])],
self.assertEqual([mock.call('/dev/sda1', self.node['uuid'], False)],
mock_metadata.call_args_list)
mock_list_devs.assert_called_with(self.hardware,
include_partitions=True,
@ -2786,8 +2786,8 @@ class TestGenericHardwareManager(base.IronicAgentTest):
self.node, [])
# Assert all devices are erased independent if one of them
# failed previously
self.assertEqual([mock.call('/dev/sdb', self.node['uuid']),
mock.call('/dev/sda', self.node['uuid'])],
self.assertEqual([mock.call('/dev/sdb', self.node['uuid'], False),
mock.call('/dev/sda', self.node['uuid'], False)],
mock_metadata.call_args_list)
mock_list_devs.assert_called_with(self.hardware,
include_partitions=True,

View File

@ -0,0 +1,7 @@
---
features:
- |
IPA configuration option ``quiet-cleanup`` makes it possible to supress
the Python exceptions during the execution of the disk metadata cleanup.
The default value for ``quiet-cleanup`` is ``false``. The option can be
configured via kernel parameters such as ``ipa-quiet-cleanup=<yourvalue>``.