Merge "preserve/handle config drives on 4k block devices" into stable/2023.1
This commit is contained in:
commit
3c61f2b4de
@ -326,6 +326,11 @@ cli_opts = [
|
||||
'cleaning from inadvertently destroying a running '
|
||||
'cluster which may be visible over a storage fabric '
|
||||
'such as FibreChannel.'),
|
||||
cfg.BoolOpt('config_drive_rebuild',
|
||||
default=False,
|
||||
help='If the agent should rebuild the configuration drive '
|
||||
'using a local filesystem, instead of letting Ironic '
|
||||
'determine if this action is necessary.'),
|
||||
]
|
||||
|
||||
CONF.register_cli_opts(cli_opts)
|
||||
|
@ -465,12 +465,27 @@ def create_config_drive_partition(node_uuid, device, configdrive):
|
||||
{'part': config_drive_part, 'node': node_uuid})
|
||||
utils.execute('test', '-e', config_drive_part, attempts=15,
|
||||
delay_on_retry=True)
|
||||
|
||||
disk_utils.dd(confdrive_file, config_drive_part)
|
||||
if not CONF.config_drive_rebuild:
|
||||
disk_utils.dd(confdrive_file, config_drive_part)
|
||||
if not _does_config_drive_work(config_drive_part):
|
||||
# If we have reached this point, we might have an
|
||||
# invalid configuration drive, OR the block device
|
||||
# layer doesn't support 2K block Logical IO (iso9660)
|
||||
_try_build_fat32_config_drive(config_drive_part,
|
||||
confdrive_file)
|
||||
else:
|
||||
LOG.info('Extracting configuration drive to write copy to disk.')
|
||||
_try_build_fat32_config_drive(config_drive_part, confdrive_file)
|
||||
LOG.info("Configdrive for node %(node)s successfully "
|
||||
"copied onto partition %(part)s",
|
||||
{'node': node_uuid, 'part': config_drive_part})
|
||||
|
||||
except exception.InstanceDeployFailure:
|
||||
# Since we no longer have a final action on the decorator, we need
|
||||
# to catch the failure, and still perform the cleanup.
|
||||
if confdrive_file:
|
||||
utils.unlink_without_raise(confdrive_file)
|
||||
raise
|
||||
except (processutils.UnknownArgumentError,
|
||||
processutils.ProcessExecutionError, OSError) as e:
|
||||
msg = ('Failed to create config drive on disk %(disk)s '
|
||||
@ -478,13 +493,90 @@ def create_config_drive_partition(node_uuid, device, configdrive):
|
||||
{'disk': device, 'node': node_uuid, 'error': e})
|
||||
LOG.error(msg)
|
||||
raise exception.InstanceDeployFailure(msg)
|
||||
finally:
|
||||
# If the configdrive was requested make sure we delete the file
|
||||
# after copying the content to the partition
|
||||
|
||||
finally:
|
||||
if confdrive_file:
|
||||
utils.unlink_without_raise(confdrive_file)
|
||||
|
||||
|
||||
def _does_config_drive_work(config_drive_part):
|
||||
"""Attempts to mount the config drive to validate it works.
|
||||
|
||||
:param config_drive_part: The partition to which the configuration drive
|
||||
was written.
|
||||
:returns: True if we were able to mount the configuration drive partition.
|
||||
"""
|
||||
temp_folder = tempfile.mkdtemp()
|
||||
try:
|
||||
# Why: If the filesystem is ISO9660 or vfat, and the logical sector
|
||||
# size which is supported is *not* something which supports 512 bytes,
|
||||
# i.e. a 4k Block size, then ISO9660 just will not work. Vfat also
|
||||
# will not work because the logical size needs to match the logical
|
||||
# size which is usable. If the underlying driver cannot use that size,
|
||||
# then the filesystem will not work and cannot be updated because
|
||||
# structurally it is incompaible with the block device driver.
|
||||
utils.execute('mount', '-o', 'ro', '-t', 'auto', config_drive_part,
|
||||
temp_folder)
|
||||
utils.execute('umount', temp_folder)
|
||||
except (processutils.ProcessExecutionError, OSError) as e:
|
||||
LOG.error('Encountered issue attempting to validate the '
|
||||
'supplied configuration drive. Error: %s', e)
|
||||
return False
|
||||
finally:
|
||||
utils.unlink_without_raise(temp_folder)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _try_build_fat32_config_drive(partition, confdrive_file):
|
||||
conf_drive_temp = tempfile.mkdtemp()
|
||||
try:
|
||||
utils.execute('mount', '-o', 'loop,ro', '-t', 'auto',
|
||||
confdrive_file, conf_drive_temp)
|
||||
except (processutils.ProcessExecutionError, OSError) as e:
|
||||
# Config drive is invalid, at least to our point of view.
|
||||
# Bailing.
|
||||
LOG.warning('We were unable to examine the configuration drive, '
|
||||
'bypassing. Error: %s', e)
|
||||
return
|
||||
|
||||
new_drive_temp = tempfile.mkdtemp()
|
||||
try:
|
||||
# While creating a config drive file from scratch or on
|
||||
# a loopback will likely result in a 512 byte sector size,
|
||||
# the underlying fat filesystem utilities *automatically*
|
||||
# check the device block sector sizing. This *will* break
|
||||
# above 4k blocks, or at least might. Officially, 4k is the
|
||||
# *maximum* in the FAT standard. See:
|
||||
# https://github.com/dosfstools/dosfstools/blame/c483196dd46eab22abba756cef511d36f5f42070/src/mkfs.fat.c#L1987
|
||||
utils.mkfs(fs='vfat', path=partition, label='CONFIG-2')
|
||||
utils.execute('mount', '-t', 'auto', partition, new_drive_temp)
|
||||
# copytree, using copy2, copies everything in the source folder
|
||||
# into the destination folder, so we should be good, and metadata
|
||||
# is attempted to be preserved.
|
||||
shutil.copytree(conf_drive_temp, new_drive_temp, dirs_exist_ok=True)
|
||||
except (processutils.ProcessExecutionError, OSError) as e:
|
||||
# We failed to make the filesystem :(
|
||||
# This is a fairly hard error as we could not use the
|
||||
# config drive, nor could we recover the state.
|
||||
LOG.error('We were unable to make a new filesystem for the '
|
||||
'configuration drive. Error: %s', e)
|
||||
msg = ('A failure occured while attempting to format, copy, and '
|
||||
're-create the configuration drive in a structure which '
|
||||
'is compatible with the underlying hardware and Operating '
|
||||
'System. Due to the nature of configuration drive, it could '
|
||||
'have been incorrectly formatted. Operator investigation is '
|
||||
'required. Error: {}'.format(str(e)))
|
||||
raise exception.InstanceDeployFailure(msg)
|
||||
finally:
|
||||
utils.execute('umount', conf_drive_temp)
|
||||
utils.execute('umount', new_drive_temp)
|
||||
utils.unlink_without_raise(new_drive_temp)
|
||||
utils.unlink_without_raise(conf_drive_temp)
|
||||
|
||||
|
||||
def _is_disk_larger_than_max_size(device, node_uuid):
|
||||
"""Check if total disk size exceeds 2TB msdos limit
|
||||
|
||||
|
@ -20,6 +20,7 @@ from ironic_lib import disk_utils
|
||||
from ironic_lib import exception
|
||||
from ironic_lib import utils
|
||||
from oslo_concurrency import processutils
|
||||
from oslo_config import cfg
|
||||
import requests
|
||||
|
||||
from ironic_python_agent import errors
|
||||
@ -28,6 +29,9 @@ from ironic_python_agent import partition_utils
|
||||
from ironic_python_agent.tests.unit import base
|
||||
|
||||
|
||||
CONF = cfg.CONF
|
||||
|
||||
|
||||
@mock.patch.object(shutil, 'copyfileobj', autospec=True)
|
||||
@mock.patch.object(requests, 'get', autospec=True)
|
||||
class GetConfigdriveTestCase(base.IronicAgentTest):
|
||||
@ -649,7 +653,10 @@ class CreateConfigDriveTestCases(base.IronicAgentTest):
|
||||
self.config_part_label,
|
||||
self.node_uuid)
|
||||
self.assertFalse(mock_list_partitions.called)
|
||||
self.assertFalse(mock_execute.called)
|
||||
mock_execute.assert_has_calls([
|
||||
mock.call('mount', '-o', 'ro', '-t', 'auto',
|
||||
'/dev/fake-part1', mock.ANY),
|
||||
mock.call('umount', mock.ANY)])
|
||||
self.assertFalse(mock_table_type.called)
|
||||
mock_dd.assert_called_with(configdrive_file, configdrive_part)
|
||||
mock_unlink.assert_called_with(configdrive_file)
|
||||
@ -706,6 +713,135 @@ class CreateConfigDriveTestCases(base.IronicAgentTest):
|
||||
mock_dd.assert_called_with(configdrive_file, expected_part)
|
||||
mock_unlink.assert_called_with(configdrive_file)
|
||||
|
||||
@mock.patch('oslo_utils.uuidutils.generate_uuid', lambda: 'fake-uuid')
|
||||
@mock.patch.object(partition_utils, '_try_build_fat32_config_drive',
|
||||
autospec=True)
|
||||
@mock.patch.object(partition_utils, '_does_config_drive_work',
|
||||
autospec=True)
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
@mock.patch.object(utils, 'unlink_without_raise',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'dd',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'fix_gpt_partition',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'get_partition_table_type',
|
||||
autospec=True)
|
||||
@mock.patch.object(partition_utils, 'get_partition',
|
||||
autospec=True)
|
||||
@mock.patch.object(partition_utils, 'get_labelled_partition',
|
||||
autospec=True)
|
||||
@mock.patch.object(partition_utils, 'get_configdrive',
|
||||
autospec=True)
|
||||
def test_create_partition_gpt_with_fallback(
|
||||
self, mock_get_configdrive,
|
||||
mock_get_labelled_partition,
|
||||
mock_get_partition_by_uuid,
|
||||
mock_table_type,
|
||||
mock_fix_gpt_partition,
|
||||
mock_dd, mock_unlink, mock_execute,
|
||||
mock_config_drive_work,
|
||||
mock_rebuild_config_drive):
|
||||
config_url = 'http://1.2.3.4/cd'
|
||||
configdrive_file = '/tmp/xyz'
|
||||
configdrive_mb = 10
|
||||
|
||||
mock_get_configdrive.return_value = (configdrive_mb, configdrive_file)
|
||||
mock_get_labelled_partition.return_value = None
|
||||
|
||||
mock_table_type.return_value = 'gpt'
|
||||
expected_part = '/dev/fake4'
|
||||
mock_get_partition_by_uuid.return_value = expected_part
|
||||
mock_config_drive_work.return_value = False
|
||||
|
||||
partition_utils.create_config_drive_partition(self.node_uuid, self.dev,
|
||||
config_url)
|
||||
mock_execute.assert_has_calls([
|
||||
mock.call('sgdisk', '-n', '0:-64MB:0', '-u', '0:fake-uuid',
|
||||
self.dev, run_as_root=True),
|
||||
mock.call('sync'),
|
||||
mock.call('udevadm', 'settle'),
|
||||
mock.call('partprobe', self.dev, attempts=10, run_as_root=True),
|
||||
mock.call('sgdisk', '-v', self.dev, run_as_root=True),
|
||||
|
||||
mock.call('udevadm', 'settle'),
|
||||
mock.call('test', '-e', expected_part, attempts=15,
|
||||
delay_on_retry=True)
|
||||
])
|
||||
|
||||
mock_table_type.assert_called_with(self.dev)
|
||||
mock_fix_gpt_partition.assert_called_with(self.dev, self.node_uuid)
|
||||
mock_dd.assert_called_with(configdrive_file, expected_part)
|
||||
mock_unlink.assert_called_with(configdrive_file)
|
||||
mock_config_drive_work.assert_called_once_with(expected_part)
|
||||
mock_rebuild_config_drive.assert_called_once_with(expected_part,
|
||||
configdrive_file)
|
||||
|
||||
@mock.patch('oslo_utils.uuidutils.generate_uuid', lambda: 'fake-uuid')
|
||||
@mock.patch.object(partition_utils, '_try_build_fat32_config_drive',
|
||||
autospec=True)
|
||||
@mock.patch.object(partition_utils, '_does_config_drive_work',
|
||||
autospec=True)
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
@mock.patch.object(utils, 'unlink_without_raise',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'dd',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'fix_gpt_partition',
|
||||
autospec=True)
|
||||
@mock.patch.object(disk_utils, 'get_partition_table_type',
|
||||
autospec=True)
|
||||
@mock.patch.object(partition_utils, 'get_partition',
|
||||
autospec=True)
|
||||
@mock.patch.object(partition_utils, 'get_labelled_partition',
|
||||
autospec=True)
|
||||
@mock.patch.object(partition_utils, 'get_configdrive',
|
||||
autospec=True)
|
||||
def test_create_partition_gpt_use_vfat(
|
||||
self, mock_get_configdrive,
|
||||
mock_get_labelled_partition,
|
||||
mock_get_partition_by_uuid,
|
||||
mock_table_type,
|
||||
mock_fix_gpt_partition,
|
||||
mock_dd, mock_unlink, mock_execute,
|
||||
mock_config_drive_work,
|
||||
mock_rebuild_config_drive):
|
||||
config_url = 'http://1.2.3.4/cd'
|
||||
configdrive_file = '/tmp/xyz'
|
||||
configdrive_mb = 10
|
||||
|
||||
CONF.set_override('config_drive_rebuild', True)
|
||||
mock_get_configdrive.return_value = (configdrive_mb, configdrive_file)
|
||||
mock_get_labelled_partition.return_value = None
|
||||
|
||||
mock_table_type.return_value = 'gpt'
|
||||
expected_part = '/dev/fake4'
|
||||
mock_get_partition_by_uuid.return_value = expected_part
|
||||
mock_config_drive_work.return_value = True
|
||||
|
||||
partition_utils.create_config_drive_partition(self.node_uuid, self.dev,
|
||||
config_url)
|
||||
mock_execute.assert_has_calls([
|
||||
mock.call('sgdisk', '-n', '0:-64MB:0', '-u', '0:fake-uuid',
|
||||
self.dev, run_as_root=True),
|
||||
mock.call('sync'),
|
||||
mock.call('udevadm', 'settle'),
|
||||
mock.call('partprobe', self.dev, attempts=10, run_as_root=True),
|
||||
mock.call('sgdisk', '-v', self.dev, run_as_root=True),
|
||||
|
||||
mock.call('udevadm', 'settle'),
|
||||
mock.call('test', '-e', expected_part, attempts=15,
|
||||
delay_on_retry=True)
|
||||
])
|
||||
|
||||
mock_table_type.assert_called_with(self.dev)
|
||||
mock_fix_gpt_partition.assert_called_with(self.dev, self.node_uuid)
|
||||
mock_dd.assert_not_called()
|
||||
mock_unlink.assert_called_with(configdrive_file)
|
||||
mock_config_drive_work.assert_not_called()
|
||||
mock_rebuild_config_drive.assert_called_once_with(expected_part,
|
||||
configdrive_file)
|
||||
|
||||
@mock.patch.object(disk_utils, 'count_mbr_partitions', autospec=True)
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
@mock.patch.object(partition_utils.LOG, 'warning', autospec=True)
|
||||
@ -1288,3 +1424,94 @@ class TestGetPartition(base.IronicAgentTest):
|
||||
mock.call('lsblk', '-PbioKNAME,UUID,PARTUUID,TYPE,LABEL',
|
||||
self.fake_dev)]
|
||||
mock_execute.assert_has_calls(expected)
|
||||
|
||||
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
|
||||
class TestConfigDriveTestRecovery(base.IronicAgentTest):
|
||||
|
||||
fake_dev = '/dev/fake'
|
||||
configdrive_file = '/tmp/config-drive'
|
||||
|
||||
def test__does_config_drive_work(self, mock_execute):
|
||||
self.assertTrue(partition_utils._does_config_drive_work(self.fake_dev))
|
||||
mock_execute.assert_has_calls([
|
||||
mock.call('mount', '-o', 'ro', '-t', 'auto', self.fake_dev,
|
||||
mock.ANY),
|
||||
mock.call('umount', mock.ANY)])
|
||||
|
||||
def test__does_config_drive_failed(self, mock_execute):
|
||||
mock_execute.side_effect = processutils.ProcessExecutionError('boom')
|
||||
self.assertFalse(
|
||||
partition_utils._does_config_drive_work(self.fake_dev)
|
||||
)
|
||||
mock_execute.assert_has_calls([
|
||||
mock.call('mount', '-o', 'ro', '-t', 'auto', self.fake_dev,
|
||||
mock.ANY)])
|
||||
|
||||
@mock.patch.object(shutil, 'copytree', autospec=True)
|
||||
@mock.patch.object(utils, 'mkfs', autospec=True)
|
||||
def test__try_build_fat32_config_drive(self,
|
||||
mock_mkfs,
|
||||
mock_copy,
|
||||
mock_execute):
|
||||
partition_utils._try_build_fat32_config_drive(self.fake_dev,
|
||||
self.configdrive_file)
|
||||
mock_execute.assert_has_calls([
|
||||
mock.call('mount', '-o', 'loop,ro', '-t', 'auto',
|
||||
self.configdrive_file, mock.ANY),
|
||||
mock.call('mount', '-t', 'auto', self.fake_dev, mock.ANY),
|
||||
mock.call('umount', mock.ANY),
|
||||
mock.call('umount', mock.ANY),
|
||||
])
|
||||
mock_mkfs.assert_called_once_with(fs='vfat', path=self.fake_dev,
|
||||
label='CONFIG-2')
|
||||
# Validate we called copy as we expect, both source and destination
|
||||
# are temporary folders.
|
||||
mock_copy.assert_called_once_with(mock.ANY, mock.ANY,
|
||||
dirs_exist_ok=True)
|
||||
|
||||
@mock.patch.object(shutil, 'copytree', autospec=True)
|
||||
@mock.patch.object(utils, 'mkfs', autospec=True)
|
||||
def test__try_build_fat32_config_drive_graceful_fail(
|
||||
self,
|
||||
mock_mkfs,
|
||||
mock_copy,
|
||||
mock_execute):
|
||||
mock_execute.side_effect = processutils.ProcessExecutionError('boom')
|
||||
self.assertIsNone(
|
||||
partition_utils._try_build_fat32_config_drive(
|
||||
self.fake_dev,
|
||||
self.configdrive_file)
|
||||
)
|
||||
mock_execute.assert_called_once_with(
|
||||
'mount', '-o', 'loop,ro', '-t', 'auto',
|
||||
self.configdrive_file, mock.ANY)
|
||||
mock_mkfs.assert_not_called()
|
||||
# Validate we called copy as we expect, both source and destination
|
||||
# are temporary folders.
|
||||
mock_copy.assert_not_called()
|
||||
|
||||
@mock.patch.object(shutil, 'copytree', autospec=True)
|
||||
@mock.patch.object(utils, 'mkfs', autospec=True)
|
||||
def test__try_build_fat32_config_drive_fails_once_invalid(
|
||||
self,
|
||||
mock_mkfs,
|
||||
mock_copy,
|
||||
mock_execute):
|
||||
mock_mkfs.side_effect = processutils.ProcessExecutionError('boom')
|
||||
self.assertRaisesRegex(
|
||||
exception.InstanceDeployFailure,
|
||||
'A failure occured while attempting to format.*',
|
||||
partition_utils._try_build_fat32_config_drive,
|
||||
self.fake_dev,
|
||||
self.configdrive_file)
|
||||
mock_execute.assert_has_calls([
|
||||
mock.call('mount', '-o', 'loop,ro', '-t', 'auto',
|
||||
self.configdrive_file, mock.ANY),
|
||||
mock.call('umount', mock.ANY),
|
||||
mock.call('umount', mock.ANY),
|
||||
])
|
||||
|
||||
mock_mkfs.assert_called_once_with(fs='vfat', path=self.fake_dev,
|
||||
label='CONFIG-2')
|
||||
mock_copy.assert_not_called()
|
||||
|
@ -0,0 +1,12 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes a failure case where a deployed instance may be unable to access
|
||||
the configuration drive post-deployment. This can occur when block
|
||||
devices only support 4KB IO interactions. When 4KB block IO sizes
|
||||
are in use, the ISO9660 filesystem driver in Linux cannot be used
|
||||
as it is modeled around a 2KB block. We now attempt to verify, and
|
||||
rebuild the configuration drive on a FAT filesystem when we cannot
|
||||
mount the supplied configuration drive. Operators can force the agent
|
||||
to write configuration drives using the FAT filesystem using the
|
||||
``[DEFAULT]config_drive_rebuild`` option.
|
Loading…
Reference in New Issue
Block a user