From 33f01fa3c2f32f447ed36f00fea68321c3991c2e Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Fri, 15 Sep 2023 15:03:30 -0700 Subject: [PATCH] Fix vmedia network config drive handling When performing DHCP-less deployments, the agent can start and discover more than one configuration drive present on a host. For example, a host was previously deployed using Ironic, and is now being re-deployed again. If Glean was present in the ramdisk, the glean-early.sh would end mounting the folder based upon label. If cloud-init, somehow is still in the ramdisk, the other folder could somehow get mounted. This patch, which is intended to be backportable, causes the agent to unmount any configuration drive folders, mount the most likely candidate based upon device type, partition, and overall state of the machine, and then utilize that configuration, if present, to re-configure and reload networking. Thus allowing dhcp-less re-deployments to be fixed without forcing any breaking changes. It should also be noted that this fix was generated in concert with an additional tempest test case, because this overall failure case needed to be reproduced to ensure we had a workable non-breaking path forward. Closes-Bug: 2032377 Change-Id: I9a3b3dbb9ca98771ce2decf893eba7a4c1890eee --- ironic_python_agent/cmd/agent.py | 5 +- ironic_python_agent/tests/unit/test_utils.py | 159 ++++++++++++++---- ironic_python_agent/utils.py | 103 +++++++++++- ...-other-devices-found-7e9ebb861ed30ad5.yaml | 19 +++ 4 files changed, 247 insertions(+), 39 deletions(-) create mode 100644 releasenotes/notes/reload-glean-config-if-other-devices-found-7e9ebb861ed30ad5.yaml diff --git a/ironic_python_agent/cmd/agent.py b/ironic_python_agent/cmd/agent.py index 9ebe30065..4d0eeb22f 100644 --- a/ironic_python_agent/cmd/agent.py +++ b/ironic_python_agent/cmd/agent.py @@ -30,8 +30,9 @@ def run(): """Entrypoint for IronicPythonAgent.""" # NOTE(dtantsur): this must happen very early of the files from # /etc/ironic-python-agent.d won't be loaded - utils.copy_config_from_vmedia() - + vmedia_configuration = utils.copy_config_from_vmedia() + if vmedia_configuration: + utils.trigger_glean_network_refresh() log.register_options(CONF) CONF(args=sys.argv[1:]) # Debug option comes from oslo.log, allow overriding it via kernel cmdline diff --git a/ironic_python_agent/tests/unit/test_utils.py b/ironic_python_agent/tests/unit/test_utils.py index fc683d22e..7319780ee 100644 --- a/ironic_python_agent/tests/unit/test_utils.py +++ b/ironic_python_agent/tests/unit/test_utils.py @@ -883,7 +883,7 @@ class TestClockSyncUtils(ironic_agent_base.IronicAgentTest): @mock.patch.object(shutil, 'copy', autospec=True) @mock.patch.object(ironic_utils, 'mounted', autospec=True) @mock.patch.object(utils, 'execute', autospec=True) -class TestCopyConfigFromVmedia(testtools.TestCase): +class TestConfigFromVmedia(testtools.TestCase): def test_vmedia_found_not_booted_from_vmedia( self, mock_execute, mock_mount, mock_copy, @@ -909,45 +909,61 @@ class TestCopyConfigFromVmedia(testtools.TestCase): mock_check_vmedia.assert_not_called() self.assertFalse(mock_booted_from_vmedia.called) + @mock.patch.object(os.path, 'ismount', autospec=True) def test_no_files( - self, mock_execute, mock_mount, mock_copy, + self, mock_ismount, mock_execute, mock_mount, mock_copy, mock_find_device, mock_check_vmedia, mock_booted_from_vmedia): + mock_ismount.return_value = False mock_booted_from_vmedia.return_value = True temp_path = tempfile.mkdtemp() self.addCleanup(lambda: shutil.rmtree(temp_path)) - mock_execute.side_effect = processutils.ProcessExecutionError + mock_execute.side_effect = [('/mnt/config', ()), + ((), ())] mock_find_device.return_value = '/dev/something' mock_mount.return_value.__enter__.return_value = temp_path utils.copy_config_from_vmedia() mock_mount.assert_called_once_with('/dev/something') - mock_execute.assert_called_once_with('findmnt', '-n', '-oTARGET', - '/dev/something') + mock_execute.assert_has_calls([ + mock.call('findmnt', '-n', '-oTARGET', '/dev/something'), + mock.call('umount', '/mnt/config', run_as_root=True)]) + self.assertEqual(2, mock_execute.call_count) mock_copy.assert_not_called() self.assertTrue(mock_booted_from_vmedia.called) + @mock.patch.object(os.path, 'ismount', autospec=True) def test_mounted_no_files( - self, mock_execute, mock_mount, mock_copy, + self, mock_ismount, mock_execute, mock_mount, mock_copy, mock_find_device, mock_check_vmedia, mock_booted_from_vmedia): + mock_ismount.side_effect = [True, False] mock_booted_from_vmedia.return_value = True - mock_execute.return_value = '/some/path', '' + mock_execute.side_effect = [('/some/path', ''), + ((), ()), + ((), ())] + mock_find_device.side_effect = ('/dev/something', '') mock_find_device.return_value = '/dev/something' utils.copy_config_from_vmedia() - mock_execute.assert_called_once_with( - 'findmnt', '-n', '-oTARGET', '/dev/something') + mock_execute.assert_has_calls([ + mock.call('findmnt', '-n', '-oTARGET', '/dev/something'), + mock.call('umount', '/some/path', run_as_root=True), + mock.call('umount', '/mnt/config', run_as_root=True)]) + self.assertEqual(3, mock_execute.call_count) mock_copy.assert_not_called() - mock_mount.assert_not_called() + mock_mount.assert_called_once_with('/dev/something') self.assertTrue(mock_booted_from_vmedia.called) + @mock.patch.object(os.path, 'ismount', autospec=True) @mock.patch.object(os, 'makedirs', autospec=True) def test_copy( - self, mock_makedirs, mock_execute, mock_mount, mock_copy, - mock_find_device, mock_check_vmedia, mock_booted_from_vmedia): + self, mock_makedirs, mock_ismount, mock_execute, mock_mount, + mock_copy, mock_find_device, mock_check_vmedia, + mock_booted_from_vmedia): + mock_ismount.return_value = False mock_booted_from_vmedia.return_value = True mock_find_device.return_value = '/dev/something' - mock_execute.side_effect = processutils.ProcessExecutionError("") + mock_execute.side_effect = processutils.ProcessExecutionError('meow') path = tempfile.mkdtemp() self.addCleanup(lambda: shutil.rmtree(path)) @@ -982,32 +998,43 @@ class TestCopyConfigFromVmedia(testtools.TestCase): mock.call(mock.ANY, '/etc/ironic-python-agent.d/ironic.conf'), ], any_order=True) self.assertTrue(mock_booted_from_vmedia.called) + mock_execute.assert_called_once_with( + 'findmnt', '-n', '-oTARGET', '/dev/something') + @mock.patch.object(os.path, 'ismount', autospec=True) @mock.patch.object(os, 'makedirs', autospec=True) def test_copy_mounted( - self, mock_makedirs, mock_execute, mock_mount, + self, mock_makedirs, mock_ismount, mock_execute, mock_mount, mock_copy, mock_find_device, mock_check_vmedia, mock_booted_from_vmedia): + mock_ismount.side_effect = [True, True, False] mock_booted_from_vmedia.return_value = True - mock_find_device.return_value = '/dev/something' + mock_find_device.side_effect = ('/dev/something', '') path = tempfile.mkdtemp() self.addCleanup(lambda: shutil.rmtree(path)) - # NOTE(dtantsur): makedirs is mocked - os.mkdir(os.path.join(path, 'etc')) - os.mkdir(os.path.join(path, 'etc', 'ironic-python-agent')) - os.mkdir(os.path.join(path, 'etc', 'ironic-python-agent.d')) - with open(os.path.join(path, 'not copied'), 'wt') as fp: - fp.write('not copied') - with open(os.path.join(path, 'etc', 'ironic-python-agent', - 'ironic.crt'), 'wt') as fp: - fp.write('I am a cert') - with open(os.path.join(path, 'etc', 'ironic-python-agent.d', - 'ironic.conf'), 'wt') as fp: - fp.write('I am a config') + def _fake_mount(dev): + self.assertEqual('/dev/something', dev) + # NOTE(dtantsur): makedirs is mocked + os.mkdir(os.path.join(path, 'etc')) + os.mkdir(os.path.join(path, 'etc', 'ironic-python-agent')) + os.mkdir(os.path.join(path, 'etc', 'ironic-python-agent.d')) + with open(os.path.join(path, 'not copied'), 'wt') as fp: + fp.write('not copied') + with open(os.path.join(path, 'etc', 'ironic-python-agent', + 'ironic.crt'), 'wt') as fp: + fp.write('I am a cert') + with open(os.path.join(path, 'etc', 'ironic-python-agent.d', + 'ironic.conf'), 'wt') as fp: + fp.write('I am a config') + return mock.MagicMock(**{'__enter__.return_value': path}) - mock_execute.return_value = path, '' mock_find_device.return_value = '/dev/something' + mock_mount.side_effect = _fake_mount + mock_execute.side_effect = [(path, ''), + ((), ()), + ((), ()), + ((), ())] utils.copy_config_from_vmedia() @@ -1015,13 +1042,16 @@ class TestCopyConfigFromVmedia(testtools.TestCase): mock.call('/etc/ironic-python-agent', exist_ok=True), mock.call('/etc/ironic-python-agent.d', exist_ok=True), ], any_order=True) - mock_execute.assert_called_once_with( - 'findmnt', '-n', '-oTARGET', '/dev/something') + mock_execute.assert_has_calls([ + mock.call('findmnt', '-n', '-oTARGET', '/dev/something'), + mock.call('umount', mock.ANY, run_as_root=True), + mock.call('umount', '/mnt/config', run_as_root=True), + mock.call('umount', '/mnt/config', run_as_root=True)]) mock_copy.assert_has_calls([ mock.call(mock.ANY, '/etc/ironic-python-agent/ironic.crt'), mock.call(mock.ANY, '/etc/ironic-python-agent.d/ironic.conf'), ], any_order=True) - mock_mount.assert_not_called() + mock_mount.assert_called_once_with('/dev/something') self.assertTrue(mock_booted_from_vmedia.called) @@ -1122,6 +1152,73 @@ class TestCheckVirtualMedia(ironic_agent_base.IronicAgentTest): '/dev/sdh') +@mock.patch.object(shutil, 'copy', autospec=True) +@mock.patch.object(os, 'makedirs', autospec=True) +@mock.patch.object(utils, '_determine_networking_path', autospec=True) +@mock.patch.object(utils, 'execute', autospec=True) +class TestNetworkConfigRefresh(ironic_agent_base.IronicAgentTest): + + def test_tinycore(self, mock_exec, mock_path, mock_makedirs, + mock_copy): + mock_path.return_value = 'tinycore' + utils.trigger_glean_network_refresh() + self.assertEqual(1, mock_path.call_count) + mock_exec.assert_has_calls([ + mock.call('glean', '--distro', 'tinycore', run_as_root=True), + mock.call('/bin/sh', '/opt/network.sh', run_as_root=True)]) + mock_makedirs.assert_called_once_with('/mnt/config/openstack/latest/', + exist_ok=True) + + def test_execution_error_is_handled(self, mock_exec, mock_path, + mock_makedirs, mock_copy): + mock_path.return_value = 'tinycore' + mock_exec.side_effect = OSError() + utils.trigger_glean_network_refresh() + self.assertEqual(1, mock_path.call_count) + mock_exec.assert_called_once_with( + 'glean', '--distro', 'tinycore', run_as_root=True) + mock_makedirs.assert_called_once_with('/mnt/config/openstack/latest/', + exist_ok=True) + mock_copy.assert_called_once_with( + '/etc/ironic-python-agent.d/network_data.json', + '/mnt/config/openstack/latest/network_data.json') + + def test_networkd(self, mock_exec, mock_path, + mock_makedirs, mock_copy): + mock_path.return_value = 'networkd' + utils.trigger_glean_network_refresh() + self.assertEqual(1, mock_path.call_count) + mock_exec.assert_has_calls([ + mock.call('glean', '--distro', 'networkd', run_as_root=True), + mock.call('systemctl', 'restart', 'systemd-networkd.service', + run_as_root=True)]) + mock_makedirs.assert_called_once_with('/mnt/config/openstack/latest/', + exist_ok=True) + mock_copy.assert_called_once_with( + '/etc/ironic-python-agent.d/network_data.json', + '/mnt/config/openstack/latest/network_data.json') + + def test_networkmanager(self, mock_exec, mock_path, + mock_makedirs, mock_copy): + mock_path.return_value = 'networkmanager' + mock_exec.side_effect = [('', ''), + OSError(), + ('', '')] + utils.trigger_glean_network_refresh() + self.assertEqual(1, mock_path.call_count) + mock_exec.assert_has_calls([ + mock.call('glean', '--use-nm', run_as_root=True), + mock.call('systemctl', 'restart', 'NetworkManager', + run_as_root=True), + mock.call('systemctl', 'restart', 'NetworkManager.service', + run_as_root=True)]) + mock_makedirs.assert_called_once_with('/mnt/config/openstack/latest/', + exist_ok=True) + mock_copy.assert_called_once_with( + '/etc/ironic-python-agent.d/network_data.json', + '/mnt/config/openstack/latest/network_data.json') + + class TestCheckEarlyLogging(ironic_agent_base.IronicAgentTest): @mock.patch.object(utils, 'LOG', autospec=True) diff --git a/ironic_python_agent/utils.py b/ironic_python_agent/utils.py index c5889e9bd..ee5866102 100644 --- a/ironic_python_agent/utils.py +++ b/ironic_python_agent/utils.py @@ -21,6 +21,7 @@ import glob import io import json import os +import platform import re import shutil import subprocess @@ -33,6 +34,7 @@ from oslo_concurrency import processutils from oslo_config import cfg from oslo_log import log as logging from oslo_utils import units +import psutil import requests import tenacity @@ -126,6 +128,7 @@ def _find_vmedia_device_by_labels(labels): for device in ironic_utils.parse_device_tags(lsblk_output): for label in labels: if label.upper() == device['LABEL'].upper(): + _early_log('Found vmedia candidate %s', device['KNAME']) candidates.append(device['KNAME']) for candidate in candidates: @@ -210,13 +213,21 @@ def _copy_config_from(path): msg = ("Unable to copy vmedia configuration %s to %s: %s" % (src, dest, exc)) raise errors.VirtualMediaBootError(msg) + network_data_path = os.path.join( + path, 'openstack/latest/network_data.json') + if os.path.isfile(network_data_path): + dest_path = '/etc/ironic-python-agent%s' % ext + _early_log('Copying network_data.json to %s', dest_path) + os.makedirs(dest_path, exist_ok=True) + shutil.copy(network_data_path, os.path.join(dest_path, + 'network_data.json')) def _find_mount_point(device): try: path, _e = execute('findmnt', '-n', '-oTARGET', device) - except processutils.ProcessExecutionError: - return + except (processutils.ProcessExecutionError, OSError): + return None else: return path.strip() @@ -233,6 +244,7 @@ def _check_vmedia_device(vmedia_device_file): valid. """ try: + _early_log('Checking device %s vmedia status.', vmedia_device_file) output, _e = execute('lsblk', '-n', '-s', '-P', '-b', '-oKNAME,TRAN,TYPE,SIZE', vmedia_device_file) @@ -243,6 +255,8 @@ def _check_vmedia_device(vmedia_device_file): try: for device in ironic_utils.parse_device_tags(output): if device['TYPE'] == 'part': + # This would exclude any existing configuration drive written + # by ironic previously. _early_log('Excluding device %s from virtual media' 'consideration as it is a partition.', device['KNAME']) @@ -253,7 +267,7 @@ def _check_vmedia_device(vmedia_device_file): # registered for the scsi transport and thus type used. # This will most likely be a qemu driven testing VM, # or an older machine where SCSI transport is directly - # used to convey in a virtual + # used to convey in a virtual media device. return True if device['TYPE'] == 'disk' and device['TRAN'] == 'usb': # We know from experience on HPE machines, with ilo4/5, we see @@ -306,6 +320,9 @@ def copy_config_from_vmedia(): """Copies any configuration from a virtual media device. Copies files under /etc/ironic-python-agent and /etc/ironic-python-agent.d. + + :returns: True when we successfully copied content from the virtual media + device. Otherwise None. """ vmedia_device_file = _find_vmedia_device_by_labels( ['config-2', 'vmedia_boot_iso']) @@ -319,10 +336,84 @@ def copy_config_from_vmedia(): # Determine the device mounted = _find_mount_point(vmedia_device_file) if mounted: - _copy_config_from(mounted) + # Unmount if already mounted by something like glean/cloud init. + execute("umount", mounted, run_as_root=True) + with ironic_utils.mounted(vmedia_device_file) as vmedia_mount_point: + _copy_config_from(vmedia_mount_point) + + # Finally, check mount and ensure unmounted before proceeding. + # NOTE(TheJulia): This is a reserved path by convention for configuration + # drive handling. Since you *can* stack multiple devices, we need to make + # sure no other tooling left anything in an unclean state. + while os.path.ismount("/mnt/config"): + try: + execute("umount", "/mnt/config", run_as_root=True) + _early_log("Sucessfully unmounted /mnt/config as an orphaned " + "source of configuration.") + except (OSError, processutils.ProcessExecutionError): + _early_log("We failed to umount /mnt/config. This may be fatal " + "if virtual media is in use for configuration.") + return False + return True + + +def _determine_networking_path(): + os_ver = platform.uname().version.lower() + if 'tinycore' in os_ver: + # Compact CI oriented IPA Image. + return 'tinycore' + if True in ['networkd' in x.name().lower() + for x in psutil.process_iter(['name'])]: + # Basically, this would be Ubuntu AFAIK. + return 'networkd' else: - with ironic_utils.mounted(vmedia_device_file) as vmedia_mount_point: - _copy_config_from(vmedia_mount_point) + # This is fairly safe to assume at this point. + return 'networkmanager' + + +def trigger_glean_network_refresh(): + """Trigger procesing and refresh of network configuration.""" + + # NOTE(TheJulia): Ironic explicitly doesn't support cloud-init + # as the consumer of this information in ramdisks. This stance + # may change in the future, but as of the beginning of the + # Caracal development cycle, cloud-init is uninstalled + # by ironic-python-agent-builder. + glean_read_path = '/mnt/config/openstack/latest/' + # NOTE(TheJulia) Exist_okay because we might end up re-triggering down + # this path if the agent restarts due to a transient failure. + os.makedirs(glean_read_path, exist_ok=True) + backup_copy = '/etc/ironic-python-agent.d/network_data.json' + network_data = os.path.join(glean_read_path, 'network_data.json') + shutil.copy(backup_copy, network_data) + + _early_log('Working to apply network configuration refresh.') + network_type = _determine_networking_path() + # TODO(TheJulia): Check if python-glean is even in the $PATH. + # Two uniform actions, run glean, then restart all networking. + try: + if network_type == 'networkmanager': + # network manager present + execute('glean', '--use-nm', run_as_root=True) + try: + execute('systemctl', 'restart', 'NetworkManager', + run_as_root=True) + except Exception: + # Inconsistent naming across distributions. + execute('systemctl', 'restart', 'NetworkManager.service', + run_as_root=True) + if network_type == 'networkd': + execute('glean', '--distro', 'networkd', run_as_root=True) + execute('systemctl', 'restart', 'systemd-networkd.service', + run_as_root=True) + if network_type == 'tinycore': + execute('glean', '--distro', 'tinycore', run_as_root=True) + # While a shell script, glean doesn't set this file to + # be executable once it writes it. + execute('/bin/sh', '/opt/network.sh', run_as_root=True) + except Exception as e: + _early_log('Unable to execute configuration refresh for ' + 'configuration drive data. Error: %s' % e) def _get_cached_params(): diff --git a/releasenotes/notes/reload-glean-config-if-other-devices-found-7e9ebb861ed30ad5.yaml b/releasenotes/notes/reload-glean-config-if-other-devices-found-7e9ebb861ed30ad5.yaml new file mode 100644 index 000000000..f78e98528 --- /dev/null +++ b/releasenotes/notes/reload-glean-config-if-other-devices-found-7e9ebb861ed30ad5.yaml @@ -0,0 +1,19 @@ +--- +fixes: + - | + Fixes an issue with the agent where the presence of a configuration drive + from a prior node deployment could prevent networking configuration from + being determined and loaded in DHCP-less use cases. + The agent now attempts to identify the appropriate configuration drive + networking data source utilizing the existing device identification + process, and then triggers glean configuration to be re-generated, + after which networking is restarted. For more information, please + view `bug 2032377 `_. +security: + - | + The ironic agent is now able to re-assert networking configuration + in DHCP-less modes should a prior node's ``config drive`` be + discovered as readable. Normally this issue is not a security + issue as long as ``ironic-python-agent-builder`` is utilized. + Ironic's agent automatically exlcudes possible configuration + drive copies from previously deployed baremetal nodes.