Fix vmedia network config drive handling

When performing DHCP-less deployments, the agent can start and
discover more than one configuration drive present on a host.

For example, a host was previously deployed using Ironic, and
is now being re-deployed again.

If Glean was present in the ramdisk, the glean-early.sh would end
mounting the folder based upon label.

If cloud-init, somehow is still in the ramdisk, the other folder
could somehow get mounted.

This patch, which is intended to be backportable, causes the agent
to unmount any configuration drive folders, mount the most likely
candidate based upon device type, partition, and overall state of
the machine, and then utilize that configuration, if present,
to re-configure and reload networking.

Thus allowing dhcp-less re-deployments to be fixed without
forcing any breaking changes.

It should also be noted that this fix was generated in concert
with an additional tempest test case, because this overall failure
case needed to be reproduced to ensure we had a workable non-breaking
path forward.

Closes-Bug: 2032377
Change-Id: I9a3b3dbb9ca98771ce2decf893eba7a4c1890eee
This commit is contained in:
Julia Kreger
2023-09-15 15:03:30 -07:00
parent 1581f91826
commit 33f01fa3c2
4 changed files with 247 additions and 39 deletions

View File

@@ -30,8 +30,9 @@ def run():
"""Entrypoint for IronicPythonAgent."""
# NOTE(dtantsur): this must happen very early of the files from
# /etc/ironic-python-agent.d won't be loaded
utils.copy_config_from_vmedia()
vmedia_configuration = utils.copy_config_from_vmedia()
if vmedia_configuration:
utils.trigger_glean_network_refresh()
log.register_options(CONF)
CONF(args=sys.argv[1:])
# Debug option comes from oslo.log, allow overriding it via kernel cmdline

View File

@@ -883,7 +883,7 @@ class TestClockSyncUtils(ironic_agent_base.IronicAgentTest):
@mock.patch.object(shutil, 'copy', autospec=True)
@mock.patch.object(ironic_utils, 'mounted', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
class TestCopyConfigFromVmedia(testtools.TestCase):
class TestConfigFromVmedia(testtools.TestCase):
def test_vmedia_found_not_booted_from_vmedia(
self, mock_execute, mock_mount, mock_copy,
@@ -909,45 +909,61 @@ class TestCopyConfigFromVmedia(testtools.TestCase):
mock_check_vmedia.assert_not_called()
self.assertFalse(mock_booted_from_vmedia.called)
@mock.patch.object(os.path, 'ismount', autospec=True)
def test_no_files(
self, mock_execute, mock_mount, mock_copy,
self, mock_ismount, mock_execute, mock_mount, mock_copy,
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia):
mock_ismount.return_value = False
mock_booted_from_vmedia.return_value = True
temp_path = tempfile.mkdtemp()
self.addCleanup(lambda: shutil.rmtree(temp_path))
mock_execute.side_effect = processutils.ProcessExecutionError
mock_execute.side_effect = [('/mnt/config', ()),
((), ())]
mock_find_device.return_value = '/dev/something'
mock_mount.return_value.__enter__.return_value = temp_path
utils.copy_config_from_vmedia()
mock_mount.assert_called_once_with('/dev/something')
mock_execute.assert_called_once_with('findmnt', '-n', '-oTARGET',
'/dev/something')
mock_execute.assert_has_calls([
mock.call('findmnt', '-n', '-oTARGET', '/dev/something'),
mock.call('umount', '/mnt/config', run_as_root=True)])
self.assertEqual(2, mock_execute.call_count)
mock_copy.assert_not_called()
self.assertTrue(mock_booted_from_vmedia.called)
@mock.patch.object(os.path, 'ismount', autospec=True)
def test_mounted_no_files(
self, mock_execute, mock_mount, mock_copy,
self, mock_ismount, mock_execute, mock_mount, mock_copy,
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia):
mock_ismount.side_effect = [True, False]
mock_booted_from_vmedia.return_value = True
mock_execute.return_value = '/some/path', ''
mock_execute.side_effect = [('/some/path', ''),
((), ()),
((), ())]
mock_find_device.side_effect = ('/dev/something', '')
mock_find_device.return_value = '/dev/something'
utils.copy_config_from_vmedia()
mock_execute.assert_called_once_with(
'findmnt', '-n', '-oTARGET', '/dev/something')
mock_execute.assert_has_calls([
mock.call('findmnt', '-n', '-oTARGET', '/dev/something'),
mock.call('umount', '/some/path', run_as_root=True),
mock.call('umount', '/mnt/config', run_as_root=True)])
self.assertEqual(3, mock_execute.call_count)
mock_copy.assert_not_called()
mock_mount.assert_not_called()
mock_mount.assert_called_once_with('/dev/something')
self.assertTrue(mock_booted_from_vmedia.called)
@mock.patch.object(os.path, 'ismount', autospec=True)
@mock.patch.object(os, 'makedirs', autospec=True)
def test_copy(
self, mock_makedirs, mock_execute, mock_mount, mock_copy,
mock_find_device, mock_check_vmedia, mock_booted_from_vmedia):
self, mock_makedirs, mock_ismount, mock_execute, mock_mount,
mock_copy, mock_find_device, mock_check_vmedia,
mock_booted_from_vmedia):
mock_ismount.return_value = False
mock_booted_from_vmedia.return_value = True
mock_find_device.return_value = '/dev/something'
mock_execute.side_effect = processutils.ProcessExecutionError("")
mock_execute.side_effect = processutils.ProcessExecutionError('meow')
path = tempfile.mkdtemp()
self.addCleanup(lambda: shutil.rmtree(path))
@@ -982,32 +998,43 @@ class TestCopyConfigFromVmedia(testtools.TestCase):
mock.call(mock.ANY, '/etc/ironic-python-agent.d/ironic.conf'),
], any_order=True)
self.assertTrue(mock_booted_from_vmedia.called)
mock_execute.assert_called_once_with(
'findmnt', '-n', '-oTARGET', '/dev/something')
@mock.patch.object(os.path, 'ismount', autospec=True)
@mock.patch.object(os, 'makedirs', autospec=True)
def test_copy_mounted(
self, mock_makedirs, mock_execute, mock_mount,
self, mock_makedirs, mock_ismount, mock_execute, mock_mount,
mock_copy, mock_find_device, mock_check_vmedia,
mock_booted_from_vmedia):
mock_ismount.side_effect = [True, True, False]
mock_booted_from_vmedia.return_value = True
mock_find_device.return_value = '/dev/something'
mock_find_device.side_effect = ('/dev/something', '')
path = tempfile.mkdtemp()
self.addCleanup(lambda: shutil.rmtree(path))
# NOTE(dtantsur): makedirs is mocked
os.mkdir(os.path.join(path, 'etc'))
os.mkdir(os.path.join(path, 'etc', 'ironic-python-agent'))
os.mkdir(os.path.join(path, 'etc', 'ironic-python-agent.d'))
with open(os.path.join(path, 'not copied'), 'wt') as fp:
fp.write('not copied')
with open(os.path.join(path, 'etc', 'ironic-python-agent',
'ironic.crt'), 'wt') as fp:
fp.write('I am a cert')
with open(os.path.join(path, 'etc', 'ironic-python-agent.d',
'ironic.conf'), 'wt') as fp:
fp.write('I am a config')
def _fake_mount(dev):
self.assertEqual('/dev/something', dev)
# NOTE(dtantsur): makedirs is mocked
os.mkdir(os.path.join(path, 'etc'))
os.mkdir(os.path.join(path, 'etc', 'ironic-python-agent'))
os.mkdir(os.path.join(path, 'etc', 'ironic-python-agent.d'))
with open(os.path.join(path, 'not copied'), 'wt') as fp:
fp.write('not copied')
with open(os.path.join(path, 'etc', 'ironic-python-agent',
'ironic.crt'), 'wt') as fp:
fp.write('I am a cert')
with open(os.path.join(path, 'etc', 'ironic-python-agent.d',
'ironic.conf'), 'wt') as fp:
fp.write('I am a config')
return mock.MagicMock(**{'__enter__.return_value': path})
mock_execute.return_value = path, ''
mock_find_device.return_value = '/dev/something'
mock_mount.side_effect = _fake_mount
mock_execute.side_effect = [(path, ''),
((), ()),
((), ()),
((), ())]
utils.copy_config_from_vmedia()
@@ -1015,13 +1042,16 @@ class TestCopyConfigFromVmedia(testtools.TestCase):
mock.call('/etc/ironic-python-agent', exist_ok=True),
mock.call('/etc/ironic-python-agent.d', exist_ok=True),
], any_order=True)
mock_execute.assert_called_once_with(
'findmnt', '-n', '-oTARGET', '/dev/something')
mock_execute.assert_has_calls([
mock.call('findmnt', '-n', '-oTARGET', '/dev/something'),
mock.call('umount', mock.ANY, run_as_root=True),
mock.call('umount', '/mnt/config', run_as_root=True),
mock.call('umount', '/mnt/config', run_as_root=True)])
mock_copy.assert_has_calls([
mock.call(mock.ANY, '/etc/ironic-python-agent/ironic.crt'),
mock.call(mock.ANY, '/etc/ironic-python-agent.d/ironic.conf'),
], any_order=True)
mock_mount.assert_not_called()
mock_mount.assert_called_once_with('/dev/something')
self.assertTrue(mock_booted_from_vmedia.called)
@@ -1122,6 +1152,73 @@ class TestCheckVirtualMedia(ironic_agent_base.IronicAgentTest):
'/dev/sdh')
@mock.patch.object(shutil, 'copy', autospec=True)
@mock.patch.object(os, 'makedirs', autospec=True)
@mock.patch.object(utils, '_determine_networking_path', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
class TestNetworkConfigRefresh(ironic_agent_base.IronicAgentTest):
def test_tinycore(self, mock_exec, mock_path, mock_makedirs,
mock_copy):
mock_path.return_value = 'tinycore'
utils.trigger_glean_network_refresh()
self.assertEqual(1, mock_path.call_count)
mock_exec.assert_has_calls([
mock.call('glean', '--distro', 'tinycore', run_as_root=True),
mock.call('/bin/sh', '/opt/network.sh', run_as_root=True)])
mock_makedirs.assert_called_once_with('/mnt/config/openstack/latest/',
exist_ok=True)
def test_execution_error_is_handled(self, mock_exec, mock_path,
mock_makedirs, mock_copy):
mock_path.return_value = 'tinycore'
mock_exec.side_effect = OSError()
utils.trigger_glean_network_refresh()
self.assertEqual(1, mock_path.call_count)
mock_exec.assert_called_once_with(
'glean', '--distro', 'tinycore', run_as_root=True)
mock_makedirs.assert_called_once_with('/mnt/config/openstack/latest/',
exist_ok=True)
mock_copy.assert_called_once_with(
'/etc/ironic-python-agent.d/network_data.json',
'/mnt/config/openstack/latest/network_data.json')
def test_networkd(self, mock_exec, mock_path,
mock_makedirs, mock_copy):
mock_path.return_value = 'networkd'
utils.trigger_glean_network_refresh()
self.assertEqual(1, mock_path.call_count)
mock_exec.assert_has_calls([
mock.call('glean', '--distro', 'networkd', run_as_root=True),
mock.call('systemctl', 'restart', 'systemd-networkd.service',
run_as_root=True)])
mock_makedirs.assert_called_once_with('/mnt/config/openstack/latest/',
exist_ok=True)
mock_copy.assert_called_once_with(
'/etc/ironic-python-agent.d/network_data.json',
'/mnt/config/openstack/latest/network_data.json')
def test_networkmanager(self, mock_exec, mock_path,
mock_makedirs, mock_copy):
mock_path.return_value = 'networkmanager'
mock_exec.side_effect = [('', ''),
OSError(),
('', '')]
utils.trigger_glean_network_refresh()
self.assertEqual(1, mock_path.call_count)
mock_exec.assert_has_calls([
mock.call('glean', '--use-nm', run_as_root=True),
mock.call('systemctl', 'restart', 'NetworkManager',
run_as_root=True),
mock.call('systemctl', 'restart', 'NetworkManager.service',
run_as_root=True)])
mock_makedirs.assert_called_once_with('/mnt/config/openstack/latest/',
exist_ok=True)
mock_copy.assert_called_once_with(
'/etc/ironic-python-agent.d/network_data.json',
'/mnt/config/openstack/latest/network_data.json')
class TestCheckEarlyLogging(ironic_agent_base.IronicAgentTest):
@mock.patch.object(utils, 'LOG', autospec=True)

View File

@@ -21,6 +21,7 @@ import glob
import io
import json
import os
import platform
import re
import shutil
import subprocess
@@ -33,6 +34,7 @@ from oslo_concurrency import processutils
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import units
import psutil
import requests
import tenacity
@@ -126,6 +128,7 @@ def _find_vmedia_device_by_labels(labels):
for device in ironic_utils.parse_device_tags(lsblk_output):
for label in labels:
if label.upper() == device['LABEL'].upper():
_early_log('Found vmedia candidate %s', device['KNAME'])
candidates.append(device['KNAME'])
for candidate in candidates:
@@ -210,13 +213,21 @@ def _copy_config_from(path):
msg = ("Unable to copy vmedia configuration %s to %s: %s"
% (src, dest, exc))
raise errors.VirtualMediaBootError(msg)
network_data_path = os.path.join(
path, 'openstack/latest/network_data.json')
if os.path.isfile(network_data_path):
dest_path = '/etc/ironic-python-agent%s' % ext
_early_log('Copying network_data.json to %s', dest_path)
os.makedirs(dest_path, exist_ok=True)
shutil.copy(network_data_path, os.path.join(dest_path,
'network_data.json'))
def _find_mount_point(device):
try:
path, _e = execute('findmnt', '-n', '-oTARGET', device)
except processutils.ProcessExecutionError:
return
except (processutils.ProcessExecutionError, OSError):
return None
else:
return path.strip()
@@ -233,6 +244,7 @@ def _check_vmedia_device(vmedia_device_file):
valid.
"""
try:
_early_log('Checking device %s vmedia status.', vmedia_device_file)
output, _e = execute('lsblk', '-n', '-s', '-P', '-b',
'-oKNAME,TRAN,TYPE,SIZE',
vmedia_device_file)
@@ -243,6 +255,8 @@ def _check_vmedia_device(vmedia_device_file):
try:
for device in ironic_utils.parse_device_tags(output):
if device['TYPE'] == 'part':
# This would exclude any existing configuration drive written
# by ironic previously.
_early_log('Excluding device %s from virtual media'
'consideration as it is a partition.',
device['KNAME'])
@@ -253,7 +267,7 @@ def _check_vmedia_device(vmedia_device_file):
# registered for the scsi transport and thus type used.
# This will most likely be a qemu driven testing VM,
# or an older machine where SCSI transport is directly
# used to convey in a virtual
# used to convey in a virtual media device.
return True
if device['TYPE'] == 'disk' and device['TRAN'] == 'usb':
# We know from experience on HPE machines, with ilo4/5, we see
@@ -306,6 +320,9 @@ def copy_config_from_vmedia():
"""Copies any configuration from a virtual media device.
Copies files under /etc/ironic-python-agent and /etc/ironic-python-agent.d.
:returns: True when we successfully copied content from the virtual media
device. Otherwise None.
"""
vmedia_device_file = _find_vmedia_device_by_labels(
['config-2', 'vmedia_boot_iso'])
@@ -319,10 +336,84 @@ def copy_config_from_vmedia():
# Determine the device
mounted = _find_mount_point(vmedia_device_file)
if mounted:
_copy_config_from(mounted)
# Unmount if already mounted by something like glean/cloud init.
execute("umount", mounted, run_as_root=True)
with ironic_utils.mounted(vmedia_device_file) as vmedia_mount_point:
_copy_config_from(vmedia_mount_point)
# Finally, check mount and ensure unmounted before proceeding.
# NOTE(TheJulia): This is a reserved path by convention for configuration
# drive handling. Since you *can* stack multiple devices, we need to make
# sure no other tooling left anything in an unclean state.
while os.path.ismount("/mnt/config"):
try:
execute("umount", "/mnt/config", run_as_root=True)
_early_log("Sucessfully unmounted /mnt/config as an orphaned "
"source of configuration.")
except (OSError, processutils.ProcessExecutionError):
_early_log("We failed to umount /mnt/config. This may be fatal "
"if virtual media is in use for configuration.")
return False
return True
def _determine_networking_path():
os_ver = platform.uname().version.lower()
if 'tinycore' in os_ver:
# Compact CI oriented IPA Image.
return 'tinycore'
if True in ['networkd' in x.name().lower()
for x in psutil.process_iter(['name'])]:
# Basically, this would be Ubuntu AFAIK.
return 'networkd'
else:
with ironic_utils.mounted(vmedia_device_file) as vmedia_mount_point:
_copy_config_from(vmedia_mount_point)
# This is fairly safe to assume at this point.
return 'networkmanager'
def trigger_glean_network_refresh():
"""Trigger procesing and refresh of network configuration."""
# NOTE(TheJulia): Ironic explicitly doesn't support cloud-init
# as the consumer of this information in ramdisks. This stance
# may change in the future, but as of the beginning of the
# Caracal development cycle, cloud-init is uninstalled
# by ironic-python-agent-builder.
glean_read_path = '/mnt/config/openstack/latest/'
# NOTE(TheJulia) Exist_okay because we might end up re-triggering down
# this path if the agent restarts due to a transient failure.
os.makedirs(glean_read_path, exist_ok=True)
backup_copy = '/etc/ironic-python-agent.d/network_data.json'
network_data = os.path.join(glean_read_path, 'network_data.json')
shutil.copy(backup_copy, network_data)
_early_log('Working to apply network configuration refresh.')
network_type = _determine_networking_path()
# TODO(TheJulia): Check if python-glean is even in the $PATH.
# Two uniform actions, run glean, then restart all networking.
try:
if network_type == 'networkmanager':
# network manager present
execute('glean', '--use-nm', run_as_root=True)
try:
execute('systemctl', 'restart', 'NetworkManager',
run_as_root=True)
except Exception:
# Inconsistent naming across distributions.
execute('systemctl', 'restart', 'NetworkManager.service',
run_as_root=True)
if network_type == 'networkd':
execute('glean', '--distro', 'networkd', run_as_root=True)
execute('systemctl', 'restart', 'systemd-networkd.service',
run_as_root=True)
if network_type == 'tinycore':
execute('glean', '--distro', 'tinycore', run_as_root=True)
# While a shell script, glean doesn't set this file to
# be executable once it writes it.
execute('/bin/sh', '/opt/network.sh', run_as_root=True)
except Exception as e:
_early_log('Unable to execute configuration refresh for '
'configuration drive data. Error: %s' % e)
def _get_cached_params():

View File

@@ -0,0 +1,19 @@
---
fixes:
- |
Fixes an issue with the agent where the presence of a configuration drive
from a prior node deployment could prevent networking configuration from
being determined and loaded in DHCP-less use cases.
The agent now attempts to identify the appropriate configuration drive
networking data source utilizing the existing device identification
process, and then triggers glean configuration to be re-generated,
after which networking is restarted. For more information, please
view `bug 2032377 <https://bugs.launchpad.net/ironic/+bug/2032377>`_.
security:
- |
The ironic agent is now able to re-assert networking configuration
in DHCP-less modes should a prior node's ``config drive`` be
discovered as readable. Normally this issue is not a security
issue as long as ``ironic-python-agent-builder`` is utilized.
Ironic's agent automatically exlcudes possible configuration
drive copies from previously deployed baremetal nodes.