From 6ac3f350c049d9dc62c941702c20e67eff2c20a1 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Sun, 28 Apr 2024 18:57:49 -0700 Subject: [PATCH] Unmount config drives If this seems like deja vu, that is because it is. We had this very same issue with the original CoreOS ramdisk. Since we don't control the whole OS of the ramdisk, it only made sense to teach the agent to umount the folder. The folder is referenced already, and the agent does have safeguards in place, but unfortunately this issue led to a rebuild breaking where cloud-init, glean, and the agent were all trying do the right thing as they thought, and there were just multiple /mnt/config folders present in the OS. These are separate issues we also need to try and remedy. What happens is when the device is locked via a mount, the partition table is never updated to the running OS as the mount creates a lock. So the agent ends up thinking, in the case of a rebuild, that everything including creating a configuration drive on that device has been successful, but when you reboot, there is no partition table entry for the new partition as the change was not successfully written. This state prevented the workload from rebooting properly. This change eliminates that possibility moving forward by attempting to ensure that the cloud configuration folder is no longer mounted. Change-Id: I4399dd0934361003cca9ff95a7e3e3ae9bba3dab --- ironic_python_agent/tests/unit/test_utils.py | 43 ++++++++++++++++--- ironic_python_agent/utils.py | 28 ++++++++++++ ...ig-drive-is-umounted-a3985bbb45e89051.yaml | 8 ++++ 3 files changed, 73 insertions(+), 6 deletions(-) create mode 100644 releasenotes/notes/always-make-sure-config-drive-is-umounted-a3985bbb45e89051.yaml diff --git a/ironic_python_agent/tests/unit/test_utils.py b/ironic_python_agent/tests/unit/test_utils.py index fc683d22e..ae1b14a1b 100644 --- a/ironic_python_agent/tests/unit/test_utils.py +++ b/ironic_python_agent/tests/unit/test_utils.py @@ -22,6 +22,7 @@ import shutil import subprocess import tarfile import tempfile +import time from unittest import mock from ironic_lib import utils as ironic_utils @@ -877,6 +878,7 @@ class TestClockSyncUtils(ironic_agent_base.IronicAgentTest): self.assertEqual(0, mock_execute.call_count) +@mock.patch.object(utils, '_unmount_any_config_drives', autospec=True) @mock.patch.object(utils, '_booted_from_vmedia', autospec=True) @mock.patch.object(utils, '_check_vmedia_device', autospec=True) @mock.patch.object(utils, '_find_vmedia_device_by_labels', autospec=True) @@ -887,7 +889,8 @@ class TestCopyConfigFromVmedia(testtools.TestCase): def test_vmedia_found_not_booted_from_vmedia( self, mock_execute, mock_mount, mock_copy, - mock_find_device, mock_check_vmedia, mock_booted_from_vmedia): + mock_find_device, mock_check_vmedia, mock_booted_from_vmedia, + mock_unmount_config): mock_booted_from_vmedia.return_value = False mock_find_device.return_value = '/dev/fake' utils.copy_config_from_vmedia() @@ -896,10 +899,12 @@ class TestCopyConfigFromVmedia(testtools.TestCase): mock_copy.assert_not_called() mock_check_vmedia.assert_not_called() self.assertTrue(mock_booted_from_vmedia.called) + self.assertTrue(mock_unmount_config.called) def test_no_vmedia( self, mock_execute, mock_mount, mock_copy, - mock_find_device, mock_check_vmedia, mock_booted_from_vmedia): + mock_find_device, mock_check_vmedia, mock_booted_from_vmedia, + mock_unmount_config): mock_booted_from_vmedia.return_value = True mock_find_device.return_value = None utils.copy_config_from_vmedia() @@ -908,10 +913,12 @@ class TestCopyConfigFromVmedia(testtools.TestCase): mock_copy.assert_not_called() mock_check_vmedia.assert_not_called() self.assertFalse(mock_booted_from_vmedia.called) + self.assertTrue(mock_unmount_config.called) def test_no_files( self, mock_execute, mock_mount, mock_copy, - mock_find_device, mock_check_vmedia, mock_booted_from_vmedia): + mock_find_device, mock_check_vmedia, mock_booted_from_vmedia, + mock_unmount_config): mock_booted_from_vmedia.return_value = True temp_path = tempfile.mkdtemp() self.addCleanup(lambda: shutil.rmtree(temp_path)) @@ -925,10 +932,12 @@ class TestCopyConfigFromVmedia(testtools.TestCase): '/dev/something') mock_copy.assert_not_called() self.assertTrue(mock_booted_from_vmedia.called) + self.assertTrue(mock_unmount_config.called) def test_mounted_no_files( self, mock_execute, mock_mount, mock_copy, - mock_find_device, mock_check_vmedia, mock_booted_from_vmedia): + mock_find_device, mock_check_vmedia, mock_booted_from_vmedia, + mock_unmount_config): mock_booted_from_vmedia.return_value = True mock_execute.return_value = '/some/path', '' @@ -939,11 +948,13 @@ class TestCopyConfigFromVmedia(testtools.TestCase): mock_copy.assert_not_called() mock_mount.assert_not_called() self.assertTrue(mock_booted_from_vmedia.called) + self.assertTrue(mock_unmount_config.called) @mock.patch.object(os, 'makedirs', autospec=True) def test_copy( self, mock_makedirs, mock_execute, mock_mount, mock_copy, - mock_find_device, mock_check_vmedia, mock_booted_from_vmedia): + mock_find_device, mock_check_vmedia, mock_booted_from_vmedia, + mock_unmount_config): mock_booted_from_vmedia.return_value = True mock_find_device.return_value = '/dev/something' @@ -982,12 +993,14 @@ class TestCopyConfigFromVmedia(testtools.TestCase): mock.call(mock.ANY, '/etc/ironic-python-agent.d/ironic.conf'), ], any_order=True) self.assertTrue(mock_booted_from_vmedia.called) + self.assertTrue(mock_unmount_config.called) @mock.patch.object(os, 'makedirs', autospec=True) def test_copy_mounted( self, mock_makedirs, mock_execute, mock_mount, mock_copy, mock_find_device, mock_check_vmedia, - mock_booted_from_vmedia): + mock_booted_from_vmedia, + mock_unmount_config): mock_booted_from_vmedia.return_value = True mock_find_device.return_value = '/dev/something' path = tempfile.mkdtemp() @@ -1023,6 +1036,7 @@ class TestCopyConfigFromVmedia(testtools.TestCase): ], any_order=True) mock_mount.assert_not_called() self.assertTrue(mock_booted_from_vmedia.called) + self.assertTrue(mock_unmount_config.called) @mock.patch.object(requests, 'get', autospec=True) @@ -1143,3 +1157,20 @@ class TestCheckEarlyLogging(ironic_agent_base.IronicAgentTest): expected_calls = [mock.call('Early logging: %s', 'line 1.'), mock.call('Early logging: %s', 'line 2 message')] info.assert_has_calls(expected_calls) + + +class TestUnmountOfConfig(ironic_agent_base.IronicAgentTest): + + @mock.patch.object(utils, '_early_log', autospec=True) + @mock.patch.object(os.path, 'ismount', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + @mock.patch.object(time, 'sleep', autospec=True) + def test__unmount_any_config_drives(self, mock_sleep, mock_exec, + mock_ismount, mock_log,): + mock_ismount.side_effect = iter([True, True, False]) + utils._unmount_any_config_drives() + self.assertEqual(2, mock_sleep.call_count) + self.assertEqual(2, mock_log.call_count) + mock_exec.assert_has_calls([ + mock.call('umount', '/mnt/config'), + mock.call('umount', '/mnt/config')]) diff --git a/ironic_python_agent/utils.py b/ironic_python_agent/utils.py index e2f6db787..ae8bcf646 100644 --- a/ironic_python_agent/utils.py +++ b/ironic_python_agent/utils.py @@ -311,18 +311,27 @@ def copy_config_from_vmedia(): ['config-2', 'vmedia_boot_iso']) if not vmedia_device_file: _early_log('No virtual media device detected') + _unmount_any_config_drives() return if not _booted_from_vmedia(): _early_log('Cannot use configuration from virtual media as the ' 'agent was not booted from virtual media.') + _unmount_any_config_drives() return # Determine the device mounted = _find_mount_point(vmedia_device_file) if mounted: + # In this case a utility like a configuration drive tool + # has *already* mounted the device we believe to be the + # configuration drive. _copy_config_from(mounted) else: with ironic_utils.mounted(vmedia_device_file) as vmedia_mount_point: + # In this case, we use a temporary folder and extract the contents + # for our configuration. _copy_config_from(vmedia_mount_point) + # As a last act, just make sure there is nothing else mounted. + _unmount_any_config_drives() def _get_cached_params(): @@ -944,3 +953,22 @@ def find_in_lshw(lshw, by_id=None, by_class=None, recursive=False, **fields): yield child if recursive: yield from find_in_lshw(child, by_id, recursive=True, **fields) + + +def _unmount_any_config_drives(): + """Umount anything mounted to /mnt/config + + As part of the configuration drive model, utilities like cloud-init + and glean leverage a folder at /mnt/config to convey configuration + to a booting OS. + + The possibility exists that one of the utilties mounted one or multiple + such folders, even if the configuration was not used, and this can + result in locked devices which can prevent rebuild operations from + completing successfully as as long as the folder is mounted, it is + a "locked" device to the operating system. + """ + while os.path.ismount('/mnt/config'): + _early_log('Issuing an umount command for /mnt/config...') + execute('umount', '/mnt/config') + time.sleep(1) diff --git a/releasenotes/notes/always-make-sure-config-drive-is-umounted-a3985bbb45e89051.yaml b/releasenotes/notes/always-make-sure-config-drive-is-umounted-a3985bbb45e89051.yaml new file mode 100644 index 000000000..e8e6dc173 --- /dev/null +++ b/releasenotes/notes/always-make-sure-config-drive-is-umounted-a3985bbb45e89051.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + Fixes an issue where configuration drive volumes which are mounted + by the operating system could remain mounted and cause a lock to be + held, which may conflict with actions such as ``rebuild``. + The agent now always makes sure the folder used by Glean and Cloud-init + is not mounted.