diff --git a/ironic_python_agent/extensions/standby.py b/ironic_python_agent/extensions/standby.py index d3bf04af2..e3766f7ec 100644 --- a/ironic_python_agent/extensions/standby.py +++ b/ironic_python_agent/extensions/standby.py @@ -1020,8 +1020,8 @@ class StandbyExtension(base.BaseAgentExtension): 'but received "%s".') % command) raise errors.InvalidCommandParamsError(msg) try: - self.sync() - except errors.CommandExecutionError as e: + hardware.dispatch_to_all_managers('full_sync') + except Exception as e: LOG.warning('Failed to sync file system buffers: % s', e) try: @@ -1062,13 +1062,7 @@ class StandbyExtension(base.BaseAgentExtension): :raises: CommandExecutionError if flushing file system buffers fails. """ - LOG.debug('Flushing file system buffers') - try: - utils.execute('sync') - except processutils.ProcessExecutionError as e: - error_msg = 'Flushing file system buffers failed. Error: %s' % e - LOG.error(error_msg) - raise errors.CommandExecutionError(error_msg) + hardware.dispatch_to_all_managers('full_sync') @base.sync_command('get_partition_uuids') def get_partition_uuids(self): diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index 0b6926554..7f7741579 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -1308,6 +1308,15 @@ class HardwareManager(object, metaclass=abc.ABCMeta): """ raise errors.IncompatibleHardwareMethodError() + def full_sync(self): + """Synchronize all caches to the disk. + + This method will be called on *all* managers before the ramdisk + is powered off externally. It is expected to try flush all caches + to the disk to avoid data loss. + """ + raise errors.IncompatibleHardwareMethodError() + class GenericHardwareManager(HardwareManager): HARDWARE_MANAGER_NAME = 'generic_hardware_manager' @@ -3341,6 +3350,32 @@ class GenericHardwareManager(HardwareManager): _collect_udev(io_dict) + def full_sync(self): + LOG.debug('Flushing file system buffers') + try: + utils.execute('sync') + except processutils.ProcessExecutionError as e: + error_msg = f'Flushing file system buffers failed: {e}' + LOG.error(error_msg) + # If sync fails, the machine is probably in a bad state and we + # better not continue. + raise errors.CommandExecutionError(error_msg) + + LOG.debug('Flushing device caches') + try: + # https://www.kernel.org/doc/Documentation/sysctl/vm.txt + with open('/proc/sys/vm/drop_caches', 'wb') as fp: + fp.write(b'3') + except OSError as e: + LOG.warning('Unable to tell the kernel to drop caches: %s', e) + + for blkdev in dispatch_to_managers('list_block_devices'): + try: + utils.execute('blockdev', '--flushbufs', blkdev.name) + except (processutils.ProcessExecutionError, OSError) as e: + LOG.warning('Cannot flush buffers of device %s: %s', + blkdev.name, e) + def _collect_udev(io_dict): """Collect device properties from udev.""" diff --git a/ironic_python_agent/tests/unit/extensions/test_standby.py b/ironic_python_agent/tests/unit/extensions/test_standby.py index c5d467fc1..bff9576d7 100644 --- a/ironic_python_agent/tests/unit/extensions/test_standby.py +++ b/ironic_python_agent/tests/unit/extensions/test_standby.py @@ -1243,55 +1243,69 @@ class TestStandbyExtension(base.IronicAgentTest): self.agent_extension._run_shutdown_command, 'boot') @mock.patch('ironic_python_agent.utils.execute', autospec=True) - def test_run_shutdown_command_fails(self, execute_mock): + @mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True) + def test_run_shutdown_command_fails(self, dispatch_mock, execute_mock): execute_mock.side_effect = processutils.ProcessExecutionError self.assertRaises(errors.SystemRebootError, self.agent_extension._run_shutdown_command, 'reboot') + dispatch_mock.assert_called_once_with('full_sync') @mock.patch('ironic_python_agent.utils.execute', autospec=True) - def test_run_shutdown_command_valid(self, execute_mock): + @mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True) + def test_run_shutdown_command_valid(self, dispatch_mock, execute_mock): execute_mock.return_value = ('', '') self.agent_extension._run_shutdown_command('poweroff') - calls = [mock.call('sync'), + calls = [mock.call('hwclock', '-v', '--systohc'), mock.call('poweroff', use_standard_locale=True)] execute_mock.assert_has_calls(calls) + dispatch_mock.assert_called_once_with('full_sync') @mock.patch('ironic_python_agent.utils.execute', autospec=True) - def test_run_shutdown_command_valid_poweroff_sysrq(self, execute_mock): + @mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True) + def test_run_shutdown_command_valid_poweroff_sysrq(self, dispatch_mock, + execute_mock): execute_mock.side_effect = [ - ('', ''), ('', ''), + ('', ''), processutils.ProcessExecutionError(''), - ('', '')] + ('', ''), + ] self.agent_extension._run_shutdown_command('poweroff') calls = [mock.call('hwclock', '-v', '--systohc'), - mock.call('sync'), mock.call('poweroff', use_standard_locale=True), mock.call("echo o > /proc/sysrq-trigger", shell=True)] execute_mock.assert_has_calls(calls) + dispatch_mock.assert_called_once_with('full_sync') @mock.patch('ironic_python_agent.utils.execute', autospec=True) - def test_run_shutdown_command_valid_reboot_sysrq(self, execute_mock): - execute_mock.side_effect = [('', ''), ('', ''), ('', - 'Running in chroot, ignoring request.'), - ('', '')] + @mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True) + def test_run_shutdown_command_valid_reboot_sysrq(self, dispatch_mock, + execute_mock): + execute_mock.side_effect = [ + ('', ''), + ('', 'Running in chroot, ignoring request.'), + ('', ''), + ] self.agent_extension._run_shutdown_command('reboot') - calls = [mock.call('sync'), + calls = [mock.call('hwclock', '-v', '--systohc'), mock.call('reboot', use_standard_locale=True), mock.call("echo b > /proc/sysrq-trigger", shell=True)] execute_mock.assert_has_calls(calls) + dispatch_mock.assert_called_once_with('full_sync') @mock.patch('ironic_python_agent.utils.execute', autospec=True) - def test_run_image(self, execute_mock): + @mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True) + def test_run_image(self, dispatch_mock, execute_mock): execute_mock.return_value = ('', '') success_result = self.agent_extension.run_image() success_result.join() - calls = [mock.call('sync'), + calls = [mock.call('hwclock', '-v', '--systohc'), mock.call('reboot', use_standard_locale=True)] execute_mock.assert_has_calls(calls) + dispatch_mock.assert_called_once_with('full_sync') self.assertEqual('SUCCEEDED', success_result.command_status) execute_mock.reset_mock() @@ -1301,35 +1315,37 @@ class TestStandbyExtension(base.IronicAgentTest): failed_result = self.agent_extension.run_image() failed_result.join() - execute_mock.assert_any_call('sync') self.assertEqual('FAILED', failed_result.command_status) @mock.patch('ironic_python_agent.utils.execute', autospec=True) - def test_power_off(self, execute_mock): + @mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True) + def test_power_off(self, dispatch_mock, execute_mock): execute_mock.return_value = ('', '') success_result = self.agent_extension.power_off() success_result.join() - calls = [mock.call('sync'), - mock.call('poweroff', use_standard_locale=True)] - execute_mock.assert_has_calls(calls) + execute_mock.assert_has_calls([ + mock.call('hwclock', '-v', '--systohc'), + mock.call('poweroff', use_standard_locale=True), + ]) + dispatch_mock.assert_called_once_with('full_sync') self.assertEqual('SUCCEEDED', success_result.command_status) execute_mock.reset_mock() - execute_mock.return_value = ('', '') execute_mock.side_effect = processutils.ProcessExecutionError failed_result = self.agent_extension.power_off() failed_result.join() - execute_mock.assert_any_call('sync') self.assertEqual('FAILED', failed_result.command_status) @mock.patch('ironic_python_agent.utils.determine_time_method', autospec=True) @mock.patch('ironic_python_agent.utils.execute', autospec=True) - def test_power_off_with_ntp_server(self, execute_mock, mock_timemethod): + @mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True) + def test_power_off_with_ntp_server(self, dispatch_mock, execute_mock, + mock_timemethod): self.config(fail_if_clock_not_set=False) self.config(ntp_server='192.168.1.1') execute_mock.return_value = ('', '') @@ -1340,7 +1356,6 @@ class TestStandbyExtension(base.IronicAgentTest): calls = [mock.call('ntpdate', '192.168.1.1'), mock.call('hwclock', '-v', '--systohc'), - mock.call('sync'), mock.call('poweroff', use_standard_locale=True)] execute_mock.assert_has_calls(calls) self.assertEqual('SUCCEEDED', success_result.command_status) @@ -1356,19 +1371,12 @@ class TestStandbyExtension(base.IronicAgentTest): execute_mock.assert_any_call('ntpdate', '192.168.1.1') self.assertEqual('FAILED', failed_result.command_status) - @mock.patch('ironic_python_agent.utils.execute', autospec=True) - def test_sync(self, execute_mock): + @mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True) + def test_sync(self, dispatch_mock): result = self.agent_extension.sync() - execute_mock.assert_called_once_with('sync') + dispatch_mock.assert_called_once_with('full_sync') self.assertEqual('SUCCEEDED', result.command_status) - @mock.patch('ironic_python_agent.utils.execute', autospec=True) - def test_sync_error(self, execute_mock): - execute_mock.side_effect = processutils.ProcessExecutionError - self.assertRaises( - errors.CommandExecutionError, self.agent_extension.sync) - execute_mock.assert_called_once_with('sync') - @mock.patch('ironic_python_agent.extensions.standby._write_image', autospec=True) @mock.patch('ironic_python_agent.extensions.standby._download_image', diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index d161f50c8..4a7ce520c 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -6546,3 +6546,50 @@ class TestListNetworkInterfaces(base.IronicAgentTest): self.assertEqual('eth0.101', interfaces[3].name) self.assertEqual('eth1.102', interfaces[4].name) self.assertEqual('eth1.103', interfaces[5].name) + + +@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True) +@mock.patch.object(il_utils, 'execute', autospec=True) +class TestFullSync(base.IronicAgentTest): + def setUp(self): + super().setUp() + self.hardware = hardware.GenericHardwareManager() + + def test_sync_fails(self, mock_execute, mock_dispatch): + mock_execute.side_effect = processutils.ProcessExecutionError + self.assertRaises(errors.CommandExecutionError, + self.hardware.full_sync) + + def test_full_sync(self, mock_execute, mock_dispatch): + mock_dispatch.return_value = [ + hardware.BlockDevice('/dev/sda', '', 42, False), + hardware.BlockDevice('/dev/nvme0n1', '', 42, True), + ] + with mock.patch.object(hardware, 'open', mock.mock_open()) as mock_opn: + self.hardware.full_sync() + mock_opn.return_value.write.assert_called_once_with(b'3') + mock_execute.assert_has_calls([ + mock.call('sync'), + mock.call('blockdev', '--flushbufs', '/dev/sda'), + mock.call('blockdev', '--flushbufs', '/dev/nvme0n1'), + ]) + + def test_optional_calls_fail(self, mock_execute, mock_dispatch): + mock_dispatch.return_value = [ + hardware.BlockDevice('/dev/sda', '', 42, False), + hardware.BlockDevice('/dev/nvme0n1', '', 42, True), + ] + mock_execute.side_effect = [ + ('', ''), + processutils.ProcessExecutionError, + processutils.ProcessExecutionError, + ] + with mock.patch.object(hardware, 'open', mock.mock_open()) as mock_opn: + mock_opn.return_value.write.side_effect = OSError + self.hardware.full_sync() + mock_opn.return_value.write.assert_called_once_with(b'3') + mock_execute.assert_has_calls([ + mock.call('sync'), + mock.call('blockdev', '--flushbufs', '/dev/sda'), + mock.call('blockdev', '--flushbufs', '/dev/nvme0n1'), + ]) diff --git a/releasenotes/notes/full-sync-d2ec6b248a73f04a.yaml b/releasenotes/notes/full-sync-d2ec6b248a73f04a.yaml new file mode 100644 index 000000000..1baacd9f9 --- /dev/null +++ b/releasenotes/notes/full-sync-d2ec6b248a73f04a.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + When synchronizing the disk caches at the end of a deployment, the agent + now also instructs the kernel to drop the virtual memory caches and + tells each block devices to sync its internal cache.