A hardware manager call for a full sync before shutdown

This is largely required for the future lockdown command but can also be
used before the normal shutdown, especially in the sync command which is
currently used before an out-of-band shutdown command is issued.

In addition to a plain sync, the new command also tells the kernel to
drop its cached and issues a low-level sync command to each block
device.

Partial-Bug: #2077432
Change-Id: I3fc87b20bc5387a466b24ebc19b9982e4e368d20
This commit is contained in:
Dmitry Tantsur
2024-10-01 15:47:31 +02:00
parent b44250a1be
commit 5aa0c1a2bb
5 changed files with 132 additions and 42 deletions

View File

@@ -1020,8 +1020,8 @@ class StandbyExtension(base.BaseAgentExtension):
'but received "%s".') % command)
raise errors.InvalidCommandParamsError(msg)
try:
self.sync()
except errors.CommandExecutionError as e:
hardware.dispatch_to_all_managers('full_sync')
except Exception as e:
LOG.warning('Failed to sync file system buffers: % s', e)
try:
@@ -1062,13 +1062,7 @@ class StandbyExtension(base.BaseAgentExtension):
:raises: CommandExecutionError if flushing file system buffers fails.
"""
LOG.debug('Flushing file system buffers')
try:
utils.execute('sync')
except processutils.ProcessExecutionError as e:
error_msg = 'Flushing file system buffers failed. Error: %s' % e
LOG.error(error_msg)
raise errors.CommandExecutionError(error_msg)
hardware.dispatch_to_all_managers('full_sync')
@base.sync_command('get_partition_uuids')
def get_partition_uuids(self):

View File

@@ -1308,6 +1308,15 @@ class HardwareManager(object, metaclass=abc.ABCMeta):
"""
raise errors.IncompatibleHardwareMethodError()
def full_sync(self):
"""Synchronize all caches to the disk.
This method will be called on *all* managers before the ramdisk
is powered off externally. It is expected to try flush all caches
to the disk to avoid data loss.
"""
raise errors.IncompatibleHardwareMethodError()
class GenericHardwareManager(HardwareManager):
HARDWARE_MANAGER_NAME = 'generic_hardware_manager'
@@ -3341,6 +3350,32 @@ class GenericHardwareManager(HardwareManager):
_collect_udev(io_dict)
def full_sync(self):
LOG.debug('Flushing file system buffers')
try:
utils.execute('sync')
except processutils.ProcessExecutionError as e:
error_msg = f'Flushing file system buffers failed: {e}'
LOG.error(error_msg)
# If sync fails, the machine is probably in a bad state and we
# better not continue.
raise errors.CommandExecutionError(error_msg)
LOG.debug('Flushing device caches')
try:
# https://www.kernel.org/doc/Documentation/sysctl/vm.txt
with open('/proc/sys/vm/drop_caches', 'wb') as fp:
fp.write(b'3')
except OSError as e:
LOG.warning('Unable to tell the kernel to drop caches: %s', e)
for blkdev in dispatch_to_managers('list_block_devices'):
try:
utils.execute('blockdev', '--flushbufs', blkdev.name)
except (processutils.ProcessExecutionError, OSError) as e:
LOG.warning('Cannot flush buffers of device %s: %s',
blkdev.name, e)
def _collect_udev(io_dict):
"""Collect device properties from udev."""

View File

@@ -1243,55 +1243,69 @@ class TestStandbyExtension(base.IronicAgentTest):
self.agent_extension._run_shutdown_command, 'boot')
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_shutdown_command_fails(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_run_shutdown_command_fails(self, dispatch_mock, execute_mock):
execute_mock.side_effect = processutils.ProcessExecutionError
self.assertRaises(errors.SystemRebootError,
self.agent_extension._run_shutdown_command, 'reboot')
dispatch_mock.assert_called_once_with('full_sync')
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_shutdown_command_valid(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_run_shutdown_command_valid(self, dispatch_mock, execute_mock):
execute_mock.return_value = ('', '')
self.agent_extension._run_shutdown_command('poweroff')
calls = [mock.call('sync'),
calls = [mock.call('hwclock', '-v', '--systohc'),
mock.call('poweroff', use_standard_locale=True)]
execute_mock.assert_has_calls(calls)
dispatch_mock.assert_called_once_with('full_sync')
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_shutdown_command_valid_poweroff_sysrq(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_run_shutdown_command_valid_poweroff_sysrq(self, dispatch_mock,
execute_mock):
execute_mock.side_effect = [
('', ''), ('', ''),
('', ''),
processutils.ProcessExecutionError(''),
('', '')]
('', ''),
]
self.agent_extension._run_shutdown_command('poweroff')
calls = [mock.call('hwclock', '-v', '--systohc'),
mock.call('sync'),
mock.call('poweroff', use_standard_locale=True),
mock.call("echo o > /proc/sysrq-trigger", shell=True)]
execute_mock.assert_has_calls(calls)
dispatch_mock.assert_called_once_with('full_sync')
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_shutdown_command_valid_reboot_sysrq(self, execute_mock):
execute_mock.side_effect = [('', ''), ('', ''), ('',
'Running in chroot, ignoring request.'),
('', '')]
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_run_shutdown_command_valid_reboot_sysrq(self, dispatch_mock,
execute_mock):
execute_mock.side_effect = [
('', ''),
('', 'Running in chroot, ignoring request.'),
('', ''),
]
self.agent_extension._run_shutdown_command('reboot')
calls = [mock.call('sync'),
calls = [mock.call('hwclock', '-v', '--systohc'),
mock.call('reboot', use_standard_locale=True),
mock.call("echo b > /proc/sysrq-trigger", shell=True)]
execute_mock.assert_has_calls(calls)
dispatch_mock.assert_called_once_with('full_sync')
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_image(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_run_image(self, dispatch_mock, execute_mock):
execute_mock.return_value = ('', '')
success_result = self.agent_extension.run_image()
success_result.join()
calls = [mock.call('sync'),
calls = [mock.call('hwclock', '-v', '--systohc'),
mock.call('reboot', use_standard_locale=True)]
execute_mock.assert_has_calls(calls)
dispatch_mock.assert_called_once_with('full_sync')
self.assertEqual('SUCCEEDED', success_result.command_status)
execute_mock.reset_mock()
@@ -1301,35 +1315,37 @@ class TestStandbyExtension(base.IronicAgentTest):
failed_result = self.agent_extension.run_image()
failed_result.join()
execute_mock.assert_any_call('sync')
self.assertEqual('FAILED', failed_result.command_status)
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_power_off(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_power_off(self, dispatch_mock, execute_mock):
execute_mock.return_value = ('', '')
success_result = self.agent_extension.power_off()
success_result.join()
calls = [mock.call('sync'),
mock.call('poweroff', use_standard_locale=True)]
execute_mock.assert_has_calls(calls)
execute_mock.assert_has_calls([
mock.call('hwclock', '-v', '--systohc'),
mock.call('poweroff', use_standard_locale=True),
])
dispatch_mock.assert_called_once_with('full_sync')
self.assertEqual('SUCCEEDED', success_result.command_status)
execute_mock.reset_mock()
execute_mock.return_value = ('', '')
execute_mock.side_effect = processutils.ProcessExecutionError
failed_result = self.agent_extension.power_off()
failed_result.join()
execute_mock.assert_any_call('sync')
self.assertEqual('FAILED', failed_result.command_status)
@mock.patch('ironic_python_agent.utils.determine_time_method',
autospec=True)
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_power_off_with_ntp_server(self, execute_mock, mock_timemethod):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_power_off_with_ntp_server(self, dispatch_mock, execute_mock,
mock_timemethod):
self.config(fail_if_clock_not_set=False)
self.config(ntp_server='192.168.1.1')
execute_mock.return_value = ('', '')
@@ -1340,7 +1356,6 @@ class TestStandbyExtension(base.IronicAgentTest):
calls = [mock.call('ntpdate', '192.168.1.1'),
mock.call('hwclock', '-v', '--systohc'),
mock.call('sync'),
mock.call('poweroff', use_standard_locale=True)]
execute_mock.assert_has_calls(calls)
self.assertEqual('SUCCEEDED', success_result.command_status)
@@ -1356,19 +1371,12 @@ class TestStandbyExtension(base.IronicAgentTest):
execute_mock.assert_any_call('ntpdate', '192.168.1.1')
self.assertEqual('FAILED', failed_result.command_status)
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_sync(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_sync(self, dispatch_mock):
result = self.agent_extension.sync()
execute_mock.assert_called_once_with('sync')
dispatch_mock.assert_called_once_with('full_sync')
self.assertEqual('SUCCEEDED', result.command_status)
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_sync_error(self, execute_mock):
execute_mock.side_effect = processutils.ProcessExecutionError
self.assertRaises(
errors.CommandExecutionError, self.agent_extension.sync)
execute_mock.assert_called_once_with('sync')
@mock.patch('ironic_python_agent.extensions.standby._write_image',
autospec=True)
@mock.patch('ironic_python_agent.extensions.standby._download_image',

View File

@@ -6546,3 +6546,50 @@ class TestListNetworkInterfaces(base.IronicAgentTest):
self.assertEqual('eth0.101', interfaces[3].name)
self.assertEqual('eth1.102', interfaces[4].name)
self.assertEqual('eth1.103', interfaces[5].name)
@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True)
@mock.patch.object(il_utils, 'execute', autospec=True)
class TestFullSync(base.IronicAgentTest):
def setUp(self):
super().setUp()
self.hardware = hardware.GenericHardwareManager()
def test_sync_fails(self, mock_execute, mock_dispatch):
mock_execute.side_effect = processutils.ProcessExecutionError
self.assertRaises(errors.CommandExecutionError,
self.hardware.full_sync)
def test_full_sync(self, mock_execute, mock_dispatch):
mock_dispatch.return_value = [
hardware.BlockDevice('/dev/sda', '', 42, False),
hardware.BlockDevice('/dev/nvme0n1', '', 42, True),
]
with mock.patch.object(hardware, 'open', mock.mock_open()) as mock_opn:
self.hardware.full_sync()
mock_opn.return_value.write.assert_called_once_with(b'3')
mock_execute.assert_has_calls([
mock.call('sync'),
mock.call('blockdev', '--flushbufs', '/dev/sda'),
mock.call('blockdev', '--flushbufs', '/dev/nvme0n1'),
])
def test_optional_calls_fail(self, mock_execute, mock_dispatch):
mock_dispatch.return_value = [
hardware.BlockDevice('/dev/sda', '', 42, False),
hardware.BlockDevice('/dev/nvme0n1', '', 42, True),
]
mock_execute.side_effect = [
('', ''),
processutils.ProcessExecutionError,
processutils.ProcessExecutionError,
]
with mock.patch.object(hardware, 'open', mock.mock_open()) as mock_opn:
mock_opn.return_value.write.side_effect = OSError
self.hardware.full_sync()
mock_opn.return_value.write.assert_called_once_with(b'3')
mock_execute.assert_has_calls([
mock.call('sync'),
mock.call('blockdev', '--flushbufs', '/dev/sda'),
mock.call('blockdev', '--flushbufs', '/dev/nvme0n1'),
])

View File

@@ -0,0 +1,6 @@
---
features:
- |
When synchronizing the disk caches at the end of a deployment, the agent
now also instructs the kernel to drop the virtual memory caches and
tells each block devices to sync its internal cache.