Merge "A hardware manager call for a full sync before shutdown"

This commit is contained in:
Zuul 2024-11-07 15:07:12 +00:00 committed by Gerrit Code Review
commit 4f9f461ce9
5 changed files with 132 additions and 42 deletions

View File

@ -1017,8 +1017,8 @@ class StandbyExtension(base.BaseAgentExtension):
'but received "%s".') % command)
raise errors.InvalidCommandParamsError(msg)
try:
self.sync()
except errors.CommandExecutionError as e:
hardware.dispatch_to_all_managers('full_sync')
except Exception as e:
LOG.warning('Failed to sync file system buffers: % s', e)
try:
@ -1059,13 +1059,7 @@ class StandbyExtension(base.BaseAgentExtension):
:raises: CommandExecutionError if flushing file system buffers fails.
"""
LOG.debug('Flushing file system buffers')
try:
utils.execute('sync')
except processutils.ProcessExecutionError as e:
error_msg = 'Flushing file system buffers failed. Error: %s' % e
LOG.error(error_msg)
raise errors.CommandExecutionError(error_msg)
hardware.dispatch_to_all_managers('full_sync')
@base.sync_command('get_partition_uuids')
def get_partition_uuids(self):

View File

@ -1314,6 +1314,15 @@ class HardwareManager(object, metaclass=abc.ABCMeta):
"""
raise errors.IncompatibleHardwareMethodError()
def full_sync(self):
"""Synchronize all caches to the disk.
This method will be called on *all* managers before the ramdisk
is powered off externally. It is expected to try flush all caches
to the disk to avoid data loss.
"""
raise errors.IncompatibleHardwareMethodError()
class GenericHardwareManager(HardwareManager):
HARDWARE_MANAGER_NAME = 'generic_hardware_manager'
@ -3347,6 +3356,32 @@ class GenericHardwareManager(HardwareManager):
_collect_udev(io_dict)
def full_sync(self):
LOG.debug('Flushing file system buffers')
try:
utils.execute('sync')
except processutils.ProcessExecutionError as e:
error_msg = f'Flushing file system buffers failed: {e}'
LOG.error(error_msg)
# If sync fails, the machine is probably in a bad state and we
# better not continue.
raise errors.CommandExecutionError(error_msg)
LOG.debug('Flushing device caches')
try:
# https://www.kernel.org/doc/Documentation/sysctl/vm.txt
with open('/proc/sys/vm/drop_caches', 'wb') as fp:
fp.write(b'3')
except OSError as e:
LOG.warning('Unable to tell the kernel to drop caches: %s', e)
for blkdev in dispatch_to_managers('list_block_devices'):
try:
utils.execute('blockdev', '--flushbufs', blkdev.name)
except (processutils.ProcessExecutionError, OSError) as e:
LOG.warning('Cannot flush buffers of device %s: %s',
blkdev.name, e)
def _collect_udev(io_dict):
"""Collect device properties from udev."""

View File

@ -1250,55 +1250,69 @@ class TestStandbyExtension(base.IronicAgentTest):
self.agent_extension._run_shutdown_command, 'boot')
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_shutdown_command_fails(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_run_shutdown_command_fails(self, dispatch_mock, execute_mock):
execute_mock.side_effect = processutils.ProcessExecutionError
self.assertRaises(errors.SystemRebootError,
self.agent_extension._run_shutdown_command, 'reboot')
dispatch_mock.assert_called_once_with('full_sync')
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_shutdown_command_valid(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_run_shutdown_command_valid(self, dispatch_mock, execute_mock):
execute_mock.return_value = ('', '')
self.agent_extension._run_shutdown_command('poweroff')
calls = [mock.call('sync'),
calls = [mock.call('hwclock', '-v', '--systohc'),
mock.call('poweroff', use_standard_locale=True)]
execute_mock.assert_has_calls(calls)
dispatch_mock.assert_called_once_with('full_sync')
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_shutdown_command_valid_poweroff_sysrq(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_run_shutdown_command_valid_poweroff_sysrq(self, dispatch_mock,
execute_mock):
execute_mock.side_effect = [
('', ''), ('', ''),
('', ''),
processutils.ProcessExecutionError(''),
('', '')]
('', ''),
]
self.agent_extension._run_shutdown_command('poweroff')
calls = [mock.call('hwclock', '-v', '--systohc'),
mock.call('sync'),
mock.call('poweroff', use_standard_locale=True),
mock.call("echo o > /proc/sysrq-trigger", shell=True)]
execute_mock.assert_has_calls(calls)
dispatch_mock.assert_called_once_with('full_sync')
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_shutdown_command_valid_reboot_sysrq(self, execute_mock):
execute_mock.side_effect = [('', ''), ('', ''), ('',
'Running in chroot, ignoring request.'),
('', '')]
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_run_shutdown_command_valid_reboot_sysrq(self, dispatch_mock,
execute_mock):
execute_mock.side_effect = [
('', ''),
('', 'Running in chroot, ignoring request.'),
('', ''),
]
self.agent_extension._run_shutdown_command('reboot')
calls = [mock.call('sync'),
calls = [mock.call('hwclock', '-v', '--systohc'),
mock.call('reboot', use_standard_locale=True),
mock.call("echo b > /proc/sysrq-trigger", shell=True)]
execute_mock.assert_has_calls(calls)
dispatch_mock.assert_called_once_with('full_sync')
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_image(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_run_image(self, dispatch_mock, execute_mock):
execute_mock.return_value = ('', '')
success_result = self.agent_extension.run_image()
success_result.join()
calls = [mock.call('sync'),
calls = [mock.call('hwclock', '-v', '--systohc'),
mock.call('reboot', use_standard_locale=True)]
execute_mock.assert_has_calls(calls)
dispatch_mock.assert_called_once_with('full_sync')
self.assertEqual('SUCCEEDED', success_result.command_status)
execute_mock.reset_mock()
@ -1308,35 +1322,37 @@ class TestStandbyExtension(base.IronicAgentTest):
failed_result = self.agent_extension.run_image()
failed_result.join()
execute_mock.assert_any_call('sync')
self.assertEqual('FAILED', failed_result.command_status)
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_power_off(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_power_off(self, dispatch_mock, execute_mock):
execute_mock.return_value = ('', '')
success_result = self.agent_extension.power_off()
success_result.join()
calls = [mock.call('sync'),
mock.call('poweroff', use_standard_locale=True)]
execute_mock.assert_has_calls(calls)
execute_mock.assert_has_calls([
mock.call('hwclock', '-v', '--systohc'),
mock.call('poweroff', use_standard_locale=True),
])
dispatch_mock.assert_called_once_with('full_sync')
self.assertEqual('SUCCEEDED', success_result.command_status)
execute_mock.reset_mock()
execute_mock.return_value = ('', '')
execute_mock.side_effect = processutils.ProcessExecutionError
failed_result = self.agent_extension.power_off()
failed_result.join()
execute_mock.assert_any_call('sync')
self.assertEqual('FAILED', failed_result.command_status)
@mock.patch('ironic_python_agent.utils.determine_time_method',
autospec=True)
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_power_off_with_ntp_server(self, execute_mock, mock_timemethod):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_power_off_with_ntp_server(self, dispatch_mock, execute_mock,
mock_timemethod):
self.config(fail_if_clock_not_set=False)
self.config(ntp_server='192.168.1.1')
execute_mock.return_value = ('', '')
@ -1347,7 +1363,6 @@ class TestStandbyExtension(base.IronicAgentTest):
calls = [mock.call('ntpdate', '192.168.1.1'),
mock.call('hwclock', '-v', '--systohc'),
mock.call('sync'),
mock.call('poweroff', use_standard_locale=True)]
execute_mock.assert_has_calls(calls)
self.assertEqual('SUCCEEDED', success_result.command_status)
@ -1363,19 +1378,12 @@ class TestStandbyExtension(base.IronicAgentTest):
execute_mock.assert_any_call('ntpdate', '192.168.1.1')
self.assertEqual('FAILED', failed_result.command_status)
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_sync(self, execute_mock):
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
def test_sync(self, dispatch_mock):
result = self.agent_extension.sync()
execute_mock.assert_called_once_with('sync')
dispatch_mock.assert_called_once_with('full_sync')
self.assertEqual('SUCCEEDED', result.command_status)
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_sync_error(self, execute_mock):
execute_mock.side_effect = processutils.ProcessExecutionError
self.assertRaises(
errors.CommandExecutionError, self.agent_extension.sync)
execute_mock.assert_called_once_with('sync')
@mock.patch('ironic_python_agent.extensions.standby._write_image',
autospec=True)
@mock.patch('ironic_python_agent.extensions.standby._download_image',

View File

@ -6690,3 +6690,50 @@ class TestListNetworkInterfaces(base.IronicAgentTest):
self.assertEqual('eth0.101', interfaces[3].name)
self.assertEqual('eth1.102', interfaces[4].name)
self.assertEqual('eth1.103', interfaces[5].name)
@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True)
@mock.patch.object(il_utils, 'execute', autospec=True)
class TestFullSync(base.IronicAgentTest):
def setUp(self):
super().setUp()
self.hardware = hardware.GenericHardwareManager()
def test_sync_fails(self, mock_execute, mock_dispatch):
mock_execute.side_effect = processutils.ProcessExecutionError
self.assertRaises(errors.CommandExecutionError,
self.hardware.full_sync)
def test_full_sync(self, mock_execute, mock_dispatch):
mock_dispatch.return_value = [
hardware.BlockDevice('/dev/sda', '', 42, False),
hardware.BlockDevice('/dev/nvme0n1', '', 42, True),
]
with mock.patch.object(hardware, 'open', mock.mock_open()) as mock_opn:
self.hardware.full_sync()
mock_opn.return_value.write.assert_called_once_with(b'3')
mock_execute.assert_has_calls([
mock.call('sync'),
mock.call('blockdev', '--flushbufs', '/dev/sda'),
mock.call('blockdev', '--flushbufs', '/dev/nvme0n1'),
])
def test_optional_calls_fail(self, mock_execute, mock_dispatch):
mock_dispatch.return_value = [
hardware.BlockDevice('/dev/sda', '', 42, False),
hardware.BlockDevice('/dev/nvme0n1', '', 42, True),
]
mock_execute.side_effect = [
('', ''),
processutils.ProcessExecutionError,
processutils.ProcessExecutionError,
]
with mock.patch.object(hardware, 'open', mock.mock_open()) as mock_opn:
mock_opn.return_value.write.side_effect = OSError
self.hardware.full_sync()
mock_opn.return_value.write.assert_called_once_with(b'3')
mock_execute.assert_has_calls([
mock.call('sync'),
mock.call('blockdev', '--flushbufs', '/dev/sda'),
mock.call('blockdev', '--flushbufs', '/dev/nvme0n1'),
])

View File

@ -0,0 +1,6 @@
---
features:
- |
When synchronizing the disk caches at the end of a deployment, the agent
now also instructs the kernel to drop the virtual memory caches and
tells each block devices to sync its internal cache.