diff --git a/ironic_python_agent/agent.py b/ironic_python_agent/agent.py index 81fca31fa..ff16dadef 100644 --- a/ironic_python_agent/agent.py +++ b/ironic_python_agent/agent.py @@ -371,6 +371,8 @@ class IronicPythonAgent(base.ExecuteCommandMixin): # Get the UUID so we can heartbeat to Ironic. Raises LookupNodeError # if there is an issue (uncaught, restart agent) self.started_at = _time() + # Attempt to sync the software clock + utils.sync_clock(ignore_errors=True) # Cached hw managers at runtime, not load time. See bug 1490008. hardware.get_managers() diff --git a/ironic_python_agent/config.py b/ironic_python_agent/config.py index 1b4bd4d1d..a3a43389f 100644 --- a/ironic_python_agent/config.py +++ b/ironic_python_agent/config.py @@ -219,6 +219,15 @@ cli_opts = [ 'the bare metal introspection service when the ' '``ironic-collect-introspection-data`` program is ' 'executing in daemon mode.'), + cfg.StrOpt('ntp_server', + default=APARAMS.get('ipa-ntp-server', None), + help='Address of a single NTP server against which the ' + 'agent should sync the hardware clock prior to ' + 'rebooting to an instance.'), + cfg.BoolOpt('fail_if_clock_not_set', + default=False, + help='If operations should fail if the clock time sync ' + 'fails to complete successfully.'), ] CONF.register_cli_opts(cli_opts) diff --git a/ironic_python_agent/errors.py b/ironic_python_agent/errors.py index 244b7bcb7..d263af5f4 100644 --- a/ironic_python_agent/errors.py +++ b/ironic_python_agent/errors.py @@ -331,3 +331,9 @@ class DeviceNotFound(NotFound): # RESTError. class InspectionError(Exception): """Failure during inspection.""" + + +class ClockSyncError(RESTError): + """Error raised when attempting to sync the system clock.""" + + message = 'Error syncing system clock' diff --git a/ironic_python_agent/extensions/standby.py b/ironic_python_agent/extensions/standby.py index c2f8b5c0c..03fe35b26 100644 --- a/ironic_python_agent/extensions/standby.py +++ b/ironic_python_agent/extensions/standby.py @@ -627,6 +627,11 @@ class StandbyExtension(base.BaseAgentExtension): :raises: SystemRebootError if the command errors out with an unsuccessful exit code. """ + # TODO(TheJulia): When we have deploy/clean steps, we should remove + # this upon shutdown. The clock sync deploy step can run before + # completing other operations. + self._sync_clock(ignore_errors=True) + if command not in ('reboot', 'poweroff'): msg = (('Expected the command "poweroff" or "reboot" ' 'but received "%s".') % command) @@ -673,3 +678,28 @@ class StandbyExtension(base.BaseAgentExtension): error_msg = 'Flushing file system buffers failed. Error: %s' % e LOG.error(error_msg) raise errors.CommandExecutionError(error_msg) + + # TODO(TheJulia): Once we have deploy/clean steps, this should + # become a step, which we ideally have enabled by default. + def _sync_clock(self, ignore_errors=False): + """Sync the clock to a configured NTP server. + + :param ignore_errors: Boolean option to indicate if the + errors should be fatal. This option + does not override the fail_if_clock_not_set + configuration option. + :raises: ClockSyncError if a failure is encountered and + errors are not ignored. + """ + try: + utils.sync_clock(ignore_errors=ignore_errors) + # Sync the system hardware clock from the software clock, + # as they are independent and the HW clock can still drift + # with long running ramdisks. + utils.execute('hwclock', '-v', '--systohc') + except (processutils.ProcessExecutionError, + errors.CommandExecutionError) as e: + msg = 'Failed to sync hardware clock: %s' % e + LOG.error(msg) + if CONF.fail_if_clock_not_set or not ignore_errors: + raise errors.ClockSyncError(msg) diff --git a/ironic_python_agent/tests/unit/extensions/test_standby.py b/ironic_python_agent/tests/unit/extensions/test_standby.py index 6b888f43a..0e65d9091 100644 --- a/ironic_python_agent/tests/unit/extensions/test_standby.py +++ b/ironic_python_agent/tests/unit/extensions/test_standby.py @@ -960,12 +960,13 @@ class TestStandbyExtension(base.IronicAgentTest): @mock.patch('ironic_python_agent.utils.execute', autospec=True) def test_run_shutdown_command_valid_poweroff_sysrq(self, execute_mock): - execute_mock.side_effect = [('', ''), ('', + execute_mock.side_effect = [('', ''), ('', ''), ('', 'Running in chroot, ignoring request.'), ('', '')] self.agent_extension._run_shutdown_command('poweroff') - calls = [mock.call('sync'), + calls = [mock.call('hwclock', '-v', '--systohc'), + mock.call('sync'), mock.call('poweroff', use_standard_locale=True, check_exit_code=[0]), mock.call("echo o > /proc/sysrq-trigger", shell=True)] @@ -973,7 +974,7 @@ class TestStandbyExtension(base.IronicAgentTest): @mock.patch('ironic_python_agent.utils.execute', autospec=True) def test_run_shutdown_command_valid_reboot_sysrq(self, execute_mock): - execute_mock.side_effect = [('', ''), ('', + execute_mock.side_effect = [('', ''), ('', ''), ('', 'Running in chroot, ignoring request.'), ('', '')] @@ -1029,6 +1030,37 @@ class TestStandbyExtension(base.IronicAgentTest): execute_mock.assert_any_call('sync') self.assertEqual('FAILED', failed_result.command_status) + @mock.patch('ironic_python_agent.utils.determine_time_method', + autospec=True) + @mock.patch('ironic_python_agent.utils.execute', autospec=True) + def test_power_off_with_ntp_server(self, execute_mock, mock_timemethod): + self.config(fail_if_clock_not_set=False) + self.config(ntp_server='192.168.1.1') + execute_mock.return_value = ('', '') + mock_timemethod.return_value = 'ntpdate' + + success_result = self.agent_extension.power_off() + success_result.join() + + calls = [mock.call('ntpdate', '192.168.1.1'), + mock.call('hwclock', '-v', '--systohc'), + mock.call('sync'), + mock.call('poweroff', use_standard_locale=True, + check_exit_code=[0])] + execute_mock.assert_has_calls(calls) + self.assertEqual('SUCCEEDED', success_result.command_status) + + self.config(fail_if_clock_not_set=True) + execute_mock.reset_mock() + execute_mock.return_value = ('', '') + execute_mock.side_effect = processutils.ProcessExecutionError + + failed_result = self.agent_extension.power_off() + failed_result.join() + + execute_mock.assert_any_call('ntpdate', '192.168.1.1') + self.assertEqual('FAILED', failed_result.command_status) + @mock.patch('ironic_python_agent.utils.execute', autospec=True) def test_sync(self, execute_mock): result = self.agent_extension.sync() @@ -1169,6 +1201,33 @@ class TestStandbyExtension(base.IronicAgentTest): '/dev/fake') self.assertEqual(expected_msg, result_msg) + @mock.patch('ironic_python_agent.utils.determine_time_method', + autospec=True) + @mock.patch('ironic_python_agent.utils.execute', autospec=True) + def test__sync_clock(self, execute_mock, mock_timemethod): + self.config(ntp_server='192.168.1.1') + self.config(fail_if_clock_not_set=True) + execute_mock.return_value = ('', '') + mock_timemethod.return_value = 'chronyd' + + self.agent_extension._sync_clock() + + calls = [mock.call('chronyd', check_exit_code=[0, 1]), + mock.call('chronyc', 'add', 'server', '192.168.1.1'), + mock.call('chronyc', 'makestep'), + mock.call('hwclock', '-v', '--systohc')] + execute_mock.assert_has_calls(calls) + + execute_mock.reset_mock() + execute_mock.side_effect = [ + ('', ''), ('', ''), ('', ''), + processutils.ProcessExecutionError('boop') + ] + + self.assertRaises(errors.ClockSyncError, + self.agent_extension._sync_clock) + execute_mock.assert_any_call('hwclock', '-v', '--systohc') + @mock.patch('hashlib.md5', autospec=True) @mock.patch('requests.get', autospec=True) diff --git a/ironic_python_agent/tests/unit/test_utils.py b/ironic_python_agent/tests/unit/test_utils.py index 8eb505efc..eb6eacd3f 100644 --- a/ironic_python_agent/tests/unit/test_utils.py +++ b/ironic_python_agent/tests/unit/test_utils.py @@ -674,3 +674,108 @@ class TestRemoveKeys(testtools.TestCase): 'key': 'value', 'other': [{'configdrive': '<...>'}, 'string', 0]} self.assertEqual(expected, utils.remove_large_keys(value)) + + +@mock.patch.object(utils, 'execute', autospec=True) +class TestClockSyncUtils(ironic_agent_base.IronicAgentTest): + + def test_determine_time_method_none(self, mock_execute): + mock_execute.side_effect = OSError + self.assertIsNone(utils.determine_time_method()) + + def test_determine_time_method_ntpdate(self, mock_execute): + mock_execute.side_effect = [ + OSError, # No chronyd found + ('', ''), # Returns nothing on ntpdate call + ] + calls = [mock.call('chronyd', '-h'), + mock.call('ntpdate', '-v', check_exit_code=[0, 1])] + return_value = utils.determine_time_method() + self.assertEqual('ntpdate', return_value) + mock_execute.assert_has_calls(calls) + + def test_determine_time_method_chronyd(self, mock_execute): + mock_execute.side_effect = [ + ('', ''), # Returns nothing on ntpdate call + ] + calls = [mock.call('chronyd', '-h')] + return_value = utils.determine_time_method() + self.assertEqual('chronyd', return_value) + mock_execute.assert_has_calls(calls) + + @mock.patch.object(utils, 'determine_time_method', autospec=True) + def test_sync_clock_ntp(self, mock_time_method, mock_execute): + self.config(ntp_server='192.168.1.1') + mock_time_method.return_value = 'ntpdate' + utils.sync_clock() + mock_execute.assert_has_calls([mock.call('ntpdate', '192.168.1.1')]) + + @mock.patch.object(utils, 'determine_time_method', autospec=True) + def test_sync_clock_ntp_raises_exception(self, mock_time_method, + mock_execute): + self.config(ntp_server='192.168.1.1') + self.config(fail_if_clock_not_set=True) + mock_time_method.return_value = 'ntpdate' + mock_execute.side_effect = processutils.ProcessExecutionError() + self.assertRaises(errors.CommandExecutionError, utils.sync_clock) + + @mock.patch.object(utils, 'determine_time_method', autospec=True) + def test_sync_clock_chrony(self, mock_time_method, mock_execute): + self.config(ntp_server='192.168.1.1') + mock_time_method.return_value = 'chronyd' + utils.sync_clock() + mock_execute.assert_has_calls([ + mock.call('chronyd', check_exit_code=[0, 1]), + mock.call('chronyc', 'add', 'server', '192.168.1.1'), + mock.call('chronyc', 'makestep'), + ]) + + @mock.patch.object(utils, 'determine_time_method', autospec=True) + def test_sync_clock_chrony_already_present(self, mock_time_method, + mock_execute): + self.config(ntp_server='192.168.1.1') + mock_time_method.return_value = 'chronyd' + mock_execute.side_effect = [ + ('', ''), + processutils.ProcessExecutionError( + stderr='Source already present'), + ('', ''), + ] + utils.sync_clock() + mock_execute.assert_has_calls([ + mock.call('chronyd', check_exit_code=[0, 1]), + mock.call('chronyc', 'add', 'server', '192.168.1.1'), + mock.call('chronyc', 'makestep'), + ]) + + @mock.patch.object(utils, 'determine_time_method', autospec=True) + def test_sync_clock_chrony_failure(self, mock_time_method, mock_execute): + self.config(ntp_server='192.168.1.1') + self.config(fail_if_clock_not_set=True) + mock_time_method.return_value = 'chronyd' + mock_execute.side_effect = [ + ('', ''), + processutils.ProcessExecutionError(stderr='time verboten'), + ] + self.assertRaisesRegex(errors.CommandExecutionError, + 'Error occured adding ntp', + utils.sync_clock) + mock_execute.assert_has_calls([ + mock.call('chronyd', check_exit_code=[0, 1]), + mock.call('chronyc', 'add', 'server', '192.168.1.1'), + ]) + + @mock.patch.object(utils, 'determine_time_method', autospec=True) + def test_sync_clock_none(self, mock_time_method, mock_execute): + self.config(ntp_server='192.168.1.1') + mock_time_method.return_value = None + utils.sync_clock(ignore_errors=True) + self.assertEqual(0, mock_execute.call_count) + + @mock.patch.object(utils, 'determine_time_method', autospec=True) + def test_sync_clock_ntp_server_is_none(self, mock_time_method, + mock_execute): + self.config(ntp_server=None) + mock_time_method.return_value = None + utils.sync_clock() + self.assertEqual(0, mock_execute.call_count) diff --git a/ironic_python_agent/utils.py b/ironic_python_agent/utils.py index 5f1c0dc2f..ccbc42f77 100644 --- a/ironic_python_agent/utils.py +++ b/ironic_python_agent/utils.py @@ -27,6 +27,7 @@ import time from ironic_lib import utils as ironic_utils from oslo_concurrency import processutils +from oslo_config import cfg from oslo_log import log as logging from oslo_serialization import base64 from oslo_utils import units @@ -35,6 +36,7 @@ from ironic_python_agent import errors LOG = logging.getLogger(__name__) +CONF = cfg.CONF # Agent parameters can be passed by kernel command-line arguments and/or # by virtual media. Virtual media parameters passed would be available @@ -486,3 +488,91 @@ def remove_large_keys(var): return var.__class__(map(remove_large_keys, var)) else: return var + + +def determine_time_method(): + """Helper method to determine what time utility is present. + + :returns: "ntpdate" if ntpdate has been found, "chrony" if chrony + was located, and None if neither are located. If both tools + are present, "chrony" will supercede "ntpdate". + """ + try: + execute('chronyd', '-h') + return 'chronyd' + except OSError: + LOG.debug('Command \'chronyd\' not found for time sync.') + try: + execute('ntpdate', '-v', check_exit_code=[0, 1]) + return 'ntpdate' + except OSError: + LOG.debug('Command \'ntpdate\' not found for time sync.') + return None + + +def sync_clock(ignore_errors=False): + """Syncs the software clock of the system. + + This method syncs the system software clock if a NTP server + was defined in the "[DEFAULT]ntp_server" configuration + parameter. This method does NOT attempt to sync the hardware + clock. + + It will try to use either ntpdate or chrony to sync the software + clock of the system. If neither is found, an exception is raised. + + :param ignore_errors: Boolean value default False that allows for + the method to be called and ultimately not + raise an exception. This may be useful for + opportunistically attempting to sync the + system software clock. + :raises: CommandExecutionError if an error is encountered while + attempting to sync the software clock. + """ + + if not CONF.ntp_server: + return + + method = determine_time_method() + + if method == 'ntpdate': + try: + execute('ntpdate', CONF.ntp_server) + LOG.debug('Set software clock using ntpdate') + except processutils.ProcessExecutionError as e: + msg = ('Failed to sync with ntp server: ' + '%s: %s' % (CONF.ntp_server, e)) + LOG.error(msg) + if CONF.fail_if_clock_not_set or not ignore_errors: + raise errors.CommandExecutionError(msg) + elif method == 'chronyd': + try: + # 0 should be if chronyd started + # 1 if already running + execute('chronyd', check_exit_code=[0, 1]) + # NOTE(TheJulia): Once started, chronyd forks and stays in the + # background as a server service, it will continue to keep the + # clock in sync. + try: + execute('chronyc', 'add', 'server', CONF.ntp_server) + except processutils.ProcessExecutionError as e: + if 'Source already present' not in str(e): + msg = 'Error occured adding ntp server: %s' % e + LOG.error(msg) + raise errors.CommandExecutionError(msg) + # Force the clock to sync now. + execute('chronyc', 'makestep') + LOG.debug('Set software clock using chrony') + except (processutils.ProcessExecutionError, + errors.CommandExecutionError) as e: + msg = ('Failed to sync time using chrony to ntp server: ' + '%s: %s' % (CONF.ntp_server, e)) + LOG.error(msg) + if CONF.fail_if_clock_not_set or not ignore_errors: + raise errors.CommandExecutionError(msg) + else: + msg = ('Unable to sync clock, available methods of ' + '\'ntpdate\' or \'chrony\' not found.') + LOG.error(msg) + if CONF.fail_if_clock_not_set or not ignore_errors: + raise errors.CommandExecutionError(msg) diff --git a/releasenotes/notes/set-clock-prior-to-poweroff-af6ec210aad8b45a.yaml b/releasenotes/notes/set-clock-prior-to-poweroff-af6ec210aad8b45a.yaml new file mode 100644 index 000000000..ae327d778 --- /dev/null +++ b/releasenotes/notes/set-clock-prior-to-poweroff-af6ec210aad8b45a.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + Adds a feature where IPA will utilize a ``[DEFAULT]ntp_server`` or + ``ipa-ntp-server`` kernel command line argument to cause the agent + to attempt to sync the clock to the NTP source. The agent also attempts + to sync the software clock to the NTP time source, and assert an update + to the hardware clock prior to powering the machine off.