Add NTP time sync

Attempt to sync the clock and save it to the hardware clock.

This feature supports use of chrony or ntpdate.

Sem-Ver: feature
Change-Id: I178d7614429d582e742d9cba6d0fa3ae099775e3
Story: 1619054
Task: 11591
This commit is contained in:
Julia Kreger 2020-02-14 13:45:12 -08:00
parent 48ef7c9188
commit cee4bfc4bc
8 changed files with 312 additions and 3 deletions

View File

@ -369,6 +369,8 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
# Get the UUID so we can heartbeat to Ironic. Raises LookupNodeError
# if there is an issue (uncaught, restart agent)
self.started_at = _time()
# Attempt to sync the software clock
utils.sync_clock(ignore_errors=True)
# Cached hw managers at runtime, not load time. See bug 1490008.
hardware.get_managers()

View File

@ -219,6 +219,15 @@ cli_opts = [
'the bare metal introspection service when the '
'``ironic-collect-introspection-data`` program is '
'executing in daemon mode.'),
cfg.StrOpt('ntp_server',
default=APARAMS.get('ipa-ntp-server', None),
help='Address of a single NTP server against which the '
'agent should sync the hardware clock prior to '
'rebooting to an instance.'),
cfg.BoolOpt('fail_if_clock_not_set',
default=False,
help='If operations should fail if the clock time sync '
'fails to complete successfully.'),
]
CONF.register_cli_opts(cli_opts)

View File

@ -331,3 +331,9 @@ class DeviceNotFound(NotFound):
# RESTError.
class InspectionError(Exception):
"""Failure during inspection."""
class ClockSyncError(RESTError):
"""Error raised when attempting to sync the system clock."""
message = 'Error syncing system clock'

View File

@ -617,6 +617,11 @@ class StandbyExtension(base.BaseAgentExtension):
:raises: SystemRebootError if the command errors out with an
unsuccessful exit code.
"""
# TODO(TheJulia): When we have deploy/clean steps, we should remove
# this upon shutdown. The clock sync deploy step can run before
# completing other operations.
self._sync_clock(ignore_errors=True)
if command not in ('reboot', 'poweroff'):
msg = (('Expected the command "poweroff" or "reboot" '
'but received "%s".') % command)
@ -663,3 +668,28 @@ class StandbyExtension(base.BaseAgentExtension):
error_msg = 'Flushing file system buffers failed. Error: %s' % e
LOG.error(error_msg)
raise errors.CommandExecutionError(error_msg)
# TODO(TheJulia): Once we have deploy/clean steps, this should
# become a step, which we ideally have enabled by default.
def _sync_clock(self, ignore_errors=False):
"""Sync the clock to a configured NTP server.
:param ignore_errors: Boolean option to indicate if the
errors should be fatal. This option
does not override the fail_if_clock_not_set
configuration option.
:raises: ClockSyncError if a failure is encountered and
errors are not ignored.
"""
try:
utils.sync_clock(ignore_errors=ignore_errors)
# Sync the system hardware clock from the software clock,
# as they are independent and the HW clock can still drift
# with long running ramdisks.
utils.execute('hwclock', '-v', '--systohc')
except (processutils.ProcessExecutionError,
errors.CommandExecutionError) as e:
msg = 'Failed to sync hardware clock: %s' % e
LOG.error(msg)
if CONF.fail_if_clock_not_set or not ignore_errors:
raise errors.ClockSyncError(msg)

View File

@ -953,12 +953,13 @@ class TestStandbyExtension(base.IronicAgentTest):
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_shutdown_command_valid_poweroff_sysrq(self, execute_mock):
execute_mock.side_effect = [('', ''), ('',
execute_mock.side_effect = [('', ''), ('', ''), ('',
'Running in chroot, ignoring request.'),
('', '')]
self.agent_extension._run_shutdown_command('poweroff')
calls = [mock.call('sync'),
calls = [mock.call('hwclock', '-v', '--systohc'),
mock.call('sync'),
mock.call('poweroff', use_standard_locale=True,
check_exit_code=[0]),
mock.call("echo o > /proc/sysrq-trigger", shell=True)]
@ -966,7 +967,7 @@ class TestStandbyExtension(base.IronicAgentTest):
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_run_shutdown_command_valid_reboot_sysrq(self, execute_mock):
execute_mock.side_effect = [('', ''), ('',
execute_mock.side_effect = [('', ''), ('', ''), ('',
'Running in chroot, ignoring request.'),
('', '')]
@ -1022,6 +1023,37 @@ class TestStandbyExtension(base.IronicAgentTest):
execute_mock.assert_any_call('sync')
self.assertEqual('FAILED', failed_result.command_status)
@mock.patch('ironic_python_agent.utils.determine_time_method',
autospec=True)
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_power_off_with_ntp_server(self, execute_mock, mock_timemethod):
self.config(fail_if_clock_not_set=False)
self.config(ntp_server='192.168.1.1')
execute_mock.return_value = ('', '')
mock_timemethod.return_value = 'ntpdate'
success_result = self.agent_extension.power_off()
success_result.join()
calls = [mock.call('ntpdate', '192.168.1.1'),
mock.call('hwclock', '-v', '--systohc'),
mock.call('sync'),
mock.call('poweroff', use_standard_locale=True,
check_exit_code=[0])]
execute_mock.assert_has_calls(calls)
self.assertEqual('SUCCEEDED', success_result.command_status)
self.config(fail_if_clock_not_set=True)
execute_mock.reset_mock()
execute_mock.return_value = ('', '')
execute_mock.side_effect = processutils.ProcessExecutionError
failed_result = self.agent_extension.power_off()
failed_result.join()
execute_mock.assert_any_call('ntpdate', '192.168.1.1')
self.assertEqual('FAILED', failed_result.command_status)
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test_sync(self, execute_mock):
result = self.agent_extension.sync()
@ -1162,6 +1194,33 @@ class TestStandbyExtension(base.IronicAgentTest):
'/dev/fake')
self.assertEqual(expected_msg, result_msg)
@mock.patch('ironic_python_agent.utils.determine_time_method',
autospec=True)
@mock.patch('ironic_python_agent.utils.execute', autospec=True)
def test__sync_clock(self, execute_mock, mock_timemethod):
self.config(ntp_server='192.168.1.1')
self.config(fail_if_clock_not_set=True)
execute_mock.return_value = ('', '')
mock_timemethod.return_value = 'chronyd'
self.agent_extension._sync_clock()
calls = [mock.call('chronyd', check_exit_code=[0, 1]),
mock.call('chronyc', 'add', 'server', '192.168.1.1'),
mock.call('chronyc', 'makestep'),
mock.call('hwclock', '-v', '--systohc')]
execute_mock.assert_has_calls(calls)
execute_mock.reset_mock()
execute_mock.side_effect = [
('', ''), ('', ''), ('', ''),
processutils.ProcessExecutionError('boop')
]
self.assertRaises(errors.ClockSyncError,
self.agent_extension._sync_clock)
execute_mock.assert_any_call('hwclock', '-v', '--systohc')
@mock.patch('hashlib.md5', autospec=True)
@mock.patch('requests.get', autospec=True)

View File

@ -674,3 +674,108 @@ class TestRemoveKeys(testtools.TestCase):
'key': 'value',
'other': [{'configdrive': '<...>'}, 'string', 0]}
self.assertEqual(expected, utils.remove_large_keys(value))
@mock.patch.object(utils, 'execute', autospec=True)
class TestClockSyncUtils(ironic_agent_base.IronicAgentTest):
def test_determine_time_method_none(self, mock_execute):
mock_execute.side_effect = OSError
self.assertIsNone(utils.determine_time_method())
def test_determine_time_method_ntpdate(self, mock_execute):
mock_execute.side_effect = [
OSError, # No chronyd found
('', ''), # Returns nothing on ntpdate call
]
calls = [mock.call('chronyd', '-h'),
mock.call('ntpdate', '-v', check_exit_code=[0, 1])]
return_value = utils.determine_time_method()
self.assertEqual('ntpdate', return_value)
mock_execute.assert_has_calls(calls)
def test_determine_time_method_chronyd(self, mock_execute):
mock_execute.side_effect = [
('', ''), # Returns nothing on ntpdate call
]
calls = [mock.call('chronyd', '-h')]
return_value = utils.determine_time_method()
self.assertEqual('chronyd', return_value)
mock_execute.assert_has_calls(calls)
@mock.patch.object(utils, 'determine_time_method', autospec=True)
def test_sync_clock_ntp(self, mock_time_method, mock_execute):
self.config(ntp_server='192.168.1.1')
mock_time_method.return_value = 'ntpdate'
utils.sync_clock()
mock_execute.assert_has_calls([mock.call('ntpdate', '192.168.1.1')])
@mock.patch.object(utils, 'determine_time_method', autospec=True)
def test_sync_clock_ntp_raises_exception(self, mock_time_method,
mock_execute):
self.config(ntp_server='192.168.1.1')
self.config(fail_if_clock_not_set=True)
mock_time_method.return_value = 'ntpdate'
mock_execute.side_effect = processutils.ProcessExecutionError()
self.assertRaises(errors.CommandExecutionError, utils.sync_clock)
@mock.patch.object(utils, 'determine_time_method', autospec=True)
def test_sync_clock_chrony(self, mock_time_method, mock_execute):
self.config(ntp_server='192.168.1.1')
mock_time_method.return_value = 'chronyd'
utils.sync_clock()
mock_execute.assert_has_calls([
mock.call('chronyd', check_exit_code=[0, 1]),
mock.call('chronyc', 'add', 'server', '192.168.1.1'),
mock.call('chronyc', 'makestep'),
])
@mock.patch.object(utils, 'determine_time_method', autospec=True)
def test_sync_clock_chrony_already_present(self, mock_time_method,
mock_execute):
self.config(ntp_server='192.168.1.1')
mock_time_method.return_value = 'chronyd'
mock_execute.side_effect = [
('', ''),
processutils.ProcessExecutionError(
stderr='Source already present'),
('', ''),
]
utils.sync_clock()
mock_execute.assert_has_calls([
mock.call('chronyd', check_exit_code=[0, 1]),
mock.call('chronyc', 'add', 'server', '192.168.1.1'),
mock.call('chronyc', 'makestep'),
])
@mock.patch.object(utils, 'determine_time_method', autospec=True)
def test_sync_clock_chrony_failure(self, mock_time_method, mock_execute):
self.config(ntp_server='192.168.1.1')
self.config(fail_if_clock_not_set=True)
mock_time_method.return_value = 'chronyd'
mock_execute.side_effect = [
('', ''),
processutils.ProcessExecutionError(stderr='time verboten'),
]
self.assertRaisesRegex(errors.CommandExecutionError,
'Error occured adding ntp',
utils.sync_clock)
mock_execute.assert_has_calls([
mock.call('chronyd', check_exit_code=[0, 1]),
mock.call('chronyc', 'add', 'server', '192.168.1.1'),
])
@mock.patch.object(utils, 'determine_time_method', autospec=True)
def test_sync_clock_none(self, mock_time_method, mock_execute):
self.config(ntp_server='192.168.1.1')
mock_time_method.return_value = None
utils.sync_clock(ignore_errors=True)
self.assertEqual(0, mock_execute.call_count)
@mock.patch.object(utils, 'determine_time_method', autospec=True)
def test_sync_clock_ntp_server_is_none(self, mock_time_method,
mock_execute):
self.config(ntp_server=None)
mock_time_method.return_value = None
utils.sync_clock()
self.assertEqual(0, mock_execute.call_count)

View File

@ -27,6 +27,7 @@ import time
from ironic_lib import utils as ironic_utils
from oslo_concurrency import processutils
from oslo_config import cfg
from oslo_log import log as logging
from oslo_serialization import base64
from oslo_utils import units
@ -35,6 +36,7 @@ from ironic_python_agent import errors
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
# Agent parameters can be passed by kernel command-line arguments and/or
# by virtual media. Virtual media parameters passed would be available
@ -486,3 +488,91 @@ def remove_large_keys(var):
return var.__class__(map(remove_large_keys, var))
else:
return var
def determine_time_method():
"""Helper method to determine what time utility is present.
:returns: "ntpdate" if ntpdate has been found, "chrony" if chrony
was located, and None if neither are located. If both tools
are present, "chrony" will supercede "ntpdate".
"""
try:
execute('chronyd', '-h')
return 'chronyd'
except OSError:
LOG.debug('Command \'chronyd\' not found for time sync.')
try:
execute('ntpdate', '-v', check_exit_code=[0, 1])
return 'ntpdate'
except OSError:
LOG.debug('Command \'ntpdate\' not found for time sync.')
return None
def sync_clock(ignore_errors=False):
"""Syncs the software clock of the system.
This method syncs the system software clock if a NTP server
was defined in the "[DEFAULT]ntp_server" configuration
parameter. This method does NOT attempt to sync the hardware
clock.
It will try to use either ntpdate or chrony to sync the software
clock of the system. If neither is found, an exception is raised.
:param ignore_errors: Boolean value default False that allows for
the method to be called and ultimately not
raise an exception. This may be useful for
opportunistically attempting to sync the
system software clock.
:raises: CommandExecutionError if an error is encountered while
attempting to sync the software clock.
"""
if not CONF.ntp_server:
return
method = determine_time_method()
if method == 'ntpdate':
try:
execute('ntpdate', CONF.ntp_server)
LOG.debug('Set software clock using ntpdate')
except processutils.ProcessExecutionError as e:
msg = ('Failed to sync with ntp server: '
'%s: %s' % (CONF.ntp_server, e))
LOG.error(msg)
if CONF.fail_if_clock_not_set or not ignore_errors:
raise errors.CommandExecutionError(msg)
elif method == 'chronyd':
try:
# 0 should be if chronyd started
# 1 if already running
execute('chronyd', check_exit_code=[0, 1])
# NOTE(TheJulia): Once started, chronyd forks and stays in the
# background as a server service, it will continue to keep the
# clock in sync.
try:
execute('chronyc', 'add', 'server', CONF.ntp_server)
except processutils.ProcessExecutionError as e:
if 'Source already present' not in str(e):
msg = 'Error occured adding ntp server: %s' % e
LOG.error(msg)
raise errors.CommandExecutionError(msg)
# Force the clock to sync now.
execute('chronyc', 'makestep')
LOG.debug('Set software clock using chrony')
except (processutils.ProcessExecutionError,
errors.CommandExecutionError) as e:
msg = ('Failed to sync time using chrony to ntp server: '
'%s: %s' % (CONF.ntp_server, e))
LOG.error(msg)
if CONF.fail_if_clock_not_set or not ignore_errors:
raise errors.CommandExecutionError(msg)
else:
msg = ('Unable to sync clock, available methods of '
'\'ntpdate\' or \'chrony\' not found.')
LOG.error(msg)
if CONF.fail_if_clock_not_set or not ignore_errors:
raise errors.CommandExecutionError(msg)

View File

@ -0,0 +1,8 @@
---
features:
- |
Adds a feature where IPA will utilize a ``[DEFAULT]ntp_server`` or
``ipa-ntp-server`` kernel command line argument to cause the agent
to attempt to sync the clock to the NTP source. The agent also attempts
to sync the software clock to the NTP time source, and assert an update
to the hardware clock prior to powering the machine off.