diff --git a/ironic_python_agent/agent.py b/ironic_python_agent/agent.py index 1cfe2e202..f0f145ece 100644 --- a/ironic_python_agent/agent.py +++ b/ironic_python_agent/agent.py @@ -14,9 +14,7 @@ import collections import ipaddress -import os import random -import select import socket import threading import time @@ -79,13 +77,6 @@ class IronicPythonAgentHeartbeater(threading.Thread): min_jitter_multiplier = 0.3 max_jitter_multiplier = 0.6 - # Exponential backoff values used in case of an error. In reality we will - # only wait a portion of either of these delays based on the jitter - # multipliers. - initial_delay = 1.0 - max_delay = 300.0 - backoff_factor = 2.7 - def __init__(self, agent): """Initialize the heartbeat thread. @@ -94,39 +85,19 @@ class IronicPythonAgentHeartbeater(threading.Thread): """ super(IronicPythonAgentHeartbeater, self).__init__() self.agent = agent + self.stop_event = threading.Event() self.api = agent.api_client - self.error_delay = self.initial_delay - self.reader = None - self.writer = None + self.interval = 0 def run(self): """Start the heartbeat thread.""" # The first heartbeat happens immediately - LOG.info('starting heartbeater') - interval = 0 + LOG.info('Starting heartbeater') self.agent.set_agent_advertise_addr() - self.reader, self.writer = os.pipe() - p = select.poll() - p.register(self.reader, select.POLLIN) - try: - while True: - if p.poll(interval * 1000): - if os.read(self.reader, 1).decode() == 'a': - break - - self.do_heartbeat() - interval_multiplier = random.uniform( - self.min_jitter_multiplier, - self.max_jitter_multiplier) - interval = self.agent.heartbeat_timeout * interval_multiplier - log_msg = 'sleeping before next heartbeat, interval: {}' - LOG.info(log_msg.format(interval)) - finally: - os.close(self.reader) - os.close(self.writer) - self.reader = None - self.writer = None + while not self.stop_event.wait(self.interval): + self.do_heartbeat() + eventlet.sleep(0) def do_heartbeat(self): """Send a heartbeat to Ironic.""" @@ -136,28 +107,28 @@ class IronicPythonAgentHeartbeater(threading.Thread): advertise_address=self.agent.advertise_address, advertise_protocol=self.agent.advertise_protocol, ) - self.error_delay = self.initial_delay LOG.info('heartbeat successful') except errors.HeartbeatConflictError: LOG.warning('conflict error sending heartbeat to {}'.format( self.agent.api_url)) - self.error_delay = min(self.error_delay * self.backoff_factor, - self.max_delay) except Exception: LOG.exception('error sending heartbeat to {}'.format( self.agent.api_url)) - self.error_delay = min(self.error_delay * self.backoff_factor, - self.max_delay) + finally: + interval_multiplier = random.uniform(self.min_jitter_multiplier, + self.max_jitter_multiplier) + self.interval = self.agent.heartbeat_timeout * interval_multiplier + log_msg = 'sleeping before next heartbeat, interval: {0}' + LOG.info(log_msg.format(self.interval)) def force_heartbeat(self): - os.write(self.writer, b'b') + self.do_heartbeat() def stop(self): """Stop the heartbeat thread.""" - if self.writer is not None: - LOG.info('stopping heartbeater') - os.write(self.writer, b'a') - return self.join() + LOG.info('stopping heartbeater') + self.stop_event.set() + return self.join() class IronicPythonAgent(base.ExecuteCommandMixin): diff --git a/ironic_python_agent/tests/unit/test_agent.py b/ironic_python_agent/tests/unit/test_agent.py index 013eb83d9..a72fe60aa 100644 --- a/ironic_python_agent/tests/unit/test_agent.py +++ b/ironic_python_agent/tests/unit/test_agent.py @@ -60,21 +60,19 @@ class TestHeartbeater(ironic_agent_base.IronicAgentTest): hardware.HardwareManager) self.heartbeater.stop_event = mock.Mock() - @mock.patch('os.read', autospec=True) - @mock.patch('select.poll', autospec=True) @mock.patch('ironic_python_agent.agent._time', autospec=True) @mock.patch('random.uniform', autospec=True) - def test_heartbeat(self, mock_uniform, mock_time, mock_poll, mock_read): + def test_heartbeat(self, mock_uniform, mock_time): time_responses = [] uniform_responses = [] heartbeat_responses = [] - poll_responses = [] - expected_poll_calls = [] + wait_responses = [] + expected_stop_calls = [] # FIRST RUN: # initial delay is 0 - expected_poll_calls.append(mock.call(0)) - poll_responses.append(False) + expected_stop_calls.append(mock.call(0)) + wait_responses.append(False) # next heartbeat due at t=100 heartbeat_responses.append(100) # random interval multiplier is 0.5 @@ -84,8 +82,8 @@ class TestHeartbeater(ironic_agent_base.IronicAgentTest): # SECOND RUN: # 50 * .5 = 25 - expected_poll_calls.append(mock.call(1000 * 25.0)) - poll_responses.append(False) + expected_stop_calls.append(mock.call(25.0)) + wait_responses.append(False) # next heartbeat due at t=180 heartbeat_responses.append(180) # random interval multiplier is 0.4 @@ -95,36 +93,34 @@ class TestHeartbeater(ironic_agent_base.IronicAgentTest): # THIRD RUN: # 50 * .4 = 20 - expected_poll_calls.append(mock.call(1000 * 20.0)) - poll_responses.append(False) + expected_stop_calls.append(mock.call(20.0)) + wait_responses.append(False) # this heartbeat attempt fails heartbeat_responses.append(Exception('uh oh!')) - # we check the time to generate a fake deadline, now t=125 - time_responses.append(125) # random interval multiplier is 0.5 uniform_responses.append(0.5) + # we check the time to generate a fake deadline, now t=125 + time_responses.append(125) # time is now 125.5 time_responses.append(125.5) # FOURTH RUN: - # 50 * .5 = 25 - expected_poll_calls.append(mock.call(1000 * 25.0)) + # 50 * .5 = 20 + expected_stop_calls.append(mock.call(25.0)) # Stop now - poll_responses.append(True) - mock_read.return_value = b'a' + wait_responses.append(True) # Hook it up and run it mock_time.side_effect = time_responses mock_uniform.side_effect = uniform_responses self.mock_agent.heartbeat_timeout = 50 self.heartbeater.api.heartbeat.side_effect = heartbeat_responses - mock_poll.return_value.poll.side_effect = poll_responses + self.heartbeater.stop_event.wait.side_effect = wait_responses self.heartbeater.run() # Validate expectations - self.assertEqual(expected_poll_calls, - mock_poll.return_value.poll.call_args_list) - self.assertEqual(2.7, self.heartbeater.error_delay) + self.assertEqual(expected_stop_calls, + self.heartbeater.stop_event.wait.call_args_list) @mock.patch.object(hardware, '_md_scan_and_assemble', lambda: None) diff --git a/releasenotes/notes/change-heartbeat-method-d0119406137022e3.yaml b/releasenotes/notes/change-heartbeat-method-d0119406137022e3.yaml new file mode 100644 index 000000000..9d000f723 --- /dev/null +++ b/releasenotes/notes/change-heartbeat-method-d0119406137022e3.yaml @@ -0,0 +1,7 @@ +--- +upgrade: + - | + IPA heartbeat intervals now rely on accurate clock time. Any clean or + deploy steps which attempt to sync the clock may cause heartbeats to not + be emitted. IPA syncs time at startup and shutdown, so these steps should + not be required.