Stop aborting the process on periodic task failures

It's nice to know when a periodic task fails, but aborting the whole
process makes it fragile in the events of e.g. problems with accessing
ironic, even if transient.

Simplify the overly complex unit tests and split the dual-purpose task.

Story: #2007622
Task: #39653
Change-Id: I64f5ca6e377a4cdfa08319ef9973b591cc552121
changes/88/725688/2
Dmitry Tantsur 2 years ago
parent 41b3323602
commit 6e38efffea
  1. 22
      ironic_inspector/conductor/manager.py
  2. 37
      ironic_inspector/test/unit/test_manager.py
  3. 4
      releasenotes/notes/periodics-18bf7fb57777c043.yaml

@ -14,7 +14,6 @@
import sys
import traceback as traceback_mod
import eventlet
from eventlet import semaphore
from futurist import periodics
from ironic_lib import mdns
@ -85,9 +84,14 @@ class ConductorManager(object):
spacing=CONF.clean_up_period
)(periodic_clean_up)
sync_with_ironic_ = periodics.periodic(
spacing=CONF.clean_up_period
)(sync_with_ironic)
self._periodics_worker = periodics.PeriodicWorker(
callables=[(driver.get_periodic_sync_task(), None, None),
(periodic_clean_up_, None, None)],
(periodic_clean_up_, None, None),
(sync_with_ironic_, None, None)],
executor_factory=periodics.ExistingExecutor(utils.executor()),
on_failure=self._periodics_watchdog)
@ -154,8 +158,6 @@ class ConductorManager(object):
LOG.exception("The periodic %(callable)s failed with: %(exception)s", {
'exception': ''.join(traceback_mod.format_exception(*exc_info)),
'callable': reflection.get_callable_name(callable_)})
# NOTE(milan): spawn new thread otherwise waiting would block
eventlet.spawn(self.del_host)
@messaging.expected_exceptions(utils.Error)
def do_introspection(self, context, node_id, token=None,
@ -189,16 +191,8 @@ class ConductorManager(object):
def periodic_clean_up(): # pragma: no cover
try:
if node_cache.clean_up():
pxe_filter.driver().sync(ir_utils.get_client())
except Exception:
LOG.exception('Periodic clean up of node cache failed')
try:
sync_with_ironic()
except Exception:
LOG.exception('Periodic sync of node list with ironic failed')
if node_cache.clean_up():
pxe_filter.driver().sync(ir_utils.get_client())
def sync_with_ironic():

@ -55,8 +55,6 @@ class TestManagerInitHost(BaseManagerTest):
'validate_processing_hooks')).mock
self.mock_filter = self.useFixture(fixtures.MockPatchObject(
manager.pxe_filter, 'driver')).mock.return_value
self.mock_periodic = self.useFixture(fixtures.MockPatchObject(
manager.periodics, 'periodic')).mock
self.mock_PeriodicWorker = self.useFixture(fixtures.MockPatchObject(
manager.periodics, 'PeriodicWorker')).mock
self.mock_executor = self.useFixture(fixtures.MockPatchObject(
@ -67,27 +65,13 @@ class TestManagerInitHost(BaseManagerTest):
manager.sys, 'exit')).mock
def assert_periodics(self):
outer_cleanup_decorator_call = mock.call(
spacing=CONF.clean_up_period)
self.mock_periodic.assert_has_calls([
outer_cleanup_decorator_call,
mock.call()(manager.periodic_clean_up)])
inner_decorator = self.mock_periodic.return_value
inner_cleanup_decorator_call = mock.call(
manager.periodic_clean_up)
inner_decorator.assert_has_calls([inner_cleanup_decorator_call])
self.mock_ExistingExecutor.assert_called_once_with(
self.mock_executor.return_value)
periodic_worker = self.mock_PeriodicWorker.return_value
periodic_sync = self.mock_filter.get_periodic_sync_task.return_value
callables = [(periodic_sync, None, None),
(inner_decorator.return_value, None, None)]
self.mock_PeriodicWorker.assert_called_once_with(
callables=callables,
callables=mock.ANY,
executor_factory=self.mock_ExistingExecutor.return_value,
on_failure=self.manager._periodics_watchdog)
self.assertIs(periodic_worker, self.manager._periodics_worker)
@ -304,25 +288,6 @@ class TestManagerDelHost(BaseManagerTest):
mock_coordinator.stop.called_once_with()
class TestManagerPeriodicWatchDog(BaseManagerTest):
def setUp(self):
super(TestManagerPeriodicWatchDog, self).setUp()
self.mock_get_callable_name = self.useFixture(fixtures.MockPatchObject(
manager.reflection, 'get_callable_name')).mock
self.mock_spawn = self.useFixture(fixtures.MockPatchObject(
manager.eventlet, 'spawn')).mock
def test__periodics_watchdog(self):
error = RuntimeError('Oops!')
self.manager._periodics_watchdog(
callable_=None, activity=None, spacing=None,
exc_info=(None, error, None), traceback=None)
self.mock_get_callable_name.assert_called_once_with(None)
self.mock_spawn.assert_called_once_with(self.manager.del_host)
class TestManagerIntrospect(BaseManagerTest):
@mock.patch.object(introspect, 'introspect', autospec=True)
def test_do_introspect(self, introspect_mock):

@ -0,0 +1,4 @@
---
fixes:
- |
No longer aborts the whole process if one periodic task fails.
Loading…
Cancel
Save