diff --git a/ironic/conductor/base_manager.py b/ironic/conductor/base_manager.py index 7287ea4ebf..ef97fd1135 100644 --- a/ironic/conductor/base_manager.py +++ b/ironic/conductor/base_manager.py @@ -321,13 +321,16 @@ class BaseConductorManager(object): # This is only used in tests currently. Delete it? self._periodic_task_callables = periodic_task_callables + def keepalive_halt(self): + self._keepalive_evt.set() + def del_host(self, deregister=True, clear_node_reservations=True): # Conductor deregistration fails if called on non-initialized # conductor (e.g. when rpc server is unreachable). if not hasattr(self, 'conductor'): return self._shutdown = True - self._keepalive_evt.set() + self.keepalive_halt() if clear_node_reservations: # clear all locks held by this conductor before deregistering @@ -469,7 +472,7 @@ class BaseConductorManager(object): return while not self._keepalive_evt.is_set(): try: - self.conductor.touch() + self.conductor.touch(online=not self._shutdown) except db_exception.DBConnectionError: LOG.warning('Conductor could not connect to database ' 'while heartbeating.') diff --git a/ironic/db/api.py b/ironic/db/api.py index 8aeda4b293..f5a097a887 100644 --- a/ironic/db/api.py +++ b/ironic/db/api.py @@ -585,10 +585,16 @@ class Connection(object, metaclass=abc.ABCMeta): """ @abc.abstractmethod - def touch_conductor(self, hostname): + def touch_conductor(self, hostname, online=True): """Mark a conductor as active by updating its 'updated_at' property. + Calling periodically with ``online=False`` will result in the conductor + appearing unregistered, but recently enough to prevent other conductors + failing orphan nodes. This improves the behaviour of graceful and drain + shutdown. + :param hostname: The hostname of this conductor service. + :param online: Whether the conductor is online. :raises: ConductorNotFound """ diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py index a6ab523e01..b3bda9802c 100644 --- a/ironic/db/sqlalchemy/api.py +++ b/ironic/db/sqlalchemy/api.py @@ -1392,13 +1392,13 @@ class Connection(api.Connection): raise exception.ConductorNotFound(conductor=hostname) @oslo_db_api.retry_on_deadlock - def touch_conductor(self, hostname): + def touch_conductor(self, hostname, online=True): with _session_for_write() as session: query = sa.update(models.Conductor).where( models.Conductor.hostname == hostname ).values({ 'updated_at': timeutils.utcnow(), - 'online': True} + 'online': online} ).execution_options(synchronize_session=False) res = session.execute(query) count = res.rowcount diff --git a/ironic/objects/conductor.py b/ironic/objects/conductor.py index 307e218c5f..6c35803173 100644 --- a/ironic/objects/conductor.py +++ b/ironic/objects/conductor.py @@ -111,9 +111,9 @@ class Conductor(base.IronicObject, object_base.VersionedObjectDictCompat): # methods can be used in the future to replace current explicit RPC calls. # Implications of calling new remote procedures should be thought through. # @object_base.remotable - def touch(self, context=None): + def touch(self, context=None, online=True): """Touch this conductor's DB record, marking it as up-to-date.""" - self.dbapi.touch_conductor(self.hostname) + self.dbapi.touch_conductor(self.hostname, online=online) # NOTE(xek): We don't want to enable RPC on this call just yet. Remotable # methods can be used in the future to replace current explicit RPC calls. diff --git a/ironic/tests/unit/conductor/test_base_manager.py b/ironic/tests/unit/conductor/test_base_manager.py index 8838cff076..b8b60b01ba 100644 --- a/ironic/tests/unit/conductor/test_base_manager.py +++ b/ironic/tests/unit/conductor/test_base_manager.py @@ -359,7 +359,7 @@ class KeepAliveTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_is_sqlite.return_value = False self.service._conductor_service_record_keepalive() self.assertEqual(1, mock_is_sqlite.call_count) - mock_touch.assert_called_once_with(self.hostname) + mock_touch.assert_called_once_with(self.hostname, online=True) @mock.patch.object(common_utils, 'is_ironic_using_sqlite', autospec=True) def test__conductor_service_record_keepalive_failed_db_conn( diff --git a/ironic/tests/unit/db/test_conductor.py b/ironic/tests/unit/db/test_conductor.py index a16ef08b5c..59c280076f 100644 --- a/ironic/tests/unit/db/test_conductor.py +++ b/ironic/tests/unit/db/test_conductor.py @@ -156,6 +156,21 @@ class DbConductorTestCase(base.DbTestCase): c = self.dbapi.get_conductor(c.hostname) self.assertEqual(test_time, timeutils.normalize_time(c.updated_at)) + @mock.patch.object(timeutils, 'utcnow', autospec=True) + def test_touch_conductor_offline(self, mock_utcnow): + test_time = datetime.datetime(2000, 1, 1, 0, 0) + mock_utcnow.return_value = test_time + c = self._create_test_cdr() + self.assertEqual(test_time, timeutils.normalize_time(c.updated_at)) + + test_time = datetime.datetime(2000, 1, 1, 0, 1) + mock_utcnow.return_value = test_time + self.dbapi.touch_conductor(c.hostname, online=False) + self.assertRaises( + exception.ConductorNotFound, + self.dbapi.get_conductor, + c.hostname) + def test_touch_conductor_not_found(self): # A conductor's heartbeat will not create a new record, # it will only update existing ones diff --git a/ironic/tests/unit/objects/test_conductor.py b/ironic/tests/unit/objects/test_conductor.py index 4b3c954904..f9c7c37c59 100644 --- a/ironic/tests/unit/objects/test_conductor.py +++ b/ironic/tests/unit/objects/test_conductor.py @@ -77,7 +77,7 @@ class TestConductorObject(db_base.DbTestCase): c = objects.Conductor.get_by_hostname(self.context, host) c.touch(self.context) mock_get_cdr.assert_called_once_with(host, online=True) - mock_touch_cdr.assert_called_once_with(host) + mock_touch_cdr.assert_called_once_with(host, online=True) def test_refresh(self): host = self.fake_conductor['hostname']