Restart consoles on conductor startup
Some nodes may have console_enabled set to True while the corresponding shellinabox services were stopped when the conductor started, so try to start those consoles on conductor startup. Change-Id: Ida5fda35340d62e08c779655183ef82211cd8703 Closes-Bug: #1525790
This commit is contained in:
parent
0ad5b13b5a
commit
d27e9e4a8f
@ -15,6 +15,7 @@
|
||||
import inspect
|
||||
import threading
|
||||
|
||||
import eventlet
|
||||
import futurist
|
||||
from futurist import periodics
|
||||
from futurist import rejection
|
||||
@ -176,6 +177,13 @@ class BaseConductorManager(object):
|
||||
states.DEPLOYING, 'provision_updated_at',
|
||||
last_error=last_error)
|
||||
|
||||
# Start consoles, in a greenthread, for nodes that have console enabled.
|
||||
try:
|
||||
self._spawn_worker(self._start_consoles,
|
||||
ironic_context.get_admin_context())
|
||||
except exception.NoFreeConductorWorker:
|
||||
LOG.warning(_LW('Failed to start worker for restarting consoles.'))
|
||||
|
||||
# Spawn a dedicated greenthread for the keepalive
|
||||
try:
|
||||
self._spawn_worker(self._conductor_service_record_keepalive)
|
||||
@ -371,3 +379,48 @@ class BaseConductorManager(object):
|
||||
workers_count += 1
|
||||
if workers_count >= CONF.conductor.periodic_max_workers:
|
||||
break
|
||||
|
||||
def _start_consoles(self, context):
    """Start the console of every node that has its console enabled.

    Nodes are selected with the ``console_enabled`` filter and each one is
    handled under an exclusive task lock. If starting a console fails, the
    node's ``console_enabled`` is reset to False and the failure is stored
    in its ``last_error``. Nodes that are locked or no longer found are
    skipped with a warning.

    :param context: request context
    """
    filters = {'console_enabled': True}

    # Each item unpacks to a node UUID and a driver; only the UUID is used.
    # The spare name is ``_driver`` rather than ``_`` so that the
    # translation function ``_`` used below is not shadowed.
    for node_uuid, _driver in self.iter_nodes(filters=filters):
        try:
            with task_manager.acquire(context, node_uuid, shared=False,
                                      purpose='start console') as task:
                try:
                    LOG.debug('Trying to start console of node %(node)s',
                              {'node': node_uuid})
                    task.driver.console.start_console(task)
                    LOG.info(_LI('Successfully started console of node '
                                 '%(node)s'), {'node': node_uuid})
                except Exception as err:
                    msg = (_('Failed to start console of node %(node)s '
                             'while starting the conductor, so changing '
                             'the console_enabled status to False, error: '
                             '%(err)s')
                           % {'node': node_uuid, 'err': err})
                    LOG.error(msg)
                    # If starting console failed, set node console_enabled
                    # back to False and set node's last error.
                    task.node.last_error = msg
                    task.node.console_enabled = False
                    task.node.save()
        except exception.NodeLocked:
            LOG.warning(_LW('Node %(node)s is locked while trying to '
                            'start console on conductor startup'),
                        {'node': node_uuid})
        except exception.NodeNotFound:
            LOG.warning(_LW("During starting console on conductor "
                            "startup, node %(node)s was not found"),
                        {'node': node_uuid})
        finally:
            # Yield on every iteration
            eventlet.sleep(0)
|
||||
|
@ -220,6 +220,8 @@ class Connection(api.Connection):
|
||||
(datetime.timedelta(
|
||||
seconds=filters['inspection_started_before'])))
|
||||
query = query.filter(models.Node.inspection_started_at < limit)
|
||||
if 'console_enabled' in filters:
|
||||
query = query.filter_by(console_enabled=filters['console_enabled'])
|
||||
|
||||
return query
|
||||
|
||||
|
@ -544,6 +544,8 @@ class ConsoleInterface(object):
|
||||
def start_console(self, task):
|
||||
"""Start a remote console for the task's node.
|
||||
|
||||
This method should not raise an exception if the console is already started.
|
||||
|
||||
:param task: a TaskManager instance containing the node to act on.
|
||||
"""
|
||||
|
||||
|
@ -18,11 +18,13 @@ from futurist import periodics
|
||||
import mock
|
||||
from oslo_config import cfg
|
||||
from oslo_db import exception as db_exception
|
||||
from oslo_utils import uuidutils
|
||||
|
||||
from ironic.common import driver_factory
|
||||
from ironic.common import exception
|
||||
from ironic.conductor import base_manager
|
||||
from ironic.conductor import manager
|
||||
from ironic.conductor import task_manager
|
||||
from ironic.drivers import base as drivers_base
|
||||
from ironic import objects
|
||||
from ironic.tests import base as tests_base
|
||||
@ -218,3 +220,84 @@ class ManagerSpawnWorkerTestCase(tests_base.TestCase):
|
||||
|
||||
self.assertRaises(exception.NoFreeConductorWorker,
|
||||
self.service._spawn_worker, 'fake')
|
||||
|
||||
|
||||
class StartConsolesTestCase(mgr_utils.ServiceSetUpMixin,
                            tests_db_base.DbTestCase):
    """Tests for the conductor service's ``_start_consoles`` method."""

    def _make_fake_node(self, **attrs):
        """Create a test node that uses the 'fake' driver."""
        return obj_utils.create_test_node(self.context, driver='fake',
                                          **attrs)

    def _mock_start_console(self):
        """Return a patcher for the driver's console ``start_console``."""
        return mock.patch.object(self.driver.console, 'start_console')

    def test__start_consoles(self):
        # Two nodes with console enabled, one without.
        self._make_fake_node(console_enabled=True)
        self._make_fake_node(uuid=uuidutils.generate_uuid(),
                             console_enabled=True)
        self._make_fake_node(uuid=uuidutils.generate_uuid())
        self._start_service()
        with self._mock_start_console() as start_mock:
            self.service._start_consoles(self.context)
            self.assertEqual(2, start_mock.call_count)

    def test__start_consoles_no_console_enabled(self):
        self._make_fake_node(console_enabled=False)
        self._start_service()
        with self._mock_start_console() as start_mock:
            self.service._start_consoles(self.context)
            self.assertFalse(start_mock.called)

    def test__start_consoles_failed(self):
        node = self._make_fake_node(console_enabled=True)
        self._start_service()
        with self._mock_start_console() as start_mock:
            start_mock.side_effect = Exception()
            self.service._start_consoles(self.context)
            start_mock.assert_called_once_with(mock.ANY)
        # A failed start must disable the console and record the error.
        node.refresh()
        self.assertFalse(node.console_enabled)
        self.assertIsNotNone(node.last_error)

    @mock.patch.object(base_manager, 'LOG')
    def test__start_consoles_node_locked(self, mock_log):
        node = self._make_fake_node(console_enabled=True,
                                    reservation='fake-host')
        self._start_service()
        with self._mock_start_console() as start_mock:
            self.service._start_consoles(self.context)
            self.assertFalse(start_mock.called)
            # The node is left untouched; only a warning is logged.
            node.refresh()
            self.assertTrue(node.console_enabled)
            self.assertIsNone(node.last_error)
            self.assertTrue(mock_log.warning.called)

    @mock.patch.object(base_manager, 'LOG')
    def test__start_consoles_node_not_found(self, mock_log):
        node = self._make_fake_node(console_enabled=True)
        self._start_service()
        not_found = exception.NodeNotFound(node='not found')
        with mock.patch.object(task_manager, 'acquire',
                               side_effect=not_found):
            with self._mock_start_console() as start_mock:
                self.service._start_consoles(self.context)
                self.assertFalse(start_mock.called)
                # The node is left untouched; only a warning is logged.
                node.refresh()
                self.assertTrue(node.console_enabled)
                self.assertIsNone(node.last_error)
                self.assertTrue(mock_log.warning.called)
|
||||
|
@ -0,0 +1,5 @@
|
||||
---
|
||||
fixes:
|
||||
- Some nodes' consoles may be enabled while the corresponding console
|
||||
services were stopped when the conductor started. The conductor now
|
||||
tries to start such consoles on startup to make the status consistent.
|
Loading…
Reference in New Issue
Block a user