Allow enabling fast-track per node

This is useful when some nodes need the "agent" power interface, while
the others can be deployed normally.

Change-Id: Ief7df40c83ef03d0ec5ae92d09ceffd39d3c12a3
This commit is contained in:
Dmitry Tantsur 2021-12-08 12:26:48 +01:00
parent 79311c88df
commit 2a6cdf4b24
13 changed files with 165 additions and 25 deletions

View File

@ -25,6 +25,13 @@ Fast track is off by default and should be enabled in the configuration:
[deploy] [deploy]
fast_track = true fast_track = true
Starting with the Yoga release series, fast track can also be enabled or
disabled per node; a per-node value takes precedence over the configuration
option:
.. code-block:: console
baremetal node set <node> --driver-info fast_track=true
Inspection Inspection
---------- ----------

View File

@ -69,11 +69,13 @@ def convert_with_links(node):
class LookupController(rest.RestController): class LookupController(rest.RestController):
"""Controller handling node lookup for a deploy ramdisk.""" """Controller handling node lookup for a deploy ramdisk."""
@property def lookup_allowed(self, node):
def lookup_allowed_states(self): if utils.fast_track_enabled(node):
if CONF.deploy.fast_track: return (
return states.FASTTRACK_LOOKUP_ALLOWED_STATES node.provision_state in states.FASTTRACK_LOOKUP_ALLOWED_STATES
return states.LOOKUP_ALLOWED_STATES )
else:
return node.provision_state in states.LOOKUP_ALLOWED_STATES
@method.expose() @method.expose()
@args.validate(addresses=args.string_list, node_uuid=args.uuid) @args.validate(addresses=args.string_list, node_uuid=args.uuid)
@ -135,8 +137,7 @@ class LookupController(rest.RestController):
# at all and nodes in a wrong state by different error messages. # at all and nodes in a wrong state by different error messages.
raise exception.NotFound() raise exception.NotFound()
if (CONF.api.restrict_lookup if CONF.api.restrict_lookup and not self.lookup_allowed(node):
and node.provision_state not in self.lookup_allowed_states):
raise exception.NotFound() raise exception.NotFound()
if api_utils.allow_agent_token(): if api_utils.allow_agent_token():

View File

@ -36,6 +36,7 @@ from oslo_log import log as logging
from oslo_serialization import jsonutils from oslo_serialization import jsonutils
from oslo_utils import fileutils from oslo_utils import fileutils
from oslo_utils import netutils from oslo_utils import netutils
from oslo_utils import strutils
from oslo_utils import timeutils from oslo_utils import timeutils
import psutil import psutil
import pytz import pytz
@ -654,3 +655,15 @@ def remove_large_keys(var):
return var.__class__(map(remove_large_keys, var)) return var.__class__(map(remove_large_keys, var))
else: else:
return var return var
def fast_track_enabled(node):
    """Tell whether fast-track mode is enabled for a node.

    The ``fast_track`` key of the node's ``driver_info`` takes precedence.
    When it is missing (or ``None``), the ``[deploy]fast_track``
    configuration option is used instead.

    :param node: node object exposing a ``driver_info`` mapping.
    :returns: ``CONF.deploy.fast_track`` when the node has no override,
        otherwise the override interpreted as a boolean.
    :raises: InvalidParameterValue if the per-node value cannot be
        interpreted as a boolean.
    """
    override = node.driver_info.get('fast_track')
    if override is None:
        # No per-node setting: fall back to the global option.
        return CONF.deploy.fast_track

    try:
        enabled = strutils.bool_from_string(override, strict=True)
    except ValueError as exc:
        raise exception.InvalidParameterValue(
            _("Invalid value of fast_track: %s") % exc)
    return enabled

View File

@ -36,6 +36,7 @@ from ironic.common.i18n import _
from ironic.common import network from ironic.common import network
from ironic.common import nova from ironic.common import nova
from ironic.common import states from ironic.common import states
from ironic.common import utils
from ironic.conductor import notification_utils as notify_utils from ironic.conductor import notification_utils as notify_utils
from ironic.conductor import task_manager from ironic.conductor import task_manager
from ironic.objects import fields from ironic.objects import fields
@ -1085,7 +1086,7 @@ def fast_track_able(task):
configuration is present, and no last_error is present for configuration is present, and no last_error is present for
the node indicating that there was a recent failure. the node indicating that there was a recent failure.
""" """
return (CONF.deploy.fast_track return (utils.fast_track_enabled(task.node)
# TODO(TheJulia): Network model aside, we should be able to # TODO(TheJulia): Network model aside, we should be able to
# fast-track through initial sequence to complete deployment. # fast-track through initial sequence to complete deployment.
# This needs to be validated. # This needs to be validated.

View File

@ -28,7 +28,8 @@ opts = [
'spaces.')), 'spaces.')),
cfg.BoolOpt('power_off', default=True, cfg.BoolOpt('power_off', default=True,
help=_('whether to power off a node after inspection ' help=_('whether to power off a node after inspection '
'finishes')), 'finishes. Ignored for nodes that have fast '
'track mode enabled.')),
cfg.StrOpt('callback_endpoint_override', cfg.StrOpt('callback_endpoint_override',
help=_('endpoint to use as a callback for posting back ' help=_('endpoint to use as a callback for posting back '
'introspection data when boot is managed by ironic. ' 'introspection data when boot is managed by ironic. '

View File

@ -440,12 +440,11 @@ class HeartbeatMixin(object):
""" """
return self.process_next_step(task, 'clean') return self.process_next_step(task, 'clean')
@property def heartbeat_allowed(self, node):
def heartbeat_allowed_states(self): if utils.fast_track_enabled(node):
"""Define node states where heartbeating is allowed""" return node.provision_state in FASTTRACK_HEARTBEAT_ALLOWED
if CONF.deploy.fast_track: else:
return FASTTRACK_HEARTBEAT_ALLOWED return node.provision_state in HEARTBEAT_ALLOWED
return HEARTBEAT_ALLOWED
def _heartbeat_in_maintenance(self, task): def _heartbeat_in_maintenance(self, task):
node = task.node node = task.node
@ -560,8 +559,8 @@ class HeartbeatMixin(object):
agent_status agent_status
""" """
# NOTE(pas-ha) immediately skip the rest if nothing to do # NOTE(pas-ha) immediately skip the rest if nothing to do
if (task.node.provision_state not in self.heartbeat_allowed_states if (not self.heartbeat_allowed(task.node)
and not manager_utils.fast_track_able(task)): and not manager_utils.fast_track_able(task)):
LOG.error('Heartbeat from node %(node)s in unsupported ' LOG.error('Heartbeat from node %(node)s in unsupported '
'provision state %(state)s, not taking any action.', 'provision state %(state)s, not taking any action.',
{'node': task.node.uuid, {'node': task.node.uuid,

View File

@ -23,6 +23,7 @@ import tenacity
from ironic.common import exception from ironic.common import exception
from ironic.common.i18n import _ from ironic.common.i18n import _
from ironic.common import states from ironic.common import states
from ironic.common import utils
from ironic.conductor import utils as cond_utils from ironic.conductor import utils as cond_utils
from ironic.drivers import base from ironic.drivers import base
from ironic.drivers.modules import agent_client from ironic.drivers.modules import agent_client
@ -40,10 +41,6 @@ class AgentPower(base.PowerInterface):
def __init__(self): def __init__(self):
super(AgentPower, self).__init__() super(AgentPower, self).__init__()
if not CONF.deploy.fast_track:
raise exception.InvalidParameterValue(
_('[deploy]fast_track must be True to enable the agent '
'power interface'))
self._client = agent_client.AgentClient() self._client = agent_client.AgentClient()
def get_properties(self): def get_properties(self):
@ -61,9 +58,9 @@ class AgentPower(base.PowerInterface):
""" """
# NOTE(dtantsur): the fast_track option is mutable, so we have to check # NOTE(dtantsur): the fast_track option is mutable, so we have to check
# it again on validation. # it again on validation.
if not CONF.deploy.fast_track: if not utils.fast_track_enabled(task.node):
raise exception.InvalidParameterValue( raise exception.InvalidParameterValue(
_('[deploy]fast_track must be True to enable the agent ' _('Fast track mode must be enabled to use the agent '
'power interface')) 'power interface'))
# TODO(dtantsur): support ACTIVE nodes # TODO(dtantsur): support ACTIVE nodes
if not cond_utils.agent_is_alive(task.node): if not cond_utils.agent_is_alive(task.node):

View File

@ -122,7 +122,7 @@ def _tear_down_managed_boot(task):
LOG.exception('Unable to remove inspection network for node %s', LOG.exception('Unable to remove inspection network for node %s',
task.node.uuid) task.node.uuid)
if CONF.inspector.power_off: if CONF.inspector.power_off and not utils.fast_track_enabled(task.node):
try: try:
cond_utils.node_power_action(task, states.POWER_OFF) cond_utils.node_power_action(task, states.POWER_OFF)
except Exception as exc: except Exception as exc:
@ -195,7 +195,7 @@ def _start_managed_inspection(task):
endpoint = _get_callback_endpoint(client) endpoint = _get_callback_endpoint(client)
params = dict(_parse_kernel_params(), params = dict(_parse_kernel_params(),
**{'ipa-inspection-callback-url': endpoint}) **{'ipa-inspection-callback-url': endpoint})
if CONF.deploy.fast_track: if utils.fast_track_enabled(task.node):
params['ipa-api-url'] = deploy_utils.get_ironic_api_url() params['ipa-api-url'] = deploy_utils.get_ironic_api_url()
cond_utils.node_power_action(task, states.POWER_OFF) cond_utils.node_power_action(task, states.POWER_OFF)

View File

@ -971,6 +971,7 @@ class DeployingErrorHandlerTestCase(db_base.DbTestCase):
self.node.provision_state = states.DEPLOYING self.node.provision_state = states.DEPLOYING
self.node.last_error = None self.node.last_error = None
self.node.deploy_step = None self.node.deploy_step = None
self.node.driver_info = {}
self.node.driver_internal_info = {} self.node.driver_internal_info = {}
self.node.id = obj_utils.create_test_node(self.context, self.node.id = obj_utils.create_test_node(self.context,
driver='fake-hardware').id driver='fake-hardware').id
@ -1983,6 +1984,37 @@ class FastTrackTestCase(db_base.DbTestCase):
self.context, self.node.uuid, shared=False) as task: self.context, self.node.uuid, shared=False) as task:
self.assertTrue(conductor_utils.is_fast_track(task)) self.assertTrue(conductor_utils.is_fast_track(task))
def test_is_fast_track_via_driver_info(self, mock_get_power):
self.config(fast_track=False, group='deploy')
mock_get_power.return_value = states.POWER_ON
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
task.node.driver_info['fast_track'] = True
self.assertTrue(conductor_utils.is_fast_track(task))
def test_is_fast_track_via_driver_info_string(self, mock_get_power):
self.config(fast_track=False, group='deploy')
mock_get_power.return_value = states.POWER_ON
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
task.node.driver_info['fast_track'] = 'yes'
self.assertTrue(conductor_utils.is_fast_track(task))
def test_is_fast_track_disabled_in_driver_info(self, mock_get_power):
mock_get_power.return_value = states.POWER_ON
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
task.node.driver_info['fast_track'] = False
self.assertFalse(conductor_utils.is_fast_track(task))
def test_is_fast_track_disabled_in_driver_info_string(self,
mock_get_power):
mock_get_power.return_value = states.POWER_ON
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
task.node.driver_info['fast_track'] = 'false'
self.assertFalse(conductor_utils.is_fast_track(task))
def test_is_fast_track_config_false(self, mock_get_power): def test_is_fast_track_config_false(self, mock_get_power):
self.config(fast_track=False, group='deploy') self.config(fast_track=False, group='deploy')
mock_get_power.return_value = states.POWER_ON mock_get_power.return_value = states.POWER_ON

View File

@ -501,6 +501,25 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
task.node.driver_internal_info['agent_last_heartbeat']) task.node.driver_internal_info['agent_last_heartbeat'])
self.assertEqual(provision_state, task.node.provision_state) self.assertEqual(provision_state, task.node.provision_state)
def test_heartbeat_records_fast_track_via_driver_info(self):
for provision_state in [states.ENROLL, states.MANAGEABLE,
states.AVAILABLE]:
self.node.driver_internal_info = {}
self.node.driver_info = {'fast_track': True}
self.node.provision_state = provision_state
self.node.save()
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
self.deploy.heartbeat(task, 'http://127.0.0.1:8080', '3.2.0')
self.assertEqual('http://127.0.0.1:8080',
task.node.driver_internal_info['agent_url'])
self.assertEqual('3.2.0',
task.node.driver_internal_info[
'agent_version'])
self.assertIsNotNone(
task.node.driver_internal_info['agent_last_heartbeat'])
self.assertEqual(provision_state, task.node.provision_state)
class AgentRescueTests(AgentDeployMixinBaseTest): class AgentRescueTests(AgentDeployMixinBaseTest):

View File

@ -61,6 +61,11 @@ class AgentPowerTest(db_base.DbTestCase):
self.assertRaises(exception.InvalidParameterValue, self.assertRaises(exception.InvalidParameterValue,
self.power.validate, self.task) self.power.validate, self.task)
def test_validate_no_fast_track(self):
self.node.driver_info['fast_track'] = False
self.assertRaises(exception.InvalidParameterValue,
self.power.validate, self.task)
def test_get_power_state(self): def test_get_power_state(self):
self.assertEqual(states.POWER_ON, self.assertEqual(states.POWER_ON,
self.power.get_power_state(self.task)) self.power.get_power_state(self.task))

View File

@ -290,6 +290,44 @@ class InspectHardwareTestCase(BaseTestCase):
self.assertFalse(self.driver.network.remove_inspection_network.called) self.assertFalse(self.driver.network.remove_inspection_network.called)
self.assertFalse(self.driver.boot.clean_up_ramdisk.called) self.assertFalse(self.driver.boot.clean_up_ramdisk.called)
@mock.patch('ironic.drivers.modules.deploy_utils.get_ironic_api_url',
autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
@mock.patch.object(inspect_utils, 'create_ports_if_not_exist',
autospec=True)
def test_managed_fast_track_via_driver_info(
self, mock_create_ports_if_not_exist, mock_get_system,
mock_ironic_url, mock_client):
CONF.set_override('extra_kernel_params',
'ipa-inspection-collectors=default,logs '
'ipa-collect-dhcp=1',
group='inspector')
endpoint = 'http://192.169.0.42:5050/v1'
mock_ironic_url.return_value = 'http://192.169.0.42:6385'
mock_client.return_value.get_endpoint.return_value = endpoint
mock_introspect = mock_client.return_value.start_introspection
self.task.node.driver_info = {'fast_track': True}
self.iface.validate(self.task)
self.assertEqual(states.INSPECTWAIT,
self.iface.inspect_hardware(self.task))
mock_introspect.assert_called_once_with(self.node.uuid,
manage_boot=False)
self.driver.boot.prepare_ramdisk.assert_called_once_with(
self.task, ramdisk_params={
'ipa-inspection-callback-url': endpoint + '/continue',
'ipa-inspection-collectors': 'default,logs',
'ipa-collect-dhcp': '1',
'ipa-api-url': 'http://192.169.0.42:6385',
})
self.driver.network.add_inspection_network.assert_called_once_with(
self.task)
self.driver.power.set_power_state.assert_has_calls([
mock.call(self.task, states.POWER_OFF, timeout=None),
mock.call(self.task, states.POWER_ON, timeout=None),
])
self.assertFalse(self.driver.network.remove_inspection_network.called)
self.assertFalse(self.driver.boot.clean_up_ramdisk.called)
@mock.patch.object(task_manager, 'acquire', autospec=True) @mock.patch.object(task_manager, 'acquire', autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True) @mock.patch.object(redfish_utils, 'get_system', autospec=True)
@mock.patch.object(inspect_utils, 'create_ports_if_not_exist', @mock.patch.object(inspect_utils, 'create_ports_if_not_exist',
@ -406,6 +444,23 @@ class CheckStatusTestCase(BaseTestCase):
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task) self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.assertFalse(self.driver.power.set_power_state.called) self.assertFalse(self.driver.power.set_power_state.called)
def test_status_ok_managed_no_power_off_on_fast_track(self, mock_client):
CONF.set_override('fast_track', True, group='deploy')
utils.set_node_nested_field(self.node, 'driver_internal_info',
'inspector_manage_boot', True)
self.node.save()
mock_get = mock_client.return_value.get_introspection
mock_get.return_value = mock.Mock(is_finished=True,
error=None,
spec=['is_finished', 'error'])
inspector._check_status(self.task)
mock_get.assert_called_once_with(self.node.uuid)
self.task.process_event.assert_called_once_with('done')
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.assertFalse(self.driver.power.set_power_state.called)
def test_status_error(self, mock_client): def test_status_error(self, mock_client):
mock_get = mock_client.return_value.get_introspection mock_get = mock_client.return_value.get_introspection
mock_get.return_value = mock.Mock(is_finished=True, mock_get.return_value = mock.Mock(is_finished=True,

View File

@ -0,0 +1,10 @@
---
features:
- |
Fast track mode can now be enabled or disabled per node::
baremetal node set <node> --driver-info fast_track=true
upgrade:
- |
The configuration option ``[inspector]power_off`` is now ignored for nodes
that have fast-track enabled. These nodes are never powered off after
inspection finishes.