Allow enabling fast-track per node

This is useful when some nodes need the "agent" power interface, while
the others can be deployed normally.

Change-Id: Ief7df40c83ef03d0ec5ae92d09ceffd39d3c12a3
This commit is contained in:
Dmitry Tantsur 2021-12-08 12:26:48 +01:00
parent 79311c88df
commit 2a6cdf4b24
13 changed files with 165 additions and 25 deletions

View File

@ -25,6 +25,13 @@ Fast track is off by default and should be enabled in the configuration:
[deploy]
fast_track = true
Starting with the Yoga release series, it can also be enabled or disabled per
node:
.. code-block:: console
baremetal node set <node> --driver-info fast_track=true
Inspection
----------

View File

@ -69,11 +69,13 @@ def convert_with_links(node):
class LookupController(rest.RestController):
"""Controller handling node lookup for a deploy ramdisk."""
@property
def lookup_allowed_states(self):
if CONF.deploy.fast_track:
return states.FASTTRACK_LOOKUP_ALLOWED_STATES
return states.LOOKUP_ALLOWED_STATES
def lookup_allowed(self, node):
if utils.fast_track_enabled(node):
return (
node.provision_state in states.FASTTRACK_LOOKUP_ALLOWED_STATES
)
else:
return node.provision_state in states.LOOKUP_ALLOWED_STATES
@method.expose()
@args.validate(addresses=args.string_list, node_uuid=args.uuid)
@ -135,8 +137,7 @@ class LookupController(rest.RestController):
# at all and nodes in a wrong state by different error messages.
raise exception.NotFound()
if (CONF.api.restrict_lookup
and node.provision_state not in self.lookup_allowed_states):
if CONF.api.restrict_lookup and not self.lookup_allowed(node):
raise exception.NotFound()
if api_utils.allow_agent_token():

View File

@ -36,6 +36,7 @@ from oslo_log import log as logging
from oslo_serialization import jsonutils
from oslo_utils import fileutils
from oslo_utils import netutils
from oslo_utils import strutils
from oslo_utils import timeutils
import psutil
import pytz
@ -654,3 +655,15 @@ def remove_large_keys(var):
return var.__class__(map(remove_large_keys, var))
else:
return var
def fast_track_enabled(node):
is_enabled = node.driver_info.get('fast_track')
if is_enabled is None:
return CONF.deploy.fast_track
else:
try:
return strutils.bool_from_string(is_enabled, strict=True)
except ValueError as exc:
raise exception.InvalidParameterValue(
_("Invalid value of fast_track: %s") % exc)

View File

@ -36,6 +36,7 @@ from ironic.common.i18n import _
from ironic.common import network
from ironic.common import nova
from ironic.common import states
from ironic.common import utils
from ironic.conductor import notification_utils as notify_utils
from ironic.conductor import task_manager
from ironic.objects import fields
@ -1085,7 +1086,7 @@ def fast_track_able(task):
configuration is present, and no last_error is present for
the node indicating that there was a recent failure.
"""
return (CONF.deploy.fast_track
return (utils.fast_track_enabled(task.node)
# TODO(TheJulia): Network model aside, we should be able to
# fast-track through initial sequence to complete deployment.
# This needs to be validated.

View File

@ -28,7 +28,8 @@ opts = [
'spaces.')),
cfg.BoolOpt('power_off', default=True,
help=_('whether to power off a node after inspection '
'finishes')),
'finishes. Ignored for nodes that have fast '
'track mode enabled.')),
cfg.StrOpt('callback_endpoint_override',
help=_('endpoint to use as a callback for posting back '
'introspection data when boot is managed by ironic. '

View File

@ -440,12 +440,11 @@ class HeartbeatMixin(object):
"""
return self.process_next_step(task, 'clean')
@property
def heartbeat_allowed_states(self):
"""Define node states where heartbeating is allowed"""
if CONF.deploy.fast_track:
return FASTTRACK_HEARTBEAT_ALLOWED
return HEARTBEAT_ALLOWED
def heartbeat_allowed(self, node):
if utils.fast_track_enabled(node):
return node.provision_state in FASTTRACK_HEARTBEAT_ALLOWED
else:
return node.provision_state in HEARTBEAT_ALLOWED
def _heartbeat_in_maintenance(self, task):
node = task.node
@ -560,8 +559,8 @@ class HeartbeatMixin(object):
agent_status
"""
# NOTE(pas-ha) immediately skip the rest if nothing to do
if (task.node.provision_state not in self.heartbeat_allowed_states
and not manager_utils.fast_track_able(task)):
if (not self.heartbeat_allowed(task.node)
and not manager_utils.fast_track_able(task)):
LOG.error('Heartbeat from node %(node)s in unsupported '
'provision state %(state)s, not taking any action.',
{'node': task.node.uuid,

View File

@ -23,6 +23,7 @@ import tenacity
from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import states
from ironic.common import utils
from ironic.conductor import utils as cond_utils
from ironic.drivers import base
from ironic.drivers.modules import agent_client
@ -40,10 +41,6 @@ class AgentPower(base.PowerInterface):
def __init__(self):
super(AgentPower, self).__init__()
if not CONF.deploy.fast_track:
raise exception.InvalidParameterValue(
_('[deploy]fast_track must be True to enable the agent '
'power interface'))
self._client = agent_client.AgentClient()
def get_properties(self):
@ -61,9 +58,9 @@ class AgentPower(base.PowerInterface):
"""
# NOTE(dtantsur): the fast_track option is mutable, so we have to check
# it again on validation.
if not CONF.deploy.fast_track:
if not utils.fast_track_enabled(task.node):
raise exception.InvalidParameterValue(
_('[deploy]fast_track must be True to enable the agent '
_('Fast track mode must be enabled to use the agent '
'power interface'))
# TODO(dtantsur): support ACTIVE nodes
if not cond_utils.agent_is_alive(task.node):

View File

@ -122,7 +122,7 @@ def _tear_down_managed_boot(task):
LOG.exception('Unable to remove inspection network for node %s',
task.node.uuid)
if CONF.inspector.power_off:
if CONF.inspector.power_off and not utils.fast_track_enabled(task.node):
try:
cond_utils.node_power_action(task, states.POWER_OFF)
except Exception as exc:
@ -195,7 +195,7 @@ def _start_managed_inspection(task):
endpoint = _get_callback_endpoint(client)
params = dict(_parse_kernel_params(),
**{'ipa-inspection-callback-url': endpoint})
if CONF.deploy.fast_track:
if utils.fast_track_enabled(task.node):
params['ipa-api-url'] = deploy_utils.get_ironic_api_url()
cond_utils.node_power_action(task, states.POWER_OFF)

View File

@ -971,6 +971,7 @@ class DeployingErrorHandlerTestCase(db_base.DbTestCase):
self.node.provision_state = states.DEPLOYING
self.node.last_error = None
self.node.deploy_step = None
self.node.driver_info = {}
self.node.driver_internal_info = {}
self.node.id = obj_utils.create_test_node(self.context,
driver='fake-hardware').id
@ -1983,6 +1984,37 @@ class FastTrackTestCase(db_base.DbTestCase):
self.context, self.node.uuid, shared=False) as task:
self.assertTrue(conductor_utils.is_fast_track(task))
def test_is_fast_track_via_driver_info(self, mock_get_power):
self.config(fast_track=False, group='deploy')
mock_get_power.return_value = states.POWER_ON
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
task.node.driver_info['fast_track'] = True
self.assertTrue(conductor_utils.is_fast_track(task))
def test_is_fast_track_via_driver_info_string(self, mock_get_power):
self.config(fast_track=False, group='deploy')
mock_get_power.return_value = states.POWER_ON
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
task.node.driver_info['fast_track'] = 'yes'
self.assertTrue(conductor_utils.is_fast_track(task))
def test_is_fast_track_disabled_in_driver_info(self, mock_get_power):
mock_get_power.return_value = states.POWER_ON
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
task.node.driver_info['fast_track'] = False
self.assertFalse(conductor_utils.is_fast_track(task))
def test_is_fast_track_disabled_in_driver_info_string(self,
mock_get_power):
mock_get_power.return_value = states.POWER_ON
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
task.node.driver_info['fast_track'] = 'false'
self.assertFalse(conductor_utils.is_fast_track(task))
def test_is_fast_track_config_false(self, mock_get_power):
self.config(fast_track=False, group='deploy')
mock_get_power.return_value = states.POWER_ON

View File

@ -501,6 +501,25 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
task.node.driver_internal_info['agent_last_heartbeat'])
self.assertEqual(provision_state, task.node.provision_state)
def test_heartbeat_records_fast_track_via_driver_info(self):
for provision_state in [states.ENROLL, states.MANAGEABLE,
states.AVAILABLE]:
self.node.driver_internal_info = {}
self.node.driver_info = {'fast_track': True}
self.node.provision_state = provision_state
self.node.save()
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
self.deploy.heartbeat(task, 'http://127.0.0.1:8080', '3.2.0')
self.assertEqual('http://127.0.0.1:8080',
task.node.driver_internal_info['agent_url'])
self.assertEqual('3.2.0',
task.node.driver_internal_info[
'agent_version'])
self.assertIsNotNone(
task.node.driver_internal_info['agent_last_heartbeat'])
self.assertEqual(provision_state, task.node.provision_state)
class AgentRescueTests(AgentDeployMixinBaseTest):

View File

@ -61,6 +61,11 @@ class AgentPowerTest(db_base.DbTestCase):
self.assertRaises(exception.InvalidParameterValue,
self.power.validate, self.task)
def test_validate_no_fast_track(self):
self.node.driver_info['fast_track'] = False
self.assertRaises(exception.InvalidParameterValue,
self.power.validate, self.task)
def test_get_power_state(self):
self.assertEqual(states.POWER_ON,
self.power.get_power_state(self.task))

View File

@ -290,6 +290,44 @@ class InspectHardwareTestCase(BaseTestCase):
self.assertFalse(self.driver.network.remove_inspection_network.called)
self.assertFalse(self.driver.boot.clean_up_ramdisk.called)
@mock.patch('ironic.drivers.modules.deploy_utils.get_ironic_api_url',
autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
@mock.patch.object(inspect_utils, 'create_ports_if_not_exist',
autospec=True)
def test_managed_fast_track_via_driver_info(
self, mock_create_ports_if_not_exist, mock_get_system,
mock_ironic_url, mock_client):
CONF.set_override('extra_kernel_params',
'ipa-inspection-collectors=default,logs '
'ipa-collect-dhcp=1',
group='inspector')
endpoint = 'http://192.169.0.42:5050/v1'
mock_ironic_url.return_value = 'http://192.169.0.42:6385'
mock_client.return_value.get_endpoint.return_value = endpoint
mock_introspect = mock_client.return_value.start_introspection
self.task.node.driver_info = {'fast_track': True}
self.iface.validate(self.task)
self.assertEqual(states.INSPECTWAIT,
self.iface.inspect_hardware(self.task))
mock_introspect.assert_called_once_with(self.node.uuid,
manage_boot=False)
self.driver.boot.prepare_ramdisk.assert_called_once_with(
self.task, ramdisk_params={
'ipa-inspection-callback-url': endpoint + '/continue',
'ipa-inspection-collectors': 'default,logs',
'ipa-collect-dhcp': '1',
'ipa-api-url': 'http://192.169.0.42:6385',
})
self.driver.network.add_inspection_network.assert_called_once_with(
self.task)
self.driver.power.set_power_state.assert_has_calls([
mock.call(self.task, states.POWER_OFF, timeout=None),
mock.call(self.task, states.POWER_ON, timeout=None),
])
self.assertFalse(self.driver.network.remove_inspection_network.called)
self.assertFalse(self.driver.boot.clean_up_ramdisk.called)
@mock.patch.object(task_manager, 'acquire', autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
@mock.patch.object(inspect_utils, 'create_ports_if_not_exist',
@ -406,6 +444,23 @@ class CheckStatusTestCase(BaseTestCase):
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.assertFalse(self.driver.power.set_power_state.called)
def test_status_ok_managed_no_power_off_on_fast_track(self, mock_client):
CONF.set_override('fast_track', True, group='deploy')
utils.set_node_nested_field(self.node, 'driver_internal_info',
'inspector_manage_boot', True)
self.node.save()
mock_get = mock_client.return_value.get_introspection
mock_get.return_value = mock.Mock(is_finished=True,
error=None,
spec=['is_finished', 'error'])
inspector._check_status(self.task)
mock_get.assert_called_once_with(self.node.uuid)
self.task.process_event.assert_called_once_with('done')
self.driver.network.remove_inspection_network.assert_called_once_with(
self.task)
self.driver.boot.clean_up_ramdisk.assert_called_once_with(self.task)
self.assertFalse(self.driver.power.set_power_state.called)
def test_status_error(self, mock_client):
mock_get = mock_client.return_value.get_introspection
mock_get.return_value = mock.Mock(is_finished=True,

View File

@ -0,0 +1,10 @@
---
features:
- |
Fast track mode can now be enabled or disabled per node::
baremetal node set <node> --driver-info fast_track=true
upgrade:
- |
The configuration option ``[inspector]power_off`` is now ignored for nodes
that have fast-track enabled. These nodes are never powered off.