Browse Source

Merge "Block ability to update callback_url"

changes/26/694726/1
Zuul 2 weeks ago
parent
commit
d7e7abe63f
8 changed files with 107 additions and 15 deletions
  1. +13
    -0
      ironic/api/controllers/v1/ramdisk.py
  2. +20
    -1
      ironic/conductor/manager.py
  3. +15
    -0
      ironic/conductor/utils.py
  4. +12
    -0
      ironic/tests/unit/api/controllers/v1/test_ramdisk.py
  5. +28
    -10
      ironic/tests/unit/conductor/test_manager.py
  6. +7
    -1
      ironic/tests/unit/conductor/test_utils.py
  7. +2
    -3
      ironic/tests/unit/drivers/modules/test_agent_base_vendor.py
  8. +10
    -0
      releasenotes/notes/prevent-callback-url-from-being-updated-41d50b20fb236e82.yaml

+ 13
- 0
ironic/api/controllers/v1/ramdisk.py View File

@@ -184,6 +184,19 @@ class HeartbeatController(rest.RestController):
policy.authorize('baremetal:node:ipa_heartbeat', cdict, cdict)

rpc_node = api_utils.get_rpc_node_with_suffix(node_ident)
dii = rpc_node['driver_internal_info']
agent_url = dii.get('agent_url')
# If we have an agent_url on file, and we get something different
# we should fail because this is unexpected behavior of the agent.
if (agent_url is not None
and agent_url != callback_url):
LOG.error('Received heartbeat for node %(node)s with '
'callback URL %(url)s. This is not expected, '
'and the heartbeat will not be processed.',
{'node': rpc_node.uuid, 'url': callback_url})
raise exception.Invalid(
_('Detected change in ramdisk provided '
'"callback_url"'))

try:
topic = api.request.rpcapi.get_topic_for(rpc_node)

+ 20
- 1
ironic/conductor/manager.py View File

@@ -598,6 +598,8 @@ class ConductorManager(base_manager.BaseConductorManager):
node_id, purpose='node rescue') as task:

node = task.node
# Record of any pre-existing agent_url should be removed.
utils.remove_agent_url(node)
if node.maintenance:
raise exception.NodeInMaintenance(op=_('rescuing'),
node=node.uuid)
@@ -697,6 +699,9 @@ class ConductorManager(base_manager.BaseConductorManager):
with task_manager.acquire(context, node_id,
purpose='node unrescue') as task:
node = task.node
# Record of any pre-existing agent_url should be removed
# (not that there should be one at this point).
utils.remove_agent_url(node)
if node.maintenance:
raise exception.NodeInMaintenance(op=_('unrescuing'),
node=node.uuid)
@@ -776,6 +781,7 @@ class ConductorManager(base_manager.BaseConductorManager):
info_message = _('Rescue operation aborted for node %s.') % node.uuid
last_error = _('By request, the rescue operation was aborted.')
node.refresh()
utils.remove_agent_url(node)
node.last_error = last_error
node.save()
LOG.info(info_message)
@@ -819,6 +825,8 @@ class ConductorManager(base_manager.BaseConductorManager):
with task_manager.acquire(context, node_id, shared=False,
purpose='node deployment') as task:
node = task.node
# Record of any pre-existing agent_url should be removed.
utils.remove_agent_url(node)
if node.maintenance:
raise exception.NodeInMaintenance(op=_('provisioning'),
node=node.uuid)
@@ -972,6 +980,8 @@ class ConductorManager(base_manager.BaseConductorManager):

with task_manager.acquire(context, node_id, shared=False,
purpose='node tear down') as task:
# Record of any pre-existing agent_url should be removed.
utils.remove_agent_url(task.node)
if task.node.protected:
raise exception.NodeProtected(node=task.node.uuid)

@@ -1168,7 +1178,8 @@ class ConductorManager(base_manager.BaseConductorManager):
with task_manager.acquire(context, node_id, shared=False,
purpose='node manual cleaning') as task:
node = task.node

# Record of any pre-existing agent_url should be removed.
utils.remove_agent_url(node)
if node.maintenance:
raise exception.NodeInMaintenance(op=_('cleaning'),
node=node.uuid)
@@ -1473,6 +1484,8 @@ class ConductorManager(base_manager.BaseConductorManager):
driver_internal_info.pop('clean_step_index', None)
driver_internal_info.pop('cleaning_reboot', None)
driver_internal_info.pop('cleaning_polling', None)
# Remove agent_url
driver_internal_info.pop('agent_url', None)
node.driver_internal_info = driver_internal_info
node.save()
try:
@@ -1562,6 +1575,7 @@ class ConductorManager(base_manager.BaseConductorManager):
info.pop('cleaning_reboot', None)
info.pop('cleaning_polling', None)
info.pop('skip_current_clean_step', None)
info.pop('agent_url', None)
node.driver_internal_info = info
node.save()
LOG.info(info_message)
@@ -3979,6 +3993,8 @@ def _do_next_deploy_step(task, step_index, conductor_id):
driver_internal_info.pop('deploy_step_index', None)
driver_internal_info.pop('deployment_reboot', None)
driver_internal_info.pop('deployment_polling', None)
# Remove the agent_url cached from the deployment.
driver_internal_info.pop('agent_url', None)
node.driver_internal_info = driver_internal_info
node.save()

@@ -4202,6 +4218,9 @@ def _do_inspect_hardware(task):
log_func("Failed to inspect node %(node)s: %(err)s",
{'node': node.uuid, 'err': e})

# Remove agent_url, while not strictly needed for the inspection path,
# lets just remove it out of good practice.
utils.remove_agent_url(node)
try:
new_state = task.driver.inspect.inspect_hardware(task)
except exception.IronicException as e:

+ 15
- 0
ironic/conductor/utils.py View File

@@ -417,6 +417,9 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True,
info.pop('cleaning_reboot', None)
info.pop('cleaning_polling', None)
info.pop('skip_current_clean_step', None)
# We don't need to keep the old agent URL
# as it should change upon the next cleaning attempt.
info.pop('agent_url', None)
node.driver_internal_info = info
# For manual cleaning, the target provision state is MANAGEABLE, whereas
# for automated cleaning, it is AVAILABLE.
@@ -477,6 +480,9 @@ def deploying_error_handler(task, logmsg, errmsg, traceback=False,
info.pop('deployment_reboot', None)
info.pop('deployment_polling', None)
info.pop('skip_current_deploy_step', None)
# Remove agent_url since it will be re-asserted
# upon the next deployment attempt.
info.pop('agent_url', None)
node.driver_internal_info = info

if cleanup_err:
@@ -521,6 +527,7 @@ def rescuing_error_handler(task, msg, set_fail_state=True):
try:
node_power_action(task, states.POWER_OFF)
task.driver.rescue.clean_up(task)
remove_agent_url(node)
node.last_error = msg
except exception.IronicException as e:
node.last_error = (_('Rescue operation was unsuccessful, clean up '
@@ -535,6 +542,7 @@ def rescuing_error_handler(task, msg, set_fail_state=True):
LOG.exception('Rescue failed for node %(node)s, an exception was '
'encountered while aborting.', {'node': node.uuid})
finally:
remove_agent_url(node)
node.save()

if set_fail_state:
@@ -914,3 +922,10 @@ def is_fast_track(task):
task.node.driver_internal_info.get('agent_last_heartbeat'),
CONF.deploy.fast_track_timeout)
and task.driver.power.get_power_state(task) == states.POWER_ON)


def remove_agent_url(node):
    """Helper to remove any cached ``agent_url`` record from a node.

    :param node: a Node object whose ``driver_internal_info`` may contain
        a stale ``agent_url`` entry left behind by a previous agent.
    """
    internal_info = node.driver_internal_info
    if 'agent_url' in internal_info:
        del internal_info['agent_url']
    # Re-assign unconditionally so the object registers the field change.
    node.driver_internal_info = internal_info

+ 12
- 0
ironic/tests/unit/api/controllers/v1/test_ramdisk.py View File

@@ -255,3 +255,15 @@ class TestHeartbeat(test_api_base.BaseApiTest):
headers={api_base.Version.string: '1.35'},
expect_errors=True)
self.assertEqual(http_client.BAD_REQUEST, response.status_int)

@mock.patch.object(rpcapi.ConductorAPI, 'heartbeat', autospec=True)
def test_heartbeat_rejects_different_callback_url(self, mock_heartbeat):
node = obj_utils.create_test_node(
self.context,
driver_internal_info={'agent_url': 'url'})
response = self.post_json(
'/heartbeat/%s' % node.uuid,
{'callback_url': 'url2'},
headers={api_base.Version.string: str(api_v1.max_version())},
expect_errors=True)
self.assertEqual(http_client.BAD_REQUEST, response.status_int)

+ 28
- 10
ironic/tests/unit/conductor/test_manager.py View File

@@ -1352,9 +1352,11 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin,
autospec=True) as mock_spawn:
mock_spawn.return_value = thread

node = obj_utils.create_test_node(self.context,
driver='fake-hardware',
provision_state=states.AVAILABLE)
node = obj_utils.create_test_node(
self.context,
driver='fake-hardware',
provision_state=states.AVAILABLE,
driver_internal_info={'agent_url': 'url'})

self.service.do_node_deploy(self.context, node.uuid)
self._stop_service()
@@ -1369,6 +1371,7 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin,
mock.ANY, None)
mock_iwdi.assert_called_once_with(self.context, node.instance_info)
self.assertFalse(node.driver_internal_info['is_whole_disk_image'])
self.assertNotIn('agent_url', node.driver_internal_info)

@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
def test_do_node_deploy_rebuild_active_state_old(self, mock_deploy,
@@ -2673,7 +2676,8 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin,
def test__do_next_deploy_step_all(self, mock_execute):
# Run all steps from start to finish (all synchronous)
driver_internal_info = {'deploy_step_index': None,
'deploy_steps': self.deploy_steps}
'deploy_steps': self.deploy_steps,
'agent_url': 'url'}
self._start_service()
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
@@ -2695,6 +2699,7 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin,
self.assertIsNone(node.driver_internal_info['deploy_steps'])
mock_execute.assert_has_calls = [mock.call(self.deploy_steps[0]),
mock.call(self.deploy_steps[1])]
self.assertNotIn('agent_url', node.driver_internal_info)

@mock.patch.object(conductor_utils, 'LOG', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step',
@@ -3869,7 +3874,8 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
self.context, driver='fake-hardware',
provision_state=states.CLEANING,
target_provision_state=states.AVAILABLE,
last_error=None)
last_error=None,
driver_internal_info={'agent_url': 'url'})
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
self.service._do_node_clean(task)
@@ -3879,6 +3885,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
# Assert that the node was cleaned
self.assertTrue(mock_validate.called)
self.assertIn('clean_steps', node.driver_internal_info)
self.assertNotIn('agent_url', node.driver_internal_info)

@mock.patch('ironic.drivers.modules.fake.FakePower.validate',
autospec=True)
@@ -4614,7 +4621,8 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
task = self._create_task(
node_attrs=dict(driver='fake-hardware',
provision_state=states.ACTIVE,
instance_info={}))
instance_info={},
driver_internal_info={'agent_url': 'url'}))
mock_acquire.side_effect = self._get_acquire_side_effect(task)
self.service.do_node_rescue(self.context, task.node.uuid,
"password")
@@ -4624,6 +4632,7 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
call_args=(self.service._do_node_rescue, task),
err_handler=conductor_utils.spawn_rescue_error_handler)
self.assertIn('rescue_password', task.node.instance_info)
self.assertNotIn('agent_url', task.node.driver_internal_info)

def test_do_node_rescue_invalid_state(self):
self._start_service()
@@ -4755,9 +4764,12 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
self._start_service()
task = self._create_task(
node_attrs=dict(driver='fake-hardware',
provision_state=states.RESCUE))
provision_state=states.RESCUE,
driver_internal_info={'agent_url': 'url'}))
mock_acquire.side_effect = self._get_acquire_side_effect(task)
self.service.do_node_unrescue(self.context, task.node.uuid)
task.node.refresh()
self.assertNotIn('agent_url', task.node.driver_internal_info)
task.process_event.assert_called_once_with(
'unrescue',
callback=self.service._spawn_worker,
@@ -4891,12 +4903,14 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.RESCUEFAIL,
target_provision_state=states.RESCUE)
target_provision_state=states.RESCUE,
driver_internal_info={'agent_url': 'url'})
with task_manager.acquire(self.context, node.uuid) as task:
self.service._do_node_rescue_abort(task)
clean_up_mock.assert_called_once_with(task.driver.rescue, task)
self.assertIsNotNone(task.node.last_error)
self.assertFalse(task.node.maintenance)
self.assertNotIn('agent_url', task.node.driver_internal_info)

@mock.patch.object(fake.FakeRescue, 'clean_up', autospec=True)
def test__do_node_rescue_abort_clean_up_fail(self, clean_up_mock):
@@ -7887,8 +7901,10 @@ class NodeInspectHardware(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
def test_inspect_hardware_ok(self, mock_inspect):
self._start_service()
node = obj_utils.create_test_node(self.context, driver='fake-hardware',
provision_state=states.INSPECTING)
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.INSPECTING,
driver_internal_info={'agent_url': 'url'})
task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = states.MANAGEABLE
manager._do_inspect_hardware(task)
@@ -7897,6 +7913,8 @@ class NodeInspectHardware(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
self.assertEqual(states.NOSTATE, node.target_provision_state)
self.assertIsNone(node.last_error)
mock_inspect.assert_called_once_with(mock.ANY)
task.node.refresh()
self.assertNotIn('agent_url', task.node.driver_internal_info)

@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
def test_inspect_hardware_return_inspecting(self, mock_inspect):

+ 7
- 1
ironic/tests/unit/conductor/test_utils.py View File

@@ -920,6 +920,7 @@ class DeployingErrorHandlerTestCase(tests_base.TestCase):
info['deployment_reboot'] = True
info['deployment_polling'] = True
info['skip_current_deploy_step'] = True
info['agent_url'] = 'url'
conductor_utils.deploying_error_handler(self.task, self.logmsg,
self.errmsg)

@@ -932,6 +933,7 @@ class DeployingErrorHandlerTestCase(tests_base.TestCase):
self.assertNotIn('deployment_polling', self.node.driver_internal_info)
self.assertNotIn('skip_current_deploy_step',
self.node.driver_internal_info)
self.assertNotIn('agent_url', self.node.driver_internal_info)
self.task.process_event.assert_called_once_with('fail')

def _test_deploying_error_handler_cleanup(self, exc, expected_str):
@@ -1059,7 +1061,8 @@ class ErrorHandlersTestCase(tests_base.TestCase):
'cleaning_reboot': True,
'cleaning_polling': True,
'skip_current_clean_step': True,
'clean_step_index': 0}
'clean_step_index': 0,
'agent_url': 'url'}
msg = 'error bar'
conductor_utils.cleaning_error_handler(self.task, msg)
self.node.save.assert_called_once_with()
@@ -1080,6 +1083,7 @@ class ErrorHandlersTestCase(tests_base.TestCase):
else:
self.task.process_event.assert_called_once_with('fail',
target_state=None)
self.assertNotIn('agent_url', self.node.driver_internal_info)

def test_cleaning_error_handler(self):
self._test_cleaning_error_handler()
@@ -1254,12 +1258,14 @@ class ErrorHandlersTestCase(tests_base.TestCase):
def _test_rescuing_error_handler(self, node_power_mock,
set_state=True):
self.node.provision_state = states.RESCUEWAIT
self.node.driver_internal_info.update({'agent_url': 'url'})
conductor_utils.rescuing_error_handler(self.task,
'some exception for node',
set_fail_state=set_state)
node_power_mock.assert_called_once_with(mock.ANY, states.POWER_OFF)
self.task.driver.rescue.clean_up.assert_called_once_with(self.task)
self.node.save.assert_called_once_with()
self.assertNotIn('agent_url', self.node.driver_internal_info)
if set_state:
self.assertTrue(self.task.process_event.called)
else:

+ 2
- 3
ironic/tests/unit/drivers/modules/test_agent_base_vendor.py View File

@@ -269,9 +269,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
shared=True) as task:
self.deploy.heartbeat(task, agent_url, '3.2.0')
self.assertFalse(task.shared)
self.assertEqual(
agent_url,
task.node.driver_internal_info['agent_url'])
self.assertIsNone(
task.node.driver_internal_info.get('agent_url', None))
self.assertEqual(
'3.2.0',
task.node.driver_internal_info['agent_version'])

+ 10
- 0
releasenotes/notes/prevent-callback-url-from-being-updated-41d50b20fb236e82.yaml View File

@@ -0,0 +1,10 @@
---
security:
- |
Prevents additional updates of an agent ``callback_url`` through the agent
heartbeat ``/v1/heartbeat/<node_uuid>`` endpoint as the ``callback_url``
should remain stable through the cleaning, provisioning, or rescue
processes. Should anything, such as an unexpected agent reboot, cause the
``callback_url`` to change, heartbeat operations will now be rejected.
More information can be found at
`story 2006773 <https://storyboard.openstack.org/#!/story/2006773>`_.

Loading…
Cancel
Save