Update the cache if we don't have a root device hint

Or at least try to.

Some deployments just don't use root device hints, and this is okay.

However, other deployments need root device hints, and with fast
track mode in ramdisks, we created a situation where the node cache
could be updated by a human or software between the time the agent
was started, and the deployment was requested.

As a result, the agent has been updated to check if we have a hint
and if we don't, update the cache from the node lookup endpoint.

This is not needed when the inband deploy steps are executed, as
the process of updating the steps does force the node cache to be
updated.

Change-Id: I27201319f31cdc01605a3c5ae9ef4b4218e4a3f6
Story: 2008039
Task: 40701
This commit is contained in:
Julia Kreger 2020-08-19 18:44:39 -07:00
parent ba6ca246f5
commit d3c3d4dabe
8 changed files with 161 additions and 20 deletions

View File

@ -472,6 +472,10 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
node_uuid=uuid)
LOG.debug('Received lookup results: %s', content)
self.process_lookup_data(content)
# Save the API url in case we need it later.
hardware.save_api_client(
self.api_client, self.lookup_timeout,
self.lookup_interval)
elif cfg.CONF.inspection_callback_url:
LOG.info('No ipa-api-url configured, Heartbeat and lookup '

View File

@ -185,7 +185,8 @@ class ISCSIExtension(base.BaseAgentExtension):
if iqn is None:
iqn = 'iqn.2008-10.org.openstack:%s' % uuidutils.generate_uuid()
device = hardware.dispatch_to_managers('get_os_install_device')
device = hardware.dispatch_to_managers('get_os_install_device',
permit_refresh=True)
if wipe_disk_metadata:
disk_utils.destroy_disk_metadata(

View File

@ -627,7 +627,8 @@ class StandbyExtension(base.BaseAgentExtension):
:raises: ImageWriteError if writing the image fails.
"""
LOG.debug('Caching image %s', image_info['id'])
device = hardware.dispatch_to_managers('get_os_install_device')
device = hardware.dispatch_to_managers('get_os_install_device',
permit_refresh=True)
msg = 'image ({}) already present on device {} '
@ -669,7 +670,8 @@ class StandbyExtension(base.BaseAgentExtension):
large to store on the given device.
"""
LOG.debug('Preparing image %s', image_info['id'])
device = hardware.dispatch_to_managers('get_os_install_device')
device = hardware.dispatch_to_managers('get_os_install_device',
permit_refresh=True)
disk_format = image_info.get('disk_format')
stream_raw_images = image_info.get('stream_raw_images', False)

View File

@ -53,7 +53,9 @@ UNIT_CONVERTER.define('bytes = []')
UNIT_CONVERTER.define('MB = 1048576 bytes')
_MEMORY_ID_RE = re.compile(r'^memory(:\d+)?$')
NODE = None
API_CLIENT = None
API_LOOKUP_TIMEOUT = None
API_LOOKUP_INTERVAL = None
SUPPORTED_SOFTWARE_RAID_LEVELS = frozenset(['0', '1', '1+0', '5', '6'])
RAID_APPLY_CONFIGURATION_ARGSINFO = {
@ -470,6 +472,40 @@ def list_all_block_devices(block_type='disk',
return devices
def save_api_client(client=None, timeout=None, interval=None):
"""Preserves access to the API client for potential later re-use."""
global API_CLIENT, API_LOOKUP_TIMEOUT, API_LOOKUP_INTERVAL
if client and timeout and interval and not API_CLIENT:
API_CLIENT = client
API_LOOKUP_TIMEOUT = timeout
API_LOOKUP_INTERVAL = interval
def update_cached_node():
"""Attmepts to update the node cache via the API"""
cached_node = get_cached_node()
if API_CLIENT:
LOG.info('Agent is requesting to perform an explicit node cache '
'update. This is to pickup any chanages in the cache '
'before deployment.')
try:
if cached_node is None:
uuid = None
else:
uuid = cached_node['uuid']
content = API_CLIENT.lookup_node(
hardware_info=list_hardware_info(use_cache=True),
timeout=API_LOOKUP_TIMEOUT,
starting_interval=API_LOOKUP_INTERVAL,
uuid=uuid)
cache_node(content['node'])
return content['node']
except Exception as exc:
LOG.warning('Failed to update node cache. Error %s', exc)
return cached_node
class HardwareSupport(object):
"""Example priorities for hardware managers.
@ -597,7 +633,7 @@ class HardwareManager(object, metaclass=abc.ABCMeta):
def get_memory(self):
raise errors.IncompatibleHardwareMethodError
def get_os_install_device(self):
def get_os_install_device(self, permit_refresh=False):
raise errors.IncompatibleHardwareMethodError
def get_bmc_address(self):
@ -1052,13 +1088,18 @@ class GenericHardwareManager(HardwareManager):
)
return block_devices
def get_os_install_device(self):
def get_os_install_device(self, permit_refresh=False):
cached_node = get_cached_node()
root_device_hints = None
if cached_node is not None:
root_device_hints = (
cached_node['instance_info'].get('root_device')
or cached_node['properties'].get('root_device'))
if permit_refresh and not root_device_hints:
cached_node = update_cached_node()
root_device_hints = (
cached_node['instance_info'].get('root_device')
or cached_node['properties'].get('root_device'))
LOG.debug('Looking for a device matching root hints %s',
root_device_hints)

View File

@ -69,7 +69,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest):
'--op', 'bind', '--tid', '1',
'--initiator-address', 'ALL')]
mock_execute.assert_has_calls(expected)
mock_dispatch.assert_called_once_with('get_os_install_device')
mock_dispatch.assert_called_once_with('get_os_install_device',
permit_refresh=True)
self.assertEqual({'iscsi_target_iqn': self.fake_iqn},
result.command_result)
self.assertFalse(mock_destroy.called)
@ -98,7 +99,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest):
'--op', 'bind', '--tid', '1',
'--initiator-address', 'ALL')]
mock_execute.assert_has_calls(expected)
mock_dispatch.assert_called_once_with('get_os_install_device')
mock_dispatch.assert_called_once_with('get_os_install_device',
permit_refresh=True)
self.assertEqual({'iscsi_target_iqn': self.fake_iqn},
result.command_result)
@ -119,7 +121,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest):
'--op', 'show', attempts=10)]
mock_execute.assert_has_calls(expected)
mock_dispatch.assert_called_once_with('get_os_install_device')
mock_dispatch.assert_called_once_with('get_os_install_device',
permit_refresh=True)
self.assertFalse(mock_destroy.called)
@mock.patch.object(iscsi, '_wait_for_tgtd', autospec=True)
@ -138,7 +141,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest):
'target', '--op', 'new', '--tid', '1',
'--targetname', self.fake_iqn)]
mock_execute.assert_has_calls(expected)
mock_dispatch.assert_called_once_with('get_os_install_device')
mock_dispatch.assert_called_once_with('get_os_install_device',
permit_refresh=True)
def test_start_iscsi_target_fail_command_not_exist(self, mock_execute,
mock_dispatch,

View File

@ -569,7 +569,8 @@ class TestStandbyExtension(base.IronicAgentTest):
async_result.join()
download_mock.assert_called_once_with(image_info)
write_mock.assert_called_once_with(image_info, 'manager')
dispatch_mock.assert_called_once_with('get_os_install_device')
dispatch_mock.assert_called_once_with('get_os_install_device',
permit_refresh=True)
self.assertEqual(image_info['id'],
self.agent_extension.cached_image_id)
self.assertEqual('SUCCEEDED', async_result.command_status)
@ -595,7 +596,8 @@ class TestStandbyExtension(base.IronicAgentTest):
async_result.join()
download_mock.assert_called_once_with(image_info)
write_mock.assert_called_once_with(image_info, 'manager')
dispatch_mock.assert_called_once_with('get_os_install_device')
dispatch_mock.assert_called_once_with('get_os_install_device',
permit_refresh=True)
self.assertEqual(image_info['id'],
self.agent_extension.cached_image_id)
self.assertEqual('SUCCEEDED', async_result.command_status)
@ -626,7 +628,8 @@ class TestStandbyExtension(base.IronicAgentTest):
async_result.join()
download_mock.assert_called_once_with(image_info)
write_mock.assert_called_once_with(image_info, 'manager')
dispatch_mock.assert_called_once_with('get_os_install_device')
dispatch_mock.assert_called_once_with('get_os_install_device',
permit_refresh=True)
self.assertEqual(image_info['id'],
self.agent_extension.cached_image_id)
self.assertEqual('SUCCEEDED', async_result.command_status)
@ -654,7 +657,8 @@ class TestStandbyExtension(base.IronicAgentTest):
async_result.join()
self.assertFalse(download_mock.called)
self.assertFalse(write_mock.called)
dispatch_mock.assert_called_once_with('get_os_install_device')
dispatch_mock.assert_called_once_with('get_os_install_device',
permit_refresh=True)
self.assertEqual(image_info['id'],
self.agent_extension.cached_image_id)
self.assertEqual('SUCCEEDED', async_result.command_status)
@ -699,7 +703,8 @@ class TestStandbyExtension(base.IronicAgentTest):
download_mock.assert_called_once_with(image_info)
write_mock.assert_called_once_with(image_info, 'manager')
dispatch_mock.assert_called_once_with('get_os_install_device')
dispatch_mock.assert_called_once_with('get_os_install_device',
permit_refresh=True)
configdrive_copy_mock.assert_called_once_with(image_info['node_uuid'],
'manager',
'configdrive_data')
@ -749,7 +754,8 @@ class TestStandbyExtension(base.IronicAgentTest):
download_mock.assert_called_once_with(image_info)
write_mock.assert_called_once_with(image_info, 'manager')
dispatch_mock.assert_called_once_with('get_os_install_device')
dispatch_mock.assert_called_once_with('get_os_install_device',
permit_refresh=True)
self.assertFalse(configdrive_copy_mock.called)
self.assertEqual('SUCCEEDED', async_result.command_status)
@ -821,7 +827,8 @@ class TestStandbyExtension(base.IronicAgentTest):
download_mock.assert_called_once_with(image_info)
write_mock.assert_called_once_with(image_info, 'manager')
dispatch_mock.assert_called_once_with('get_os_install_device')
dispatch_mock.assert_called_once_with('get_os_install_device',
permit_refresh=True)
self.assertEqual(0, configdrive_copy_mock.call_count)
self.assertEqual('SUCCEEDED', async_result.command_status)
@ -871,7 +878,8 @@ class TestStandbyExtension(base.IronicAgentTest):
download_mock.assert_called_once_with(image_info)
write_mock.assert_called_once_with(image_info, 'manager')
dispatch_mock.assert_called_once_with('get_os_install_device')
dispatch_mock.assert_called_once_with('get_os_install_device',
permit_refresh=True)
self.assertFalse(configdrive_copy_mock.called)
self.assertEqual('FAILED', async_result.command_status)
@ -913,7 +921,8 @@ class TestStandbyExtension(base.IronicAgentTest):
download_mock.assert_called_once_with(image_info)
write_mock.assert_called_once_with(image_info, 'manager')
dispatch_mock.assert_called_once_with('get_os_install_device')
dispatch_mock.assert_called_once_with('get_os_install_device',
permit_refresh=True)
configdrive_copy_mock.assert_called_once_with(image_info['node_uuid'],
'manager',
'configdrive_data')
@ -967,7 +976,8 @@ class TestStandbyExtension(base.IronicAgentTest):
)
async_result.join()
dispatch_mock.assert_any_call('get_os_install_device')
dispatch_mock.assert_any_call('get_os_install_device',
permit_refresh=True)
self.assertFalse(configdrive_copy_mock.called)
# Assert we've streamed the image or not

View File

@ -1467,6 +1467,46 @@ class TestGenericHardwareManager(base.IronicAgentTest):
mock_cached_node.assert_called_once_with()
mock_dev.assert_called_once_with()
@mock.patch.object(hardware, 'update_cached_node', autospec=True)
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
@mock.patch.object(hardware, 'get_cached_node', autospec=True)
def test_get_os_install_device_no_root_device(self, mock_cached_node,
mock_dev,
mock_update):
mock_cached_node.return_value = {'properties': {},
'uuid': 'node1',
'instance_info': {}}
mock_dev.return_value = [
hardware.BlockDevice(name='/dev/sda',
model='TinyUSB Drive',
size=3116853504,
rotational=False,
vendor='Super Vendor',
wwn='wwn0',
wwn_with_extension='wwn0ven0',
wwn_vendor_extension='ven0',
serial='serial0'),
hardware.BlockDevice(name='/dev/sdb',
model='magical disk',
size=10737418240,
rotational=True,
vendor='fake-vendor',
wwn='fake-wwn',
wwn_with_extension='fake-wwnven0',
wwn_vendor_extension='ven0',
serial='fake-serial',
by_path='/dev/disk/by-path/1:0:0:0'),
]
mock_update.return_value = {'properties': {'root_device':
{'name': '/dev/sda'}},
'uuid': 'node1',
'instance_info': {'magic': 'value'}}
self.assertEqual('/dev/sda',
self.hardware.get_os_install_device(
permit_refresh=True))
self.assertEqual(1, mock_cached_node.call_count)
mock_dev.assert_called_once_with()
def test__get_device_info(self):
fileobj = mock.mock_open(read_data='fake-vendor')
with mock.patch(
@ -4468,3 +4508,33 @@ class TestListHardwareInfo(base.IronicAgentTest):
self.assertEqual(fake_info, hardware.list_hardware_info())
mock_dispatch.assert_called_with('list_hardware_info')
self.assertEqual(2, mock_dispatch.call_count)
class TestAPIClientSaveAndUse(base.IronicAgentTest):
def test_save_api_client(self):
hardware.API_CLIENT = None
mock_api_client = mock.Mock()
hardware.save_api_client(mock_api_client, 1, 2)
self.assertEqual(mock_api_client, hardware.API_CLIENT)
@mock.patch('ironic_python_agent.hardware.dispatch_to_managers',
autospec=True)
@mock.patch.object(hardware, 'get_cached_node', autospec=True)
def test_update_node_cache(self, mock_cached_node, mock_dispatch):
mock_cached_node.return_value = {'uuid': 'node1'}
updated_node = {'uuid': 'node1', 'other': 'key'}
hardware.API_CLIENT = None
mock_api_client = mock.Mock()
hardware.save_api_client(mock_api_client, 1, 2)
mock_api_client.lookup_node.return_value = {'node': updated_node}
self.assertEqual(updated_node, hardware.update_cached_node())
mock_api_client.lookup_node.assert_called_with(
hardware_info=mock.ANY,
timeout=1,
starting_interval=2,
uuid='node1')
self.assertEqual(updated_node, hardware.NODE)
calls = [mock.call('list_hardware_info'),
mock.call('wait_for_disks')]
mock_dispatch.assert_has_calls(calls)

View File

@ -0,0 +1,9 @@
---
fixes:
- |
Fixes an issue with nodes undergoing fast-track from introspection to
deployment where the agent internal cache of the node may be stale.
In particular, this can be observed if node does not honor a root device
hint which is saved to Ironic's API *after* the agent was started.
More information can be found in `story 2008039
<https://storyboard.openstack.org/#!/story/2008039>`_.