Limit Inspection->Lookup->Heartbeat lag

Caches hardware information collected during inspection
so that the initial lookup can occur without any delay.

Also adds logging to track how long inventory collection takes.

Co-Authored-By: Dmitry Tantsur <dtantsur@protonmail.com>
Change-Id: I3e0d237d37219e783d81913fa6cc490492b3f96a
This commit is contained in:
Julia Kreger 2020-07-02 10:14:02 -07:00 committed by Dmitry Tantsur
parent a08029a44e
commit c76b8b2c21
8 changed files with 71 additions and 6 deletions

View File

@ -403,6 +403,10 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
# Inspection should be started before call to lookup, otherwise
# lookup will fail due to unknown MAC.
uuid = None
# We can't try to inspect or heartbeat until we have valid
# interfaces to perform those actions over.
self._wait_for_interface()
if cfg.CONF.inspection_callback_url:
try:
# Attempt inspection. This may fail, and previously
@ -412,10 +416,8 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
LOG.error('Failed to perform inspection: %s', e)
if self.api_url:
self._wait_for_interface()
content = self.api_client.lookup_node(
hardware_info=hardware.dispatch_to_managers(
'list_hardware_info'),
hardware_info=hardware.list_hardware_info(use_cache=True),
timeout=self.lookup_timeout,
starting_interval=self.lookup_interval,
node_uuid=uuid)
@ -467,6 +469,9 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
LOG.info('No ipa-api-url configured, Heartbeat and lookup '
'skipped for inspector.')
else:
# NOTE(TheJulia): Once communication flow capability is
# able to be driven solely from the conductor, this is no
# longer a major issue.
LOG.error('Neither ipa-api-url nor inspection_callback_url'
'found, please check your pxe append parameters.')

View File

@ -692,6 +692,8 @@ class HardwareManager(object, metaclass=abc.ABCMeta):
:return: a dictionary representing inventory
"""
start = time.time()
LOG.info('Collecting full inventory')
# NOTE(dtantsur): don't forget to update docs when extending inventory
hardware_info = {}
hardware_info['interfaces'] = self.list_network_interfaces()
@ -703,6 +705,7 @@ class HardwareManager(object, metaclass=abc.ABCMeta):
hardware_info['system_vendor'] = self.get_system_vendor_info()
hardware_info['boot'] = self.get_boot_info()
hardware_info['hostname'] = netutils.get_hostname()
LOG.info('Inventory collected in %.2f second(s)', time.time() - start)
return hardware_info
def get_clean_steps(self, node, ports):
@ -2136,6 +2139,23 @@ def dispatch_to_managers(method, *args, **kwargs):
raise errors.HardwareManagerMethodNotFound(method)
_CACHED_HW_INFO = None
def list_hardware_info(use_cache=True):
"""List hardware information with caching."""
global _CACHED_HW_INFO
if _CACHED_HW_INFO is None:
_CACHED_HW_INFO = dispatch_to_managers('list_hardware_info')
return _CACHED_HW_INFO
if use_cache:
return _CACHED_HW_INFO
else:
return dispatch_to_managers('list_hardware_info')
def cache_node(node):
"""Store the node object in the hardware module.

View File

@ -204,7 +204,7 @@ def collect_default(data, failures):
:param failures: AccumulatedFailures object
"""
wait_for_dhcp()
inventory = hardware.dispatch_to_managers('list_hardware_info')
inventory = hardware.list_hardware_info()
data['inventory'] = inventory
# Replicate the same logic as in deploy. We need to make sure that when

View File

@ -24,6 +24,7 @@ from oslo_config import fixture as config_fixture
from oslotest import base as test_base
from ironic_python_agent.extensions import base as ext_base
from ironic_python_agent import hardware
from ironic_python_agent import utils
CONF = cfg.CONF
@ -60,6 +61,7 @@ class IronicAgentTest(test_base.BaseTestCase):
self.patch(utils, 'execute', do_not_call)
ext_base._EXT_MANAGER = None
hardware._CACHED_HW_INFO = None
def _set_config(self):
self.cfg_fixture = self.useFixture(config_fixture.Config(CONF))

View File

@ -517,7 +517,7 @@ class TestBaseAgent(ironic_agent_base.IronicAgentTest):
mock_inspector.assert_called_once_with()
self.assertFalse(mock_wait.called)
self.assertTrue(mock_wait.called)
self.assertFalse(mock_dispatch.called)
@mock.patch('ironic_lib.mdns.get_endpoint', autospec=True)
@ -572,7 +572,7 @@ class TestBaseAgent(ironic_agent_base.IronicAgentTest):
wsgi_server.start.assert_called_once_with()
self.assertFalse(mock_inspector.called)
self.assertFalse(mock_wait.called)
self.assertTrue(mock_wait.called)
self.assertFalse(mock_dispatch.called)
@mock.patch.object(time, 'time', autospec=True)

View File

@ -4356,3 +4356,22 @@ class TestVersions(base.IronicAgentTest):
self.assertRaises(errors.VersionMismatch,
hardware.check_versions,
{'not_specific': '1'})
@mock.patch('ironic_python_agent.hardware.dispatch_to_managers',
autospec=True)
class TestListHardwareInfo(base.IronicAgentTest):
def test_caching(self, mock_dispatch):
fake_info = {'I am': 'hardware'}
mock_dispatch.return_value = fake_info
self.assertEqual(fake_info, hardware.list_hardware_info())
self.assertEqual(fake_info, hardware.list_hardware_info())
mock_dispatch.assert_called_once_with('list_hardware_info')
self.assertEqual(fake_info,
hardware.list_hardware_info(use_cache=False))
self.assertEqual(fake_info, hardware.list_hardware_info())
mock_dispatch.assert_called_with('list_hardware_info')
self.assertEqual(2, mock_dispatch.call_count)

View File

@ -252,6 +252,20 @@ class TestCollectDefault(BaseDiscoverTest):
mock_dispatch.assert_called_once_with('list_hardware_info')
mock_wait_for_dhcp.assert_called_once_with()
def test_cache_hardware_info(self, mock_dispatch, mock_wait_for_dhcp,
mock_get_mgrs):
mgrs = [{'name': 'extra', 'version': '1.42'},
{'name': 'generic', 'version': '1.1'}]
mock_dispatch.return_value = self.inventory
mock_get_mgrs.return_value = [
mock.Mock(**{'get_version.return_value': item}) for item in mgrs
]
inspector.collect_default(self.data, self.failures)
inspector.collect_default(self.data, self.failures)
# Hardware is cached, so only one call is made
mock_dispatch.assert_called_once_with('list_hardware_info')
def test_no_root_disk(self, mock_dispatch, mock_wait_for_dhcp,
mock_get_mgrs):
mgrs = [{'name': 'extra', 'version': '1.42'},

View File

@ -0,0 +1,5 @@
---
fixes:
- |
Speeds up going from inspection to cleaning with fast-track enabled by
caching hardware information between the steps.