Limit Inspection->Lookup->Heartbeat lag

Caches hardware information collected during inspection
so that the initial lookup can occur without any delay.

Also adds logging to track how long inventory collection takes.

Conflicts:
	ironic_python_agent/tests/unit/base.py

Co-Authored-By: Dmitry Tantsur <dtantsur@protonmail.com>
Change-Id: I3e0d237d37219e783d81913fa6cc490492b3f96a
(cherry picked from commit c76b8b2c21)
This commit is contained in:
Julia Kreger 2020-07-02 10:14:02 -07:00 committed by Dmitry Tantsur
parent e69f140551
commit 8ab527e72e
8 changed files with 72 additions and 6 deletions

View File

@ -409,6 +409,10 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
# Inspection should be started before call to lookup, otherwise # Inspection should be started before call to lookup, otherwise
# lookup will fail due to unknown MAC. # lookup will fail due to unknown MAC.
uuid = None uuid = None
# We can't try to inspect or heartbeat until we have valid
# interfaces to perform those actions over.
self._wait_for_interface()
if cfg.CONF.inspection_callback_url: if cfg.CONF.inspection_callback_url:
try: try:
# Attempt inspection. This may fail, and previously # Attempt inspection. This may fail, and previously
@ -418,10 +422,8 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
LOG.error('Failed to perform inspection: %s', e) LOG.error('Failed to perform inspection: %s', e)
if self.api_url: if self.api_url:
self._wait_for_interface()
content = self.api_client.lookup_node( content = self.api_client.lookup_node(
hardware_info=hardware.dispatch_to_managers( hardware_info=hardware.list_hardware_info(use_cache=True),
'list_hardware_info'),
timeout=self.lookup_timeout, timeout=self.lookup_timeout,
starting_interval=self.lookup_interval, starting_interval=self.lookup_interval,
node_uuid=uuid) node_uuid=uuid)
@ -473,6 +475,9 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
LOG.info('No ipa-api-url configured, Heartbeat and lookup ' LOG.info('No ipa-api-url configured, Heartbeat and lookup '
'skipped for inspector.') 'skipped for inspector.')
else: else:
# NOTE(TheJulia): Once communication flow capability is
# able to be driven solely from the conductor, this is no
# longer a major issue.
LOG.error('Neither ipa-api-url nor inspection_callback_url' LOG.error('Neither ipa-api-url nor inspection_callback_url'
'found, please check your pxe append parameters.') 'found, please check your pxe append parameters.')

View File

@ -670,6 +670,8 @@ class HardwareManager(object, metaclass=abc.ABCMeta):
:return: a dictionary representing inventory :return: a dictionary representing inventory
""" """
start = time.time()
LOG.info('Collecting full inventory')
# NOTE(dtantsur): don't forget to update docs when extending inventory # NOTE(dtantsur): don't forget to update docs when extending inventory
hardware_info = {} hardware_info = {}
hardware_info['interfaces'] = self.list_network_interfaces() hardware_info['interfaces'] = self.list_network_interfaces()
@ -681,6 +683,7 @@ class HardwareManager(object, metaclass=abc.ABCMeta):
hardware_info['system_vendor'] = self.get_system_vendor_info() hardware_info['system_vendor'] = self.get_system_vendor_info()
hardware_info['boot'] = self.get_boot_info() hardware_info['boot'] = self.get_boot_info()
hardware_info['hostname'] = netutils.get_hostname() hardware_info['hostname'] = netutils.get_hostname()
LOG.info('Inventory collected in %.2f second(s)', time.time() - start)
return hardware_info return hardware_info
def get_clean_steps(self, node, ports): def get_clean_steps(self, node, ports):
@ -2113,6 +2116,23 @@ def dispatch_to_managers(method, *args, **kwargs):
raise errors.HardwareManagerMethodNotFound(method) raise errors.HardwareManagerMethodNotFound(method)
_CACHED_HW_INFO = None
def list_hardware_info(use_cache=True):
"""List hardware information with caching."""
global _CACHED_HW_INFO
if _CACHED_HW_INFO is None:
_CACHED_HW_INFO = dispatch_to_managers('list_hardware_info')
return _CACHED_HW_INFO
if use_cache:
return _CACHED_HW_INFO
else:
return dispatch_to_managers('list_hardware_info')
def cache_node(node): def cache_node(node):
"""Store the node object in the hardware module. """Store the node object in the hardware module.

View File

@ -204,7 +204,7 @@ def collect_default(data, failures):
:param failures: AccumulatedFailures object :param failures: AccumulatedFailures object
""" """
wait_for_dhcp() wait_for_dhcp()
inventory = hardware.dispatch_to_managers('list_hardware_info') inventory = hardware.list_hardware_info()
data['inventory'] = inventory data['inventory'] = inventory
# Replicate the same logic as in deploy. We need to make sure that when # Replicate the same logic as in deploy. We need to make sure that when

View File

@ -23,6 +23,7 @@ from oslo_config import cfg
from oslo_config import fixture as config_fixture from oslo_config import fixture as config_fixture
from oslotest import base as test_base from oslotest import base as test_base
from ironic_python_agent import hardware
from ironic_python_agent import utils from ironic_python_agent import utils
CONF = cfg.CONF CONF = cfg.CONF
@ -58,6 +59,8 @@ class IronicAgentTest(test_base.BaseTestCase):
self.patch(subprocess, 'check_output', do_not_call) self.patch(subprocess, 'check_output', do_not_call)
self.patch(utils, 'execute', do_not_call) self.patch(utils, 'execute', do_not_call)
hardware._CACHED_HW_INFO = None
def _set_config(self): def _set_config(self):
self.cfg_fixture = self.useFixture(config_fixture.Config(CONF)) self.cfg_fixture = self.useFixture(config_fixture.Config(CONF))

View File

@ -517,7 +517,7 @@ class TestBaseAgent(ironic_agent_base.IronicAgentTest):
mock_inspector.assert_called_once_with() mock_inspector.assert_called_once_with()
self.assertFalse(mock_wait.called) self.assertTrue(mock_wait.called)
self.assertFalse(mock_dispatch.called) self.assertFalse(mock_dispatch.called)
@mock.patch('ironic_lib.mdns.get_endpoint', autospec=True) @mock.patch('ironic_lib.mdns.get_endpoint', autospec=True)
@ -572,7 +572,7 @@ class TestBaseAgent(ironic_agent_base.IronicAgentTest):
wsgi_server.start.assert_called_once_with() wsgi_server.start.assert_called_once_with()
self.assertFalse(mock_inspector.called) self.assertFalse(mock_inspector.called)
self.assertFalse(mock_wait.called) self.assertTrue(mock_wait.called)
self.assertFalse(mock_dispatch.called) self.assertFalse(mock_dispatch.called)
@mock.patch.object(time, 'time', autospec=True) @mock.patch.object(time, 'time', autospec=True)

View File

@ -4360,3 +4360,22 @@ class TestVersions(base.IronicAgentTest):
self.assertRaises(errors.VersionMismatch, self.assertRaises(errors.VersionMismatch,
hardware.check_versions, hardware.check_versions,
{'not_specific': '1'}) {'not_specific': '1'})
@mock.patch('ironic_python_agent.hardware.dispatch_to_managers',
autospec=True)
class TestListHardwareInfo(base.IronicAgentTest):
def test_caching(self, mock_dispatch):
fake_info = {'I am': 'hardware'}
mock_dispatch.return_value = fake_info
self.assertEqual(fake_info, hardware.list_hardware_info())
self.assertEqual(fake_info, hardware.list_hardware_info())
mock_dispatch.assert_called_once_with('list_hardware_info')
self.assertEqual(fake_info,
hardware.list_hardware_info(use_cache=False))
self.assertEqual(fake_info, hardware.list_hardware_info())
mock_dispatch.assert_called_with('list_hardware_info')
self.assertEqual(2, mock_dispatch.call_count)

View File

@ -252,6 +252,20 @@ class TestCollectDefault(BaseDiscoverTest):
mock_dispatch.assert_called_once_with('list_hardware_info') mock_dispatch.assert_called_once_with('list_hardware_info')
mock_wait_for_dhcp.assert_called_once_with() mock_wait_for_dhcp.assert_called_once_with()
def test_cache_hardware_info(self, mock_dispatch, mock_wait_for_dhcp,
mock_get_mgrs):
mgrs = [{'name': 'extra', 'version': '1.42'},
{'name': 'generic', 'version': '1.1'}]
mock_dispatch.return_value = self.inventory
mock_get_mgrs.return_value = [
mock.Mock(**{'get_version.return_value': item}) for item in mgrs
]
inspector.collect_default(self.data, self.failures)
inspector.collect_default(self.data, self.failures)
# Hardware is cached, so only one call is made
mock_dispatch.assert_called_once_with('list_hardware_info')
def test_no_root_disk(self, mock_dispatch, mock_wait_for_dhcp, def test_no_root_disk(self, mock_dispatch, mock_wait_for_dhcp,
mock_get_mgrs): mock_get_mgrs):
mgrs = [{'name': 'extra', 'version': '1.42'}, mgrs = [{'name': 'extra', 'version': '1.42'},

View File

@ -0,0 +1,5 @@
---
fixes:
- |
Speeds up going from inspection to cleaning with fast-track enabled by
caching hardware information between the steps.