diff --git a/ironic_python_agent/agent.py b/ironic_python_agent/agent.py index 670a3a61e..d5312dabb 100644 --- a/ironic_python_agent/agent.py +++ b/ironic_python_agent/agent.py @@ -494,6 +494,15 @@ class IronicPythonAgent(base.ExecuteCommandMixin): self.node = content['node'] LOG.info('Lookup succeeded, node UUID is %s', self.node['uuid']) + + # Store skip_bmc_detect flag in the node before caching + # This allows hardware managers to check if BMC detection is needed + skip_bmc = content.get('config', {}).get('agent_skip_bmc_detect', + False) + if skip_bmc: + LOG.info('Ironic has indicated BMC detection should be skipped') + self.node['skip_bmc_detect'] = True + hardware.cache_node(self.node) self.heartbeat_timeout = content['config']['heartbeat_timeout'] diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index 801d45f65..92287a9db 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -1151,18 +1151,52 @@ class HardwareManager(object, metaclass=abc.ABCMeta): hardware_info['cpu'] = self.get_cpus() hardware_info['disks'] = self.list_block_devices() hardware_info['memory'] = self.get_memory() - hardware_info['bmc_address'] = self.get_bmc_address() - hardware_info['bmc_v6address'] = self.get_bmc_v6address() + + # Check if Ironic has indicated BMC detection should be skipped + # This is set after lookup when using out-of-band + # management like Redfish + cached_node = get_cached_node() + skip_bmc = (cached_node and cached_node.get('skip_bmc_detect', + False)) + + if skip_bmc: + LOG.info('Skipping BMC detection as requested by Ironic') + hardware_info['bmc_address'] = None + hardware_info['bmc_v6address'] = None + else: + # Cache BMC information to avoid repeated expensive ipmitool calls + if not hasattr(self, '_bmc_cache'): + LOG.debug('Detecting BMC information (first time)') + self._bmc_cache = { + 'bmc_address': self.get_bmc_address(), + 'bmc_v6address': self.get_bmc_v6address(), + 'bmc_mac': None # Populated below + } + else: + LOG.debug('Using cached BMC information') + + hardware_info['bmc_address'] = self._bmc_cache['bmc_address'] + hardware_info['bmc_v6address'] = self._bmc_cache['bmc_v6address'] + hardware_info['system_vendor'] = self.get_system_vendor_info() hardware_info['boot'] = self.get_boot_info() hardware_info['hostname'] = netutils.get_hostname() - try: - hardware_info['bmc_mac'] = self.get_bmc_mac() - except errors.IncompatibleHardwareMethodError: - # if the hardware manager does not support obtaining the BMC MAC, - # we simply don't expose it. - pass + if not skip_bmc: + # Try to get BMC MAC, which may not be cached yet + if self._bmc_cache['bmc_mac'] is None: + try: + self._bmc_cache['bmc_mac'] = self.get_bmc_mac() + LOG.debug('Cached BMC MAC address') + except errors.IncompatibleHardwareMethodError: + # if the hardware manager does not support obtaining + # the BMC MAC, we simply don't expose it. + # Mark as unavailable to avoid retrying + self._bmc_cache['bmc_mac'] = 'unavailable' + + # Only add to hardware_info if we successfully got it + if self._bmc_cache['bmc_mac'] not in (None, 'unavailable'): + hardware_info['bmc_mac'] = self._bmc_cache['bmc_mac'] LOG.info('Inventory collected in %.2f second(s)', time.time() - start) return hardware_info diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 5d09caf3b..a8131d617 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -1509,6 +1509,133 @@ class TestGenericHardwareManager(base.IronicAgentTest): self.assertEqual('mock_hostname', hardware_info['hostname']) mocked_lshw.assert_called_once_with(self.hardware) + @mock.patch.object(hardware, 'get_cached_node', autospec=True) + @mock.patch.object(hardware.GenericHardwareManager, + '_get_system_lshw_dict', autospec=True, + return_value={'id': 'host'}) + @mock.patch.object(netutils, 'get_hostname', autospec=True) + def test_list_hardware_info_with_bmc_caching( + self, mocked_get_hostname, mocked_lshw, mocked_get_node): + """Test that BMC information is cached after first detection.""" + mocked_get_node.return_value = None # No skip_bmc_detect flag + + # Mock all the hardware info methods + self.hardware.list_network_interfaces = mock.Mock(return_value=[]) + self.hardware.get_cpus = mock.Mock() + self.hardware.get_memory = mock.Mock() + self.hardware.list_block_devices = mock.Mock(return_value=[]) + self.hardware.get_boot_info = mock.Mock() + self.hardware.get_system_vendor_info = mock.Mock() + mocked_get_hostname.return_value = 'mock_hostname' + + # Mock BMC methods + self.hardware.get_bmc_address = mock.Mock(return_value='192.168.1.1') + self.hardware.get_bmc_v6address = mock.Mock(return_value='fe80::1') + self.hardware.get_bmc_mac = mock.Mock(return_value='aa:bb:cc:dd:ee:ff') + + # First call - should call BMC detection methods + hardware_info1 = self.hardware.list_hardware_info() + self.assertEqual('192.168.1.1', hardware_info1['bmc_address']) + self.assertEqual('fe80::1', hardware_info1['bmc_v6address']) + self.assertEqual('aa:bb:cc:dd:ee:ff', hardware_info1['bmc_mac']) + + # Verify BMC methods were called + self.hardware.get_bmc_address.assert_called_once() + self.hardware.get_bmc_v6address.assert_called_once() + self.hardware.get_bmc_mac.assert_called_once() + + # Second call - should use cached values + hardware_info2 = self.hardware.list_hardware_info() + self.assertEqual('192.168.1.1', hardware_info2['bmc_address']) + self.assertEqual('fe80::1', hardware_info2['bmc_v6address']) + self.assertEqual('aa:bb:cc:dd:ee:ff', hardware_info2['bmc_mac']) + + # BMC methods should NOT be called again (still only once) + self.hardware.get_bmc_address.assert_called_once() + self.hardware.get_bmc_v6address.assert_called_once() + self.hardware.get_bmc_mac.assert_called_once() + + @mock.patch.object(hardware, 'get_cached_node', autospec=True) + @mock.patch.object(hardware.GenericHardwareManager, + '_get_system_lshw_dict', autospec=True, + return_value={'id': 'host'}) + @mock.patch.object(netutils, 'get_hostname', autospec=True) + def test_list_hardware_info_skip_bmc_detect( + self, mocked_get_hostname, mocked_lshw, mocked_get_node): + """Test that BMC detection is skipped when flag is set.""" + mocked_get_node.return_value = {'skip_bmc_detect': True} + + # Mock all the hardware info methods + self.hardware.list_network_interfaces = mock.Mock(return_value=[]) + self.hardware.get_cpus = mock.Mock() + self.hardware.get_memory = mock.Mock() + self.hardware.list_block_devices = mock.Mock(return_value=[]) + self.hardware.get_boot_info = mock.Mock() + self.hardware.get_system_vendor_info = mock.Mock() + mocked_get_hostname.return_value = 'mock_hostname' + + # Mock BMC methods - these should NOT be called + self.hardware.get_bmc_address = mock.Mock() + self.hardware.get_bmc_v6address = mock.Mock() + self.hardware.get_bmc_mac = mock.Mock() + + # Call list_hardware_info + hardware_info = self.hardware.list_hardware_info() + + # BMC info should be None + self.assertIsNone(hardware_info['bmc_address']) + self.assertIsNone(hardware_info['bmc_v6address']) + self.assertNotIn('bmc_mac', hardware_info) + + # BMC detection methods should NOT have been called + self.hardware.get_bmc_address.assert_not_called() + self.hardware.get_bmc_v6address.assert_not_called() + self.hardware.get_bmc_mac.assert_not_called() + + @mock.patch.object(hardware, 'get_cached_node', autospec=True) + @mock.patch.object(hardware.GenericHardwareManager, + '_get_system_lshw_dict', autospec=True, + return_value={'id': 'host'}) + @mock.patch.object(netutils, 'get_hostname', autospec=True) + def test_list_hardware_info_bmc_mac_unavailable( + self, mocked_get_hostname, mocked_lshw, mocked_get_node): + """Test BMC MAC marked unavailable when not supported.""" + mocked_get_node.return_value = None + + # Mock all the hardware info methods + self.hardware.list_network_interfaces = mock.Mock(return_value=[]) + self.hardware.get_cpus = mock.Mock() + self.hardware.get_memory = mock.Mock() + self.hardware.list_block_devices = mock.Mock(return_value=[]) + self.hardware.get_boot_info = mock.Mock() + self.hardware.get_system_vendor_info = mock.Mock() + mocked_get_hostname.return_value = 'mock_hostname' + + # Mock BMC methods + self.hardware.get_bmc_address = mock.Mock(return_value='192.168.1.1') + self.hardware.get_bmc_v6address = mock.Mock(return_value='fe80::1') + # BMC MAC raises IncompatibleHardwareMethodError + self.hardware.get_bmc_mac = mock.Mock( + side_effect=errors.IncompatibleHardwareMethodError) + + # First call + hardware_info1 = self.hardware.list_hardware_info() + self.assertEqual('192.168.1.1', hardware_info1['bmc_address']) + self.assertEqual('fe80::1', hardware_info1['bmc_v6address']) + self.assertNotIn('bmc_mac', hardware_info1) # Not in output + + # Verify get_bmc_mac was called once + self.hardware.get_bmc_mac.assert_called_once() + + # Second call - get_bmc_mac should NOT be called again + hardware_info2 = self.hardware.list_hardware_info() + self.assertEqual('192.168.1.1', hardware_info2['bmc_address']) + self.assertEqual('fe80::1', hardware_info2['bmc_v6address']) + self.assertNotIn('bmc_mac', hardware_info2) + + # Still only one call (cached as 'unavailable') + self.hardware.get_bmc_mac.assert_called_once() + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) def test_list_block_devices(self, list_mock): device = hardware.BlockDevice('/dev/hdaa', 'small', 65535, False) diff --git a/releasenotes/notes/skip-bmc-detection-with-oob-management-7920f722d53974dc.yaml b/releasenotes/notes/skip-bmc-detection-with-oob-management-7920f722d53974dc.yaml new file mode 100644 index 000000000..37a2b915f --- /dev/null +++ b/releasenotes/notes/skip-bmc-detection-with-oob-management-7920f722d53974dc.yaml @@ -0,0 +1,13 @@ +--- +features: + - | + Adds support for automatically skipping BMC detection via ipmitool + when Ironic indicates the node uses out-of-band management interfaces + (e.g., Redfish, iDRAC Redfish, iLO, iRMC). This reduces deployment + time and avoids unnecessary ipmitool calls when the BMC information + is already known to Ironic. + - | + BMC information (address, v6address, and MAC) is now cached after the + first detection to avoid repeated expensive ipmitool calls during + subsequent inventory collections. This improves performance during + heartbeats and long-running operations like cleaning or rescue mode.