diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index 3e74d350d..ac7e25d3f 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -16,6 +16,7 @@ import abc import functools import os import shlex +import time import netifaces from oslo_concurrency import processutils @@ -38,6 +39,9 @@ UNIT_CONVERTER = pint.UnitRegistry(filename=None) UNIT_CONVERTER.define('MB = []') UNIT_CONVERTER.define('GB = 1024 MB') +_DISK_WAIT_ATTEMPTS = 10 +_DISK_WAIT_DELAY = 3 + def _get_device_vendor(dev): """Get the vendor name of a given device.""" @@ -394,8 +398,27 @@ class GenericHardwareManager(HardwareManager): self.sys_path = '/sys' def evaluate_hardware_support(self): + # Do some initialization before we declare ourselves ready + self._wait_for_disks() return HardwareSupport.GENERIC + def _wait_for_disks(self): + # Wait for at least one suitable disk to show up, otherwise neither + # inspection nor deployment has any chance to succeed. + for attempt in range(_DISK_WAIT_ATTEMPTS): + try: + block_devices = self.list_block_devices() + utils.guess_root_disk(block_devices) + except errors.DeviceNotFound: + LOG.debug('Still waiting for at least one disk to appear, ' + 'attempt %d of %d', attempt + 1, _DISK_WAIT_ATTEMPTS) + time.sleep(_DISK_WAIT_DELAY) + else: + break + else: + LOG.warning('No disks detected in %d seconds', + _DISK_WAIT_DELAY * _DISK_WAIT_ATTEMPTS) + def _get_interface_info(self, interface_name): addr_path = '{0}/class/net/{1}/address'.format(self.sys_path, interface_name) diff --git a/ironic_python_agent/tests/unit/test_agent.py b/ironic_python_agent/tests/unit/test_agent.py index b51e68518..9885014a4 100644 --- a/ironic_python_agent/tests/unit/test_agent.py +++ b/ironic_python_agent/tests/unit/test_agent.py @@ -127,6 +127,8 @@ class TestHeartbeater(test_base.BaseTestCase): self.assertEqual(2.7, self.heartbeater.error_delay) +@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks', + lambda self: 
None) class TestBaseAgent(test_base.BaseTestCase): def setUp(self): @@ -294,6 +296,8 @@ class TestBaseAgent(test_base.BaseTestCase): self.agent.get_node_uuid) +@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks', + lambda self: None) class TestAgentStandalone(test_base.BaseTestCase): def setUp(self): @@ -338,6 +342,8 @@ class TestAgentStandalone(test_base.BaseTestCase): self.assertFalse(self.agent.api_client.lookup_node.called) +@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks', + lambda self: None) @mock.patch.object(socket, 'gethostbyname', autospec=True) @mock.patch.object(utils, 'execute', autospec=True) class TestAdvertiseAddress(test_base.BaseTestCase): diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 5c0b60814..1aa04c624 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os +import time + import mock import netifaces -import os from oslo_concurrency import processutils from oslo_utils import units from oslotest import base as test_base @@ -1084,6 +1086,40 @@ class TestGenericHardwareManager(test_base.BaseTestCase): self.assertEqual('NEC', self.hardware.get_system_vendor_info().manufacturer) + @mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices', + autospec=True) + @mock.patch.object(time, 'sleep', autospec=True) + @mock.patch.object(utils, 'guess_root_disk', autospec=True) + def test_evaluate_hw_waits_for_disks(self, mocked_root_dev, mocked_sleep, + mocked_block_dev): + mocked_root_dev.side_effect = [ + errors.DeviceNotFound('boom'), + None + ] + + result = self.hardware.evaluate_hardware_support() + + self.assertEqual(hardware.HardwareSupport.GENERIC, result) + mocked_root_dev.assert_called_with(mocked_block_dev.return_value) + self.assertEqual(2, mocked_root_dev.call_count) + mocked_sleep.assert_called_once_with(hardware._DISK_WAIT_DELAY) + + @mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices', + autospec=True) + @mock.patch.object(time, 'sleep', autospec=True) + @mock.patch.object(utils, 'guess_root_disk', autospec=True) + def test_evaluate_hw_disks_timeout(self, mocked_root_dev, mocked_sleep, + mocked_block_dev): + mocked_root_dev.side_effect = errors.DeviceNotFound('boom') + + result = self.hardware.evaluate_hardware_support() + + self.assertEqual(hardware.HardwareSupport.GENERIC, result) + mocked_root_dev.assert_called_with(mocked_block_dev.return_value) + self.assertEqual(hardware._DISK_WAIT_ATTEMPTS, + mocked_root_dev.call_count) + mocked_sleep.assert_called_with(hardware._DISK_WAIT_DELAY) + @mock.patch.object(utils, 'execute', autospec=True) class TestModuleFunctions(test_base.BaseTestCase): diff --git a/releasenotes/notes/disk-wait-2e0e85e0947f80e9.yaml b/releasenotes/notes/disk-wait-2e0e85e0947f80e9.yaml new file mode 100644 index 000000000..3b5ea703d --- /dev/null +++ 
b/releasenotes/notes/disk-wait-2e0e85e0947f80e9.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - On startup, wait up to 30 seconds for the first disk device suitable for + deployment to appear. This fixes both inspection and deployment on + hardware that takes a long time to initialize (e.g. some RAID devices).