From 20c5894bc25389b7ea39a02e3e3853bc4cc41da7 Mon Sep 17 00:00:00 2001 From: Arne Wiebalck Date: Wed, 19 May 2021 18:37:45 +0200 Subject: [PATCH] Burn-in: Add disk step Add a clean step for disk burn-in via fio. Get basic run parameters from the node's driver_info. Story: #2007523 Task: #42384 Change-Id: I5f5e336bd629846b3d779fd0fc7a2060b385b035 --- doc/source/admin/hardware_managers.rst | 2 + ironic_python_agent/burnin.py | 37 +++++++++++++ ironic_python_agent/hardware.py | 15 +++++ ironic_python_agent/tests/unit/test_burnin.py | 55 +++++++++++++++++++ .../tests/unit/test_hardware.py | 7 +++ .../add_burnin_disk-12adb5735a41af47.yaml | 6 ++ 6 files changed, 122 insertions(+) create mode 100644 releasenotes/notes/add_burnin_disk-12adb5735a41af47.yaml diff --git a/doc/source/admin/hardware_managers.rst b/doc/source/admin/hardware_managers.rst index 2144cf9ff..4228dd853 100644 --- a/doc/source/admin/hardware_managers.rst +++ b/doc/source/admin/hardware_managers.rst @@ -77,6 +77,8 @@ Clean steps ``deploy.burnin_cpu`` Stress-test the CPUs of a node via stress-ng for a configurable amount of time. Disabled by default. +``deploy.burnin_disk`` + Stress-test the disks of a node via fio. Disabled by default. ``deploy.burnin_memory`` Stress-test the memory of a node via stress-ng for a configurable amount of time. Disabled by default. diff --git a/ironic_python_agent/burnin.py b/ironic_python_agent/burnin.py index bd6545471..77f83ac41 100644 --- a/ironic_python_agent/burnin.py +++ b/ironic_python_agent/burnin.py @@ -15,6 +15,7 @@ from oslo_concurrency import processutils from oslo_log import log from ironic_python_agent import errors +from ironic_python_agent import hardware LOG = log.getLogger(__name__) @@ -78,3 +79,39 @@ def stress_ng_vm(node): {'err': e}) LOG.error(error_msg) raise errors.CommandExecutionError(error_msg) + + +def fio_disk(node): + """Burn-in the disks with fio + + Run an fio randrw job for a configurable number of iterations + or a given amount of time. 
+ + :param node: Ironic node object + :raises: CommandExecutionError if the execution of fio fails. + """ + info = node.get('driver_info', {}) + # 4 iterations, same as badblock's default + loops = info.get('agent_burnin_fio_disk_loops', 4) + runtime = info.get('agent_burnin_fio_disk_runtime', 0) + + args = ['fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1, + '--ioengine', 'libaio', '--iodepth', '32', '--verify', + 'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify', + '--loops', loops, '--runtime', runtime, '--time_based'] + + devices = hardware.list_all_block_devices() + for device in devices: + args.extend(['--name', device.name]) + + LOG.debug('Burn-in fio disk command: %s', ' '.join(map(str, args))) + + try: + out, _ = utils.execute(*args) + # fio reports on stdout + LOG.info(out) + except (processutils.ProcessExecutionError, OSError) as e: + error_msg = ("fio (disk) failed with error %(err)s", + {'err': e}) + LOG.error(error_msg) + raise errors.CommandExecutionError(error_msg) diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index 645d7219b..cc75c5ced 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -1402,6 +1402,14 @@ class GenericHardwareManager(HardwareManager): """ burnin.stress_ng_cpu(node) + def burnin_disk(self, node, ports): + """Burn-in the disk + + :param node: Ironic node object + :param ports: list of Ironic port objects + """ + burnin.fio_disk(node) + def burnin_memory(self, node, ports): """Burn-in the memory @@ -1890,6 +1898,13 @@ class GenericHardwareManager(HardwareManager): 'reboot_requested': False, 'abortable': True }, + { + 'step': 'burnin_disk', + 'priority': 0, + 'interface': 'deploy', + 'reboot_requested': False, + 'abortable': True + }, { 'step': 'burnin_memory', 'priority': 0, diff --git a/ironic_python_agent/tests/unit/test_burnin.py b/ironic_python_agent/tests/unit/test_burnin.py index 7f411b9fb..316aaebd5 100644 --- 
a/ironic_python_agent/tests/unit/test_burnin.py +++ b/ironic_python_agent/tests/unit/test_burnin.py @@ -17,6 +17,7 @@ from oslo_concurrency import processutils from ironic_python_agent import burnin from ironic_python_agent import errors +from ironic_python_agent import hardware from ironic_python_agent.tests.unit import base @@ -63,6 +64,7 @@ class TestBurnin(base.IronicAgentTest): burnin.stress_ng_vm(node) mock_execute.assert_called_once_with( + 'stress-ng', '--vm', 0, '--vm-bytes', '98%', '--timeout', 86400, '--metrics-brief') @@ -89,3 +91,56 @@ class TestBurnin(base.IronicAgentTest): self.assertRaises(errors.CommandExecutionError, burnin.stress_ng_vm, node) + + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + def test_fio_disk_default(self, mock_list, mock_execute): + + node = {'driver_info': {}} + + mock_list.return_value = [ + hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True), + hardware.BlockDevice('/dev/hdaa', 'small', 65535, False), + ] + mock_execute.return_value = (['out', 'err']) + + burnin.fio_disk(node) + + mock_execute.assert_called_once_with( + 'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1, + '--ioengine', 'libaio', '--iodepth', '32', '--verify', + 'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify', + '--loops', 4, '--runtime', 0, '--time_based', '--name', + '/dev/sdj', '--name', '/dev/hdaa') + + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + def test_fio_disk_no_default(self, mock_list, mock_execute): + + node = {'driver_info': {'agent_burnin_fio_disk_runtime': 600, + 'agent_burnin_fio_disk_loops': 5}} + + mock_list.return_value = [ + hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True), + hardware.BlockDevice('/dev/hdaa', 'small', 65535, False), + ] + mock_execute.return_value = (['out', 'err']) + + burnin.fio_disk(node) + + mock_execute.assert_called_once_with( + 'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1, + '--ioengine', 'libaio', '--iodepth', '32', 
'--verify', + 'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify', + '--loops', 5, '--runtime', 600, '--time_based', '--name', + '/dev/sdj', '--name', '/dev/hdaa') + + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + def test_fio_disk_no_fio(self, mock_list, mock_execute): + + node = {'driver_info': {}} + mock_execute.side_effect = (['out', 'err'], + processutils.ProcessExecutionError()) + + burnin.fio_disk(node) + + self.assertRaises(errors.CommandExecutionError, + burnin.fio_disk, node) diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index a4739b65d..fbb8e6503 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -157,6 +157,13 @@ class TestGenericHardwareManager(base.IronicAgentTest): 'reboot_requested': False, 'abortable': True }, + { + 'step': 'burnin_disk', + 'priority': 0, + 'interface': 'deploy', + 'reboot_requested': False, + 'abortable': True + }, { 'step': 'burnin_memory', 'priority': 0, diff --git a/releasenotes/notes/add_burnin_disk-12adb5735a41af47.yaml b/releasenotes/notes/add_burnin_disk-12adb5735a41af47.yaml new file mode 100644 index 000000000..7a96ced14 --- /dev/null +++ b/releasenotes/notes/add_burnin_disk-12adb5735a41af47.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + Adds a burn-in cleaning step 'burnin_disk' to stress test disks for a + configurable number of iterations or a configurable amount of time with + fio. To use this step, fio needs to be installed on the RAM disk.