Burn-in: Add disk step
Add a clean step for disk burn-in via fio. Get basic run parameters from the node's driver_info. Story: #2007523 Task: #42384 Change-Id: I5f5e336bd629846b3d779fd0fc7a2060b385b035
This commit is contained in:
		| @@ -77,6 +77,8 @@ Clean steps | |||||||
| ``deploy.burnin_cpu`` | ``deploy.burnin_cpu`` | ||||||
|     Stress-test the CPUs of a node via stress-ng for a configurable |     Stress-test the CPUs of a node via stress-ng for a configurable | ||||||
|     amount of time. Disabled by default. |     amount of time. Disabled by default. | ||||||
|  | ``deploy.burnin_disk`` | ||||||
|  |     Stress-test the disks of a node via fio. Disabled by default. | ||||||
| ``deploy.burnin_memory`` | ``deploy.burnin_memory`` | ||||||
|     Stress-test the memory of a node via stress-ng for a configurable |     Stress-test the memory of a node via stress-ng for a configurable | ||||||
|     amount of time. Disabled by default. |     amount of time. Disabled by default. | ||||||
|   | |||||||
| @@ -15,6 +15,7 @@ from oslo_concurrency import processutils | |||||||
| from oslo_log import log | from oslo_log import log | ||||||
|  |  | ||||||
| from ironic_python_agent import errors | from ironic_python_agent import errors | ||||||
|  | from ironic_python_agent import hardware | ||||||
|  |  | ||||||
| LOG = log.getLogger(__name__) | LOG = log.getLogger(__name__) | ||||||
|  |  | ||||||
| @@ -78,3 +79,39 @@ def stress_ng_vm(node): | |||||||
|                      {'err': e}) |                      {'err': e}) | ||||||
|         LOG.error(error_msg) |         LOG.error(error_msg) | ||||||
|         raise errors.CommandExecutionError(error_msg) |         raise errors.CommandExecutionError(error_msg) | ||||||
|  |  | ||||||
|  |  | ||||||
def fio_disk(node):
    """Burn-in the disks with fio

    Run one fio invocation with ``--rw readwrite`` and crc32c verification,
    adding one ``--name`` entry per block device reported by
    ``hardware.list_all_block_devices()``. The run is bounded by a
    configurable number of loops and/or a configurable runtime, both read
    from the node's ``driver_info``:

    * ``agent_burnin_fio_disk_loops`` (default: 4)
    * ``agent_burnin_fio_disk_runtime`` (default: 0)

    fio's stdout is logged at info level on success.

    :param node: Ironic node object
    :raises: CommandExecutionError if the execution of fio fails.
    """
    info = node.get('driver_info', {})
    # 4 iterations, same as badblock's default
    loops = info.get('agent_burnin_fio_disk_loops', 4)
    runtime = info.get('agent_burnin_fio_disk_runtime', 0)

    args = ['fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
            '--ioengine', 'libaio', '--iodepth', '32', '--verify',
            'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
            '--loops', loops, '--runtime', runtime, '--time_based']

    # NOTE(review): the device path is passed as the fio job name
    # (--name), not via --filename. Confirm against the fio docs that the
    # job then targets the device itself rather than a file derived from
    # the job name.
    devices = hardware.list_all_block_devices()
    for device in devices:
        args.extend(['--name', device.name])

    # args mixes str and int values, hence the explicit str() for joining.
    LOG.debug('Burn-in fio disk command: %s', ' '.join(map(str, args)))

    try:
        out, _ = utils.execute(*args)
        # fio reports on stdout
        LOG.info(out)
    except (processutils.ProcessExecutionError, OSError) as e:
        # Interpolate here so that both the log record and the raised
        # exception carry one readable string rather than a
        # (format, mapping) tuple.
        error_msg = "fio (disk) failed with error %(err)s" % {'err': e}
        LOG.error(error_msg)
        raise errors.CommandExecutionError(error_msg)
|   | |||||||
| @@ -1402,6 +1402,14 @@ class GenericHardwareManager(HardwareManager): | |||||||
|         """ |         """ | ||||||
|         burnin.stress_ng_cpu(node) |         burnin.stress_ng_cpu(node) | ||||||
|  |  | ||||||
|  |     def burnin_disk(self, node, ports): | ||||||
|  |         """Burn-in the disk | ||||||
|  |  | ||||||
|  |         :param node: Ironic node object | ||||||
|  |         :param ports: list of Ironic port objects | ||||||
|  |         """ | ||||||
|  |         burnin.fio_disk(node) | ||||||
|  |  | ||||||
|     def burnin_memory(self, node, ports): |     def burnin_memory(self, node, ports): | ||||||
|         """Burn-in the memory |         """Burn-in the memory | ||||||
|  |  | ||||||
| @@ -1890,6 +1898,13 @@ class GenericHardwareManager(HardwareManager): | |||||||
|                 'reboot_requested': False, |                 'reboot_requested': False, | ||||||
|                 'abortable': True |                 'abortable': True | ||||||
|             }, |             }, | ||||||
|  |             { | ||||||
|  |                 'step': 'burnin_disk', | ||||||
|  |                 'priority': 0, | ||||||
|  |                 'interface': 'deploy', | ||||||
|  |                 'reboot_requested': False, | ||||||
|  |                 'abortable': True | ||||||
|  |             }, | ||||||
|             { |             { | ||||||
|                 'step': 'burnin_memory', |                 'step': 'burnin_memory', | ||||||
|                 'priority': 0, |                 'priority': 0, | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ from oslo_concurrency import processutils | |||||||
|  |  | ||||||
| from ironic_python_agent import burnin | from ironic_python_agent import burnin | ||||||
| from ironic_python_agent import errors | from ironic_python_agent import errors | ||||||
|  | from ironic_python_agent import hardware | ||||||
| from ironic_python_agent.tests.unit import base | from ironic_python_agent.tests.unit import base | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -63,6 +64,7 @@ class TestBurnin(base.IronicAgentTest): | |||||||
|         burnin.stress_ng_vm(node) |         burnin.stress_ng_vm(node) | ||||||
|  |  | ||||||
|         mock_execute.assert_called_once_with( |         mock_execute.assert_called_once_with( | ||||||
|  |  | ||||||
|             'stress-ng', '--vm', 0, '--vm-bytes', '98%', |             'stress-ng', '--vm', 0, '--vm-bytes', '98%', | ||||||
|             '--timeout', 86400, '--metrics-brief') |             '--timeout', 86400, '--metrics-brief') | ||||||
|  |  | ||||||
| @@ -89,3 +91,56 @@ class TestBurnin(base.IronicAgentTest): | |||||||
|  |  | ||||||
|         self.assertRaises(errors.CommandExecutionError, |         self.assertRaises(errors.CommandExecutionError, | ||||||
|                           burnin.stress_ng_vm, node) |                           burnin.stress_ng_vm, node) | ||||||
|  |  | ||||||
|  |     @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) | ||||||
|  |     def test_fio_disk_default(self, mock_list, mock_execute): | ||||||
|  |  | ||||||
|  |         node = {'driver_info': {}} | ||||||
|  |  | ||||||
|  |         mock_list.return_value = [ | ||||||
|  |             hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True), | ||||||
|  |             hardware.BlockDevice('/dev/hdaa', 'small', 65535, False), | ||||||
|  |         ] | ||||||
|  |         mock_execute.return_value = (['out', 'err']) | ||||||
|  |  | ||||||
|  |         burnin.fio_disk(node) | ||||||
|  |  | ||||||
|  |         mock_execute.assert_called_once_with( | ||||||
|  |             'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1, | ||||||
|  |             '--ioengine', 'libaio', '--iodepth', '32', '--verify', | ||||||
|  |             'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify', | ||||||
|  |             '--loops', 4, '--runtime', 0, '--time_based', '--name', | ||||||
|  |             '/dev/sdj', '--name', '/dev/hdaa') | ||||||
|  |  | ||||||
|  |     @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) | ||||||
|  |     def test_fio_disk_no_default(self, mock_list, mock_execute): | ||||||
|  |  | ||||||
|  |         node = {'driver_info': {'agent_burnin_fio_disk_runtime': 600, | ||||||
|  |                                 'agent_burnin_fio_disk_loops': 5}} | ||||||
|  |  | ||||||
|  |         mock_list.return_value = [ | ||||||
|  |             hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True), | ||||||
|  |             hardware.BlockDevice('/dev/hdaa', 'small', 65535, False), | ||||||
|  |         ] | ||||||
|  |         mock_execute.return_value = (['out', 'err']) | ||||||
|  |  | ||||||
|  |         burnin.fio_disk(node) | ||||||
|  |  | ||||||
|  |         mock_execute.assert_called_once_with( | ||||||
|  |             'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1, | ||||||
|  |             '--ioengine', 'libaio', '--iodepth', '32', '--verify', | ||||||
|  |             'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify', | ||||||
|  |             '--loops', 5, '--runtime', 600, '--time_based', '--name', | ||||||
|  |             '/dev/sdj', '--name', '/dev/hdaa') | ||||||
|  |  | ||||||
|  |     @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) | ||||||
|  |     def test_fio_disk_no_fio(self, mock_list, mock_execute): | ||||||
|  |  | ||||||
|  |         node = {'driver_info': {}} | ||||||
|  |         mock_execute.side_effect = (['out', 'err'], | ||||||
|  |                                     processutils.ProcessExecutionError()) | ||||||
|  |  | ||||||
|  |         burnin.fio_disk(node) | ||||||
|  |  | ||||||
|  |         self.assertRaises(errors.CommandExecutionError, | ||||||
|  |                           burnin.fio_disk, node) | ||||||
|   | |||||||
| @@ -157,6 +157,13 @@ class TestGenericHardwareManager(base.IronicAgentTest): | |||||||
|                 'reboot_requested': False, |                 'reboot_requested': False, | ||||||
|                 'abortable': True |                 'abortable': True | ||||||
|             }, |             }, | ||||||
|  |             { | ||||||
|  |                 'step': 'burnin_disk', | ||||||
|  |                 'priority': 0, | ||||||
|  |                 'interface': 'deploy', | ||||||
|  |                 'reboot_requested': False, | ||||||
|  |                 'abortable': True | ||||||
|  |             }, | ||||||
|             { |             { | ||||||
|                 'step': 'burnin_memory', |                 'step': 'burnin_memory', | ||||||
|                 'priority': 0, |                 'priority': 0, | ||||||
|   | |||||||
							
								
								
									
										6
									
								
								releasenotes/notes/add_burnin_disk-12adb5735a41af47.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								releasenotes/notes/add_burnin_disk-12adb5735a41af47.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | |||||||
|  | --- | ||||||
|  | features: | ||||||
|  |   - | | ||||||
|  |     Adds a burn-in cleaning step 'burnin_disk' to stress test disks for a | ||||||
|  |     configurable number of iterations or a configurable amount of time with | ||||||
|  |     fio. To use this step, fio needs to be installed on the RAM disk. | ||||||
		Reference in New Issue
	
	Block a user
	 Arne Wiebalck
					Arne Wiebalck