Burn-in: Add disk step

Add a clean step for disk burn-in via fio. Get basic
run parameters from the node's driver_info.

Story: #2007523
Task: #42384

Change-Id: I5f5e336bd629846b3d779fd0fc7a2060b385b035
This commit is contained in:
Arne Wiebalck 2021-05-19 18:37:45 +02:00
parent 6fc5a14760
commit 20c5894bc2
6 changed files with 122 additions and 0 deletions

View File

@ -77,6 +77,8 @@ Clean steps
``deploy.burnin_cpu``
Stress-test the CPUs of a node via stress-ng for a configurable
amount of time. Disabled by default.
``deploy.burnin_disk``
Stress-test the disks of a node via fio. Disabled by default.
``deploy.burnin_memory``
Stress-test the memory of a node via stress-ng for a configurable
amount of time. Disabled by default.

View File

@ -15,6 +15,7 @@ from oslo_concurrency import processutils
from oslo_log import log
from ironic_python_agent import errors
from ironic_python_agent import hardware
LOG = log.getLogger(__name__)
@ -78,3 +79,39 @@ def stress_ng_vm(node):
{'err': e})
LOG.error(error_msg)
raise errors.CommandExecutionError(error_msg)
def fio_disk(node):
    """Burn-in the disks with fio

    Run a mixed read/write fio job (``--rw readwrite``) with crc32c
    verification for a configurable number of iterations or a given
    amount of time. One fio job (``--name``) is added for every device
    returned by ``hardware.list_all_block_devices()``.

    The run is tuned via two optional keys in the node's ``driver_info``:

    * ``agent_burnin_fio_disk_loops``: value passed to ``--loops``
      (defaults to 4)
    * ``agent_burnin_fio_disk_runtime``: value passed to ``--runtime``
      (defaults to 0)

    :param node: Ironic node object
    :raises: CommandExecutionError if the execution of fio fails.
    """
    info = node.get('driver_info', {})
    # 4 iterations, same as the default of badblocks
    loops = info.get('agent_burnin_fio_disk_loops', 4)
    runtime = info.get('agent_burnin_fio_disk_runtime', 0)

    args = ['fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
            '--ioengine', 'libaio', '--iodepth', '32', '--verify',
            'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
            '--loops', loops, '--runtime', runtime, '--time_based']

    devices = hardware.list_all_block_devices()
    for device in devices:
        args.extend(['--name', device.name])

    # args mixes ints and strings, hence the explicit str() for joining
    LOG.debug('Burn-in fio disk command: %s', ' '.join(map(str, args)))

    try:
        out, _ = utils.execute(*args)
        # fio reports on stdout
        LOG.info(out)
    except (processutils.ProcessExecutionError, OSError) as e:
        # Interpolate the error into the message; without the '%' operator
        # the message would be a (template, dict) tuple rather than text.
        error_msg = "fio (disk) failed with error %(err)s" % {'err': e}
        LOG.error(error_msg)
        raise errors.CommandExecutionError(error_msg)

View File

@ -1402,6 +1402,14 @@ class GenericHardwareManager(HardwareManager):
"""
burnin.stress_ng_cpu(node)
def burnin_disk(self, node, ports):
    """Run the disk burn-in step for a node.

    Delegates to ``burnin.fio_disk``; only ``node`` is forwarded.

    :param node: Ironic node object
    :param ports: list of Ironic port objects (not used by this step)
    """
    burnin.fio_disk(node)
def burnin_memory(self, node, ports):
"""Burn-in the memory
@ -1890,6 +1898,13 @@ class GenericHardwareManager(HardwareManager):
'reboot_requested': False,
'abortable': True
},
{
'step': 'burnin_disk',
'priority': 0,
'interface': 'deploy',
'reboot_requested': False,
'abortable': True
},
{
'step': 'burnin_memory',
'priority': 0,

View File

@ -17,6 +17,7 @@ from oslo_concurrency import processutils
from ironic_python_agent import burnin
from ironic_python_agent import errors
from ironic_python_agent import hardware
from ironic_python_agent.tests.unit import base
@ -63,6 +64,7 @@ class TestBurnin(base.IronicAgentTest):
burnin.stress_ng_vm(node)
mock_execute.assert_called_once_with(
'stress-ng', '--vm', 0, '--vm-bytes', '98%',
'--timeout', 86400, '--metrics-brief')
@ -89,3 +91,56 @@ class TestBurnin(base.IronicAgentTest):
self.assertRaises(errors.CommandExecutionError,
burnin.stress_ng_vm, node)
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
def test_fio_disk_default(self, mock_list, mock_execute):
    # With an empty driver_info the command must carry the built-in
    # defaults (4 loops, runtime 0) plus one --name per listed device.
    mock_execute.return_value = ['out', 'err']
    mock_list.return_value = [
        hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True),
        hardware.BlockDevice('/dev/hdaa', 'small', 65535, False),
    ]

    burnin.fio_disk({'driver_info': {}})

    expected_cmd = (
        'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
        '--ioengine', 'libaio', '--iodepth', '32', '--verify',
        'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
        '--loops', 4, '--runtime', 0, '--time_based',
        '--name', '/dev/sdj', '--name', '/dev/hdaa',
    )
    mock_execute.assert_called_once_with(*expected_cmd)
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
def test_fio_disk_no_default(self, mock_list, mock_execute):
    # Values supplied in driver_info must replace the default loop
    # count and runtime on the fio command line.
    driver_info = {
        'agent_burnin_fio_disk_runtime': 600,
        'agent_burnin_fio_disk_loops': 5,
    }
    mock_execute.return_value = ['out', 'err']
    mock_list.return_value = [
        hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True),
        hardware.BlockDevice('/dev/hdaa', 'small', 65535, False),
    ]

    burnin.fio_disk({'driver_info': driver_info})

    expected_cmd = (
        'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
        '--ioengine', 'libaio', '--iodepth', '32', '--verify',
        'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
        '--loops', 5, '--runtime', 600, '--time_based',
        '--name', '/dev/sdj', '--name', '/dev/hdaa',
    )
    mock_execute.assert_called_once_with(*expected_cmd)
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
def test_fio_disk_no_fio(self, mock_list, mock_execute):
    # The first execute call succeeds, the second one raises; the
    # failure has to surface as CommandExecutionError.
    empty_node = {'driver_info': {}}
    mock_execute.side_effect = (
        ['out', 'err'],
        processutils.ProcessExecutionError(),
    )

    # Consumes the successful side effect.
    burnin.fio_disk(empty_node)

    # Consumes the failing side effect.
    with self.assertRaises(errors.CommandExecutionError):
        burnin.fio_disk(empty_node)

View File

@ -157,6 +157,13 @@ class TestGenericHardwareManager(base.IronicAgentTest):
'reboot_requested': False,
'abortable': True
},
{
'step': 'burnin_disk',
'priority': 0,
'interface': 'deploy',
'reboot_requested': False,
'abortable': True
},
{
'step': 'burnin_memory',
'priority': 0,

View File

@ -0,0 +1,6 @@
---
features:
- |
Adds a burn-in cleaning step 'burnin_disk' to stress test disks for a
configurable number of iterations or a configurable amount of time with
fio. To use this step, fio needs to be installed on the RAM disk.