Burn-in: Add options for named log files

In order to ease logging of the various burn-in steps, this patch
adds options to define the output files for all burn-in steps:
{'agent_burnin_cpu', 'agent_burnin_vm', 'agent_burnin_fio_network',
'agent_burnin_fio_disk'}_outputfile, set via a node's driver_info.

Story: #2007523
Task: #44102

Change-Id: I327cae5949d38e738d3c535487b3795d00ad8f1e
This commit is contained in:
Arne Wiebalck 2021-12-01 12:22:43 +01:00
parent 87a42cc887
commit e751218059
3 changed files with 117 additions and 21 deletions

View File

@ -38,9 +38,13 @@ def stress_ng_cpu(node):
info = node.get('driver_info', {})
cpu = info.get('agent_burnin_cpu_cpu', 0)
timeout = info.get('agent_burnin_cpu_timeout', 86400)
outputfile = info.get('agent_burnin_cpu_outputfile', None)
args = ('stress-ng', '--cpu', cpu, '--timeout', timeout,
'--metrics-brief')
if outputfile:
args += ('--log-file', outputfile,)
LOG.debug('Burn-in stress_ng_cpu command: %s', args)
try:
@ -69,9 +73,13 @@ def stress_ng_vm(node):
vm = info.get('agent_burnin_vm_vm', 0)
vm_bytes = info.get('agent_burnin_vm_vm-bytes', '98%')
timeout = info.get('agent_burnin_vm_timeout', 86400)
outputfile = info.get('agent_burnin_vm_outputfile', None)
args = ('stress-ng', '--vm', vm, '--vm-bytes', vm_bytes,
'--timeout', timeout, '--metrics-brief')
if outputfile:
args += ('--log-file', outputfile,)
LOG.debug('Burn-in stress_ng_vm command: %s', args)
try:
@ -97,11 +105,14 @@ def fio_disk(node):
# 4 iterations, same as badblock's default
loops = info.get('agent_burnin_fio_disk_loops', 4)
runtime = info.get('agent_burnin_fio_disk_runtime', 0)
outputfile = info.get('agent_burnin_fio_disk_outputfile', None)
args = ['fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
'--ioengine', 'libaio', '--iodepth', '32', '--verify',
'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
'--loops', loops, '--runtime', runtime, '--time_based']
if outputfile:
args.extend(['--output-format', 'json', '--output', outputfile])
devices = hardware.list_all_block_devices()
for device in devices:
@ -119,7 +130,7 @@ def fio_disk(node):
raise errors.CommandExecutionError(error_msg)
def _do_fio_network(writer, runtime, partner):
def _do_fio_network(writer, runtime, partner, outputfile):
args = ['fio', '--ioengine', 'net', '--port', '9000', '--fill_device', 1,
'--group_reporting', '--gtod_reduce', 1, '--numjobs', 16]
@ -129,6 +140,8 @@ def _do_fio_network(writer, runtime, partner):
else:
xargs = ['--name', 'reader', '--rw', 'read', '--hostname', partner]
args.extend(xargs)
if outputfile:
args.extend(['--output-format', 'json', '--output', outputfile])
while True:
LOG.info('Burn-in fio network command: %s', ' '.join(map(str, args)))
@ -137,13 +150,17 @@ def _do_fio_network(writer, runtime, partner):
# fio reports on stdout
LOG.info(out)
break
except (processutils.ProcessExecutionError, OSError) as e:
except processutils.ProcessExecutionError as e:
error_msg = "fio (network) failed with error %s" % e
LOG.error(error_msg)
# while the writer blocks in fio, the reader fails with
if writer:
raise errors.CommandExecutionError(error_msg)
# While the writer blocks in fio, the reader fails with
# 'Connection {refused, timeout}' errors if the partner
# is not ready, so we need to wait explicitly
if not writer and 'Connection' in str(e):
# is not ready, so we need to wait explicitly. Using the
# exit code accounts for both, logging to stderr as well
# as to a file.
if e.exit_code == 16:
LOG.info("fio (network): reader retrying in %s seconds ...",
NETWORK_READER_CYCLE)
time.sleep(NETWORK_READER_CYCLE)
@ -171,6 +188,7 @@ def fio_network(node):
info = node.get('driver_info', {})
runtime = info.get('agent_burnin_fio_network_runtime', 21600)
outputfile = info.get('agent_burnin_fio_network_outputfile', None)
# get our role and identify our partner
config = info.get('agent_burnin_fio_network_config')
@ -190,6 +208,14 @@ def fio_network(node):
error_msg = ("fio (network) failed to find partner")
raise errors.CleaningError(error_msg)
_do_fio_network(role == 'writer', runtime, partner)
logfilename = None
if outputfile:
logfilename = outputfile + '.' + role
_do_fio_network(role == 'writer', runtime, partner, logfilename)
LOG.debug("fio (network): first direction done, swapping roles ...")
_do_fio_network(not role == 'writer', runtime, partner)
if outputfile:
irole = "reader" if (role == "writer") else "writer"
logfilename = outputfile + '.' + irole
_do_fio_network(not role == 'writer', runtime, partner, logfilename)

View File

@ -36,14 +36,17 @@ class TestBurnin(base.IronicAgentTest):
def test_stress_ng_cpu_non_default(self, mock_execute):
node = {'driver_info': {'agent_burnin_cpu_cpu': 3,
'agent_burnin_cpu_timeout': 2911}}
node = {'driver_info': {
'agent_burnin_cpu_cpu': 3,
'agent_burnin_cpu_timeout': 2911,
'agent_burnin_cpu_outputfile': '/var/log/burnin.cpu'}}
mock_execute.return_value = (['out', 'err'])
burnin.stress_ng_cpu(node)
mock_execute.assert_called_once_with(
'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief')
'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief',
'--log-file', '/var/log/burnin.cpu')
def test_stress_ng_cpu_no_stress_ng(self, mock_execute):
@ -70,16 +73,19 @@ class TestBurnin(base.IronicAgentTest):
def test_stress_ng_vm_non_default(self, mock_execute):
node = {'driver_info': {'agent_burnin_vm_vm': 2,
'agent_burnin_vm_vm-bytes': '25%',
'agent_burnin_vm_timeout': 120}}
node = {'driver_info': {
'agent_burnin_vm_vm': 2,
'agent_burnin_vm_vm-bytes': '25%',
'agent_burnin_vm_timeout': 120,
'agent_burnin_vm_outputfile': '/var/log/burnin.vm'}}
mock_execute.return_value = (['out', 'err'])
burnin.stress_ng_vm(node)
mock_execute.assert_called_once_with(
'stress-ng', '--vm', 2, '--vm-bytes', '25%',
'--timeout', 120, '--metrics-brief')
'--timeout', 120, '--metrics-brief',
'--log-file', '/var/log/burnin.vm')
def test_stress_ng_vm_no_stress_ng(self, mock_execute):
@ -115,8 +121,10 @@ class TestBurnin(base.IronicAgentTest):
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
def test_fio_disk_no_default(self, mock_list, mock_execute):
node = {'driver_info': {'agent_burnin_fio_disk_runtime': 600,
'agent_burnin_fio_disk_loops': 5}}
node = {'driver_info': {
'agent_burnin_fio_disk_runtime': 600,
'agent_burnin_fio_disk_loops': 5,
'agent_burnin_fio_disk_outputfile': '/var/log/burnin.disk'}}
mock_list.return_value = [
hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True),
@ -130,8 +138,9 @@ class TestBurnin(base.IronicAgentTest):
'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
'--ioengine', 'libaio', '--iodepth', '32', '--verify',
'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
'--loops', 5, '--runtime', 600, '--time_based', '--name',
'/dev/sdj', '--name', '/dev/hdaa')
'--loops', 5, '--runtime', 600, '--time_based', '--output-format',
'json', '--output', '/var/log/burnin.disk', '--name', '/dev/sdj',
'--name', '/dev/hdaa', )
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
def test_fio_disk_no_fio(self, mock_list, mock_execute):
@ -167,6 +176,33 @@ class TestBurnin(base.IronicAgentTest):
'--listen')]
mock_execute.assert_has_calls(expected_calls)
def test_fio_network_reader_w_logfile(self, mock_execute):
node = {'driver_info': {
'agent_burnin_fio_network_runtime': 600,
'agent_burnin_fio_network_config':
{'partner': 'host-002',
'role': 'reader'},
'agent_burnin_fio_network_outputfile': '/var/log/burnin.network'}}
mock_execute.return_value = (['out', 'err'])
burnin.fio_network(node)
expected_calls = [
mock.call('fio', '--ioengine', 'net', '--port', '9000',
'--fill_device', 1, '--group_reporting',
'--gtod_reduce', 1, '--numjobs', 16, '--name',
'reader', '--rw', 'read', '--hostname', 'host-002',
'--output-format', 'json', '--output',
'/var/log/burnin.network.reader'),
mock.call('fio', '--ioengine', 'net', '--port', '9000',
'--fill_device', 1, '--group_reporting',
'--gtod_reduce', 1, '--numjobs', 16, '--name', 'writer',
'--rw', 'write', '--runtime', 600, '--time_based',
'--listen', '--output-format', 'json', '--output',
'/var/log/burnin.network.writer')]
mock_execute.assert_has_calls(expected_calls)
def test_fio_network_writer(self, mock_execute):
node = {'driver_info': {'agent_burnin_fio_network_runtime': 600,
@ -189,6 +225,33 @@ class TestBurnin(base.IronicAgentTest):
'reader', '--rw', 'read', '--hostname', 'host-001')]
mock_execute.assert_has_calls(expected_calls)
def test_fio_network_writer_w_logfile(self, mock_execute):
node = {'driver_info': {
'agent_burnin_fio_network_runtime': 600,
'agent_burnin_fio_network_config':
{'partner': 'host-001',
'role': 'writer'},
'agent_burnin_fio_network_outputfile': '/var/log/burnin.network'}}
mock_execute.return_value = (['out', 'err'])
burnin.fio_network(node)
expected_calls = [
mock.call('fio', '--ioengine', 'net', '--port', '9000',
'--fill_device', 1, '--group_reporting',
'--gtod_reduce', 1, '--numjobs', 16, '--name', 'writer',
'--rw', 'write', '--runtime', 600, '--time_based',
'--listen', '--output-format', 'json', '--output',
'/var/log/burnin.network.writer'),
mock.call('fio', '--ioengine', 'net', '--port', '9000',
'--fill_device', 1, '--group_reporting',
'--gtod_reduce', 1, '--numjobs', 16, '--name',
'reader', '--rw', 'read', '--hostname', 'host-001',
'--output-format', 'json', '--output',
'/var/log/burnin.network.reader')]
mock_execute.assert_has_calls(expected_calls)
def test_fio_network_no_fio(self, mock_execute):
node = {'driver_info': {'agent_burnin_fio_network_config':
@ -226,11 +289,11 @@ class TestBurnin(base.IronicAgentTest):
{'partner': 'host-004', 'role': 'reader'}}}
# mock the infinite loop
mock_execute.side_effect = (processutils.ProcessExecutionError(
'Connection timeout'),
'Connection timeout', exit_code=16),
processutils.ProcessExecutionError(
'Connection timeout'),
'Connection timeout', exit_code=16),
processutils.ProcessExecutionError(
'Connection refused'),
'Connection refused', exit_code=16),
['out', 'err'], # connected!
['out', 'err']) # reversed roles

View File

@ -0,0 +1,7 @@
---
features:
- |
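Add options to have named output files for the burn-in logging:
{'agent_burnin_cpu', 'agent_burnin_vm', 'agent_burnin_fio_network',
'agent_burnin_fio_disk'}_outputfile, set via a node's ``driver_info``.
For the fio network step, the role ('reader' or 'writer') is appended
to the given file name as a suffix. This should ease collecting
the output of the burn-in steps for analysis.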