Burn-in: Add options for named log files
In order to ease logging of the various burn-in steps, this patch proposes options to define the output files for all burn-in steps: {'agent_burnin_cpu', 'agent_burnin_vm', 'agent_burnin_fio_network', 'agent_burnin_fio_disk'}_outputfile via a node's driver_info. Story: #2007523 Task: #44102 Change-Id: I327cae5949d38e738d3c535487b3795d00ad8f1e
This commit is contained in:
parent
87a42cc887
commit
e751218059
@ -38,9 +38,13 @@ def stress_ng_cpu(node):
|
||||
info = node.get('driver_info', {})
|
||||
cpu = info.get('agent_burnin_cpu_cpu', 0)
|
||||
timeout = info.get('agent_burnin_cpu_timeout', 86400)
|
||||
outputfile = info.get('agent_burnin_cpu_outputfile', None)
|
||||
|
||||
args = ('stress-ng', '--cpu', cpu, '--timeout', timeout,
|
||||
'--metrics-brief')
|
||||
if outputfile:
|
||||
args += ('--log-file', outputfile,)
|
||||
|
||||
LOG.debug('Burn-in stress_ng_cpu command: %s', args)
|
||||
|
||||
try:
|
||||
@ -69,9 +73,13 @@ def stress_ng_vm(node):
|
||||
vm = info.get('agent_burnin_vm_vm', 0)
|
||||
vm_bytes = info.get('agent_burnin_vm_vm-bytes', '98%')
|
||||
timeout = info.get('agent_burnin_vm_timeout', 86400)
|
||||
outputfile = info.get('agent_burnin_vm_outputfile', None)
|
||||
|
||||
args = ('stress-ng', '--vm', vm, '--vm-bytes', vm_bytes,
|
||||
'--timeout', timeout, '--metrics-brief')
|
||||
if outputfile:
|
||||
args += ('--log-file', outputfile,)
|
||||
|
||||
LOG.debug('Burn-in stress_ng_vm command: %s', args)
|
||||
|
||||
try:
|
||||
@ -97,11 +105,14 @@ def fio_disk(node):
|
||||
# 4 iterations, same as badblock's default
|
||||
loops = info.get('agent_burnin_fio_disk_loops', 4)
|
||||
runtime = info.get('agent_burnin_fio_disk_runtime', 0)
|
||||
outputfile = info.get('agent_burnin_fio_disk_outputfile', None)
|
||||
|
||||
args = ['fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
|
||||
'--ioengine', 'libaio', '--iodepth', '32', '--verify',
|
||||
'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
|
||||
'--loops', loops, '--runtime', runtime, '--time_based']
|
||||
if outputfile:
|
||||
args.extend(['--output-format', 'json', '--output', outputfile])
|
||||
|
||||
devices = hardware.list_all_block_devices()
|
||||
for device in devices:
|
||||
@ -119,7 +130,7 @@ def fio_disk(node):
|
||||
raise errors.CommandExecutionError(error_msg)
|
||||
|
||||
|
||||
def _do_fio_network(writer, runtime, partner):
|
||||
def _do_fio_network(writer, runtime, partner, outputfile):
|
||||
|
||||
args = ['fio', '--ioengine', 'net', '--port', '9000', '--fill_device', 1,
|
||||
'--group_reporting', '--gtod_reduce', 1, '--numjobs', 16]
|
||||
@ -129,6 +140,8 @@ def _do_fio_network(writer, runtime, partner):
|
||||
else:
|
||||
xargs = ['--name', 'reader', '--rw', 'read', '--hostname', partner]
|
||||
args.extend(xargs)
|
||||
if outputfile:
|
||||
args.extend(['--output-format', 'json', '--output', outputfile])
|
||||
|
||||
while True:
|
||||
LOG.info('Burn-in fio network command: %s', ' '.join(map(str, args)))
|
||||
@ -137,13 +150,17 @@ def _do_fio_network(writer, runtime, partner):
|
||||
# fio reports on stdout
|
||||
LOG.info(out)
|
||||
break
|
||||
except (processutils.ProcessExecutionError, OSError) as e:
|
||||
except processutils.ProcessExecutionError as e:
|
||||
error_msg = "fio (network) failed with error %s" % e
|
||||
LOG.error(error_msg)
|
||||
# while the writer blocks in fio, the reader fails with
|
||||
if writer:
|
||||
raise errors.CommandExecutionError(error_msg)
|
||||
# While the writer blocks in fio, the reader fails with
|
||||
# 'Connection {refused, timeout}' errors if the partner
|
||||
# is not ready, so we need to wait explicitly
|
||||
if not writer and 'Connection' in str(e):
|
||||
# is not ready, so we need to wait explicitly. Using the
|
||||
# exit code accounts for both, logging to stderr as well
|
||||
# as to a file.
|
||||
if e.exit_code == 16:
|
||||
LOG.info("fio (network): reader retrying in %s seconds ...",
|
||||
NETWORK_READER_CYCLE)
|
||||
time.sleep(NETWORK_READER_CYCLE)
|
||||
@ -171,6 +188,7 @@ def fio_network(node):
|
||||
|
||||
info = node.get('driver_info', {})
|
||||
runtime = info.get('agent_burnin_fio_network_runtime', 21600)
|
||||
outputfile = info.get('agent_burnin_fio_network_outputfile', None)
|
||||
|
||||
# get our role and identify our partner
|
||||
config = info.get('agent_burnin_fio_network_config')
|
||||
@ -190,6 +208,14 @@ def fio_network(node):
|
||||
error_msg = ("fio (network) failed to find partner")
|
||||
raise errors.CleaningError(error_msg)
|
||||
|
||||
_do_fio_network(role == 'writer', runtime, partner)
|
||||
logfilename = None
|
||||
if outputfile:
|
||||
logfilename = outputfile + '.' + role
|
||||
_do_fio_network(role == 'writer', runtime, partner, logfilename)
|
||||
|
||||
LOG.debug("fio (network): first direction done, swapping roles ...")
|
||||
_do_fio_network(not role == 'writer', runtime, partner)
|
||||
|
||||
if outputfile:
|
||||
irole = "reader" if (role == "writer") else "writer"
|
||||
logfilename = outputfile + '.' + irole
|
||||
_do_fio_network(not role == 'writer', runtime, partner, logfilename)
|
||||
|
@ -36,14 +36,17 @@ class TestBurnin(base.IronicAgentTest):
|
||||
|
||||
def test_stress_ng_cpu_non_default(self, mock_execute):
|
||||
|
||||
node = {'driver_info': {'agent_burnin_cpu_cpu': 3,
|
||||
'agent_burnin_cpu_timeout': 2911}}
|
||||
node = {'driver_info': {
|
||||
'agent_burnin_cpu_cpu': 3,
|
||||
'agent_burnin_cpu_timeout': 2911,
|
||||
'agent_burnin_cpu_outputfile': '/var/log/burnin.cpu'}}
|
||||
mock_execute.return_value = (['out', 'err'])
|
||||
|
||||
burnin.stress_ng_cpu(node)
|
||||
|
||||
mock_execute.assert_called_once_with(
|
||||
'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief')
|
||||
'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief',
|
||||
'--log-file', '/var/log/burnin.cpu')
|
||||
|
||||
def test_stress_ng_cpu_no_stress_ng(self, mock_execute):
|
||||
|
||||
@ -70,16 +73,19 @@ class TestBurnin(base.IronicAgentTest):
|
||||
|
||||
def test_stress_ng_vm_non_default(self, mock_execute):
|
||||
|
||||
node = {'driver_info': {'agent_burnin_vm_vm': 2,
|
||||
node = {'driver_info': {
|
||||
'agent_burnin_vm_vm': 2,
|
||||
'agent_burnin_vm_vm-bytes': '25%',
|
||||
'agent_burnin_vm_timeout': 120}}
|
||||
'agent_burnin_vm_timeout': 120,
|
||||
'agent_burnin_vm_outputfile': '/var/log/burnin.vm'}}
|
||||
mock_execute.return_value = (['out', 'err'])
|
||||
|
||||
burnin.stress_ng_vm(node)
|
||||
|
||||
mock_execute.assert_called_once_with(
|
||||
'stress-ng', '--vm', 2, '--vm-bytes', '25%',
|
||||
'--timeout', 120, '--metrics-brief')
|
||||
'--timeout', 120, '--metrics-brief',
|
||||
'--log-file', '/var/log/burnin.vm')
|
||||
|
||||
def test_stress_ng_vm_no_stress_ng(self, mock_execute):
|
||||
|
||||
@ -115,8 +121,10 @@ class TestBurnin(base.IronicAgentTest):
|
||||
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
|
||||
def test_fio_disk_no_default(self, mock_list, mock_execute):
|
||||
|
||||
node = {'driver_info': {'agent_burnin_fio_disk_runtime': 600,
|
||||
'agent_burnin_fio_disk_loops': 5}}
|
||||
node = {'driver_info': {
|
||||
'agent_burnin_fio_disk_runtime': 600,
|
||||
'agent_burnin_fio_disk_loops': 5,
|
||||
'agent_burnin_fio_disk_outputfile': '/var/log/burnin.disk'}}
|
||||
|
||||
mock_list.return_value = [
|
||||
hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True),
|
||||
@ -130,8 +138,9 @@ class TestBurnin(base.IronicAgentTest):
|
||||
'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
|
||||
'--ioengine', 'libaio', '--iodepth', '32', '--verify',
|
||||
'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
|
||||
'--loops', 5, '--runtime', 600, '--time_based', '--name',
|
||||
'/dev/sdj', '--name', '/dev/hdaa')
|
||||
'--loops', 5, '--runtime', 600, '--time_based', '--output-format',
|
||||
'json', '--output', '/var/log/burnin.disk', '--name', '/dev/sdj',
|
||||
'--name', '/dev/hdaa', )
|
||||
|
||||
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
|
||||
def test_fio_disk_no_fio(self, mock_list, mock_execute):
|
||||
@ -167,6 +176,33 @@ class TestBurnin(base.IronicAgentTest):
|
||||
'--listen')]
|
||||
mock_execute.assert_has_calls(expected_calls)
|
||||
|
||||
def test_fio_network_reader_w_logfile(self, mock_execute):
|
||||
|
||||
node = {'driver_info': {
|
||||
'agent_burnin_fio_network_runtime': 600,
|
||||
'agent_burnin_fio_network_config':
|
||||
{'partner': 'host-002',
|
||||
'role': 'reader'},
|
||||
'agent_burnin_fio_network_outputfile': '/var/log/burnin.network'}}
|
||||
mock_execute.return_value = (['out', 'err'])
|
||||
|
||||
burnin.fio_network(node)
|
||||
|
||||
expected_calls = [
|
||||
mock.call('fio', '--ioengine', 'net', '--port', '9000',
|
||||
'--fill_device', 1, '--group_reporting',
|
||||
'--gtod_reduce', 1, '--numjobs', 16, '--name',
|
||||
'reader', '--rw', 'read', '--hostname', 'host-002',
|
||||
'--output-format', 'json', '--output',
|
||||
'/var/log/burnin.network.reader'),
|
||||
mock.call('fio', '--ioengine', 'net', '--port', '9000',
|
||||
'--fill_device', 1, '--group_reporting',
|
||||
'--gtod_reduce', 1, '--numjobs', 16, '--name', 'writer',
|
||||
'--rw', 'write', '--runtime', 600, '--time_based',
|
||||
'--listen', '--output-format', 'json', '--output',
|
||||
'/var/log/burnin.network.writer')]
|
||||
mock_execute.assert_has_calls(expected_calls)
|
||||
|
||||
def test_fio_network_writer(self, mock_execute):
|
||||
|
||||
node = {'driver_info': {'agent_burnin_fio_network_runtime': 600,
|
||||
@ -189,6 +225,33 @@ class TestBurnin(base.IronicAgentTest):
|
||||
'reader', '--rw', 'read', '--hostname', 'host-001')]
|
||||
mock_execute.assert_has_calls(expected_calls)
|
||||
|
||||
def test_fio_network_writer_w_logfile(self, mock_execute):
|
||||
|
||||
node = {'driver_info': {
|
||||
'agent_burnin_fio_network_runtime': 600,
|
||||
'agent_burnin_fio_network_config':
|
||||
{'partner': 'host-001',
|
||||
'role': 'writer'},
|
||||
'agent_burnin_fio_network_outputfile': '/var/log/burnin.network'}}
|
||||
mock_execute.return_value = (['out', 'err'])
|
||||
|
||||
burnin.fio_network(node)
|
||||
|
||||
expected_calls = [
|
||||
mock.call('fio', '--ioengine', 'net', '--port', '9000',
|
||||
'--fill_device', 1, '--group_reporting',
|
||||
'--gtod_reduce', 1, '--numjobs', 16, '--name', 'writer',
|
||||
'--rw', 'write', '--runtime', 600, '--time_based',
|
||||
'--listen', '--output-format', 'json', '--output',
|
||||
'/var/log/burnin.network.writer'),
|
||||
mock.call('fio', '--ioengine', 'net', '--port', '9000',
|
||||
'--fill_device', 1, '--group_reporting',
|
||||
'--gtod_reduce', 1, '--numjobs', 16, '--name',
|
||||
'reader', '--rw', 'read', '--hostname', 'host-001',
|
||||
'--output-format', 'json', '--output',
|
||||
'/var/log/burnin.network.reader')]
|
||||
mock_execute.assert_has_calls(expected_calls)
|
||||
|
||||
def test_fio_network_no_fio(self, mock_execute):
|
||||
|
||||
node = {'driver_info': {'agent_burnin_fio_network_config':
|
||||
@ -226,11 +289,11 @@ class TestBurnin(base.IronicAgentTest):
|
||||
{'partner': 'host-004', 'role': 'reader'}}}
|
||||
# mock the infinite loop
|
||||
mock_execute.side_effect = (processutils.ProcessExecutionError(
|
||||
'Connection timeout'),
|
||||
'Connection timeout', exit_code=16),
|
||||
processutils.ProcessExecutionError(
|
||||
'Connection timeout'),
|
||||
'Connection timeout', exit_code=16),
|
||||
processutils.ProcessExecutionError(
|
||||
'Connection refused'),
|
||||
'Connection refused', exit_code=16),
|
||||
['out', 'err'], # connected!
|
||||
['out', 'err']) # reversed roles
|
||||
|
||||
|
@ -0,0 +1,7 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Add options to have named output files for the burn-in logging:
|
||||
{'agent_burnin_cpu', 'agent_burnin_vm', 'agent_burnin_fio_network',
|
||||
'agent_burnin_fio_disk'}_outputfile. This should ease collecting
|
||||
the output of the burn-in steps for analysis.
|
Loading…
Reference in New Issue
Block a user