Merge "Burn-in: Add options for named log files"

This commit is contained in:
Zuul 2021-12-09 11:54:17 +00:00 committed by Gerrit Code Review
commit fa5cccd137
3 changed files with 117 additions and 21 deletions

View File

@ -39,9 +39,13 @@ def stress_ng_cpu(node):
info = node.get('driver_info', {}) info = node.get('driver_info', {})
cpu = info.get('agent_burnin_cpu_cpu', 0) cpu = info.get('agent_burnin_cpu_cpu', 0)
timeout = info.get('agent_burnin_cpu_timeout', 86400) timeout = info.get('agent_burnin_cpu_timeout', 86400)
outputfile = info.get('agent_burnin_cpu_outputfile', None)
args = ('stress-ng', '--cpu', cpu, '--timeout', timeout, args = ('stress-ng', '--cpu', cpu, '--timeout', timeout,
'--metrics-brief') '--metrics-brief')
if outputfile:
args += ('--log-file', outputfile,)
LOG.debug('Burn-in stress_ng_cpu command: %s', args) LOG.debug('Burn-in stress_ng_cpu command: %s', args)
try: try:
@ -70,9 +74,13 @@ def stress_ng_vm(node):
vm = info.get('agent_burnin_vm_vm', 0) vm = info.get('agent_burnin_vm_vm', 0)
vm_bytes = info.get('agent_burnin_vm_vm-bytes', '98%') vm_bytes = info.get('agent_burnin_vm_vm-bytes', '98%')
timeout = info.get('agent_burnin_vm_timeout', 86400) timeout = info.get('agent_burnin_vm_timeout', 86400)
outputfile = info.get('agent_burnin_vm_outputfile', None)
args = ('stress-ng', '--vm', vm, '--vm-bytes', vm_bytes, args = ('stress-ng', '--vm', vm, '--vm-bytes', vm_bytes,
'--timeout', timeout, '--metrics-brief') '--timeout', timeout, '--metrics-brief')
if outputfile:
args += ('--log-file', outputfile,)
LOG.debug('Burn-in stress_ng_vm command: %s', args) LOG.debug('Burn-in stress_ng_vm command: %s', args)
try: try:
@ -175,11 +183,14 @@ def fio_disk(node):
# 4 iterations, same as badblock's default # 4 iterations, same as badblock's default
loops = info.get('agent_burnin_fio_disk_loops', 4) loops = info.get('agent_burnin_fio_disk_loops', 4)
runtime = info.get('agent_burnin_fio_disk_runtime', 0) runtime = info.get('agent_burnin_fio_disk_runtime', 0)
outputfile = info.get('agent_burnin_fio_disk_outputfile', None)
args = ['fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1, args = ['fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
'--ioengine', 'libaio', '--iodepth', '32', '--verify', '--ioengine', 'libaio', '--iodepth', '32', '--verify',
'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify', 'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
'--loops', loops, '--runtime', runtime, '--time_based'] '--loops', loops, '--runtime', runtime, '--time_based']
if outputfile:
args.extend(['--output-format', 'json', '--output', outputfile])
devices = hardware.list_all_block_devices() devices = hardware.list_all_block_devices()
for device in devices: for device in devices:
@ -203,7 +214,7 @@ def fio_disk(node):
_run_smart_test(devices) _run_smart_test(devices)
def _do_fio_network(writer, runtime, partner): def _do_fio_network(writer, runtime, partner, outputfile):
args = ['fio', '--ioengine', 'net', '--port', '9000', '--fill_device', 1, args = ['fio', '--ioengine', 'net', '--port', '9000', '--fill_device', 1,
'--group_reporting', '--gtod_reduce', 1, '--numjobs', 16] '--group_reporting', '--gtod_reduce', 1, '--numjobs', 16]
@ -213,6 +224,8 @@ def _do_fio_network(writer, runtime, partner):
else: else:
xargs = ['--name', 'reader', '--rw', 'read', '--hostname', partner] xargs = ['--name', 'reader', '--rw', 'read', '--hostname', partner]
args.extend(xargs) args.extend(xargs)
if outputfile:
args.extend(['--output-format', 'json', '--output', outputfile])
while True: while True:
LOG.info('Burn-in fio network command: %s', ' '.join(map(str, args))) LOG.info('Burn-in fio network command: %s', ' '.join(map(str, args)))
@ -221,13 +234,17 @@ def _do_fio_network(writer, runtime, partner):
# fio reports on stdout # fio reports on stdout
LOG.info(out) LOG.info(out)
break break
except (processutils.ProcessExecutionError, OSError) as e: except processutils.ProcessExecutionError as e:
error_msg = "fio (network) failed with error %s" % e error_msg = "fio (network) failed with error %s" % e
LOG.error(error_msg) LOG.error(error_msg)
# while the writer blocks in fio, the reader fails with if writer:
raise errors.CommandExecutionError(error_msg)
# While the writer blocks in fio, the reader fails with
# 'Connection {refused, timeout}' errors if the partner # 'Connection {refused, timeout}' errors if the partner
# is not ready, so we need to wait explicitly # is not ready, so we need to wait explicitly. Using the
if not writer and 'Connection' in str(e): # exit code accounts for both, logging to stderr as well
# as to a file.
if e.exit_code == 16:
LOG.info("fio (network): reader retrying in %s seconds ...", LOG.info("fio (network): reader retrying in %s seconds ...",
NETWORK_READER_CYCLE) NETWORK_READER_CYCLE)
time.sleep(NETWORK_READER_CYCLE) time.sleep(NETWORK_READER_CYCLE)
@ -255,6 +272,7 @@ def fio_network(node):
info = node.get('driver_info', {}) info = node.get('driver_info', {})
runtime = info.get('agent_burnin_fio_network_runtime', 21600) runtime = info.get('agent_burnin_fio_network_runtime', 21600)
outputfile = info.get('agent_burnin_fio_network_outputfile', None)
# get our role and identify our partner # get our role and identify our partner
config = info.get('agent_burnin_fio_network_config') config = info.get('agent_burnin_fio_network_config')
@ -274,6 +292,14 @@ def fio_network(node):
error_msg = ("fio (network) failed to find partner") error_msg = ("fio (network) failed to find partner")
raise errors.CleaningError(error_msg) raise errors.CleaningError(error_msg)
_do_fio_network(role == 'writer', runtime, partner) logfilename = None
if outputfile:
logfilename = outputfile + '.' + role
_do_fio_network(role == 'writer', runtime, partner, logfilename)
LOG.debug("fio (network): first direction done, swapping roles ...") LOG.debug("fio (network): first direction done, swapping roles ...")
_do_fio_network(not role == 'writer', runtime, partner)
if outputfile:
irole = "reader" if (role == "writer") else "writer"
logfilename = outputfile + '.' + irole
_do_fio_network(not role == 'writer', runtime, partner, logfilename)

View File

@ -69,14 +69,17 @@ class TestBurnin(base.IronicAgentTest):
def test_stress_ng_cpu_non_default(self, mock_execute): def test_stress_ng_cpu_non_default(self, mock_execute):
node = {'driver_info': {'agent_burnin_cpu_cpu': 3, node = {'driver_info': {
'agent_burnin_cpu_timeout': 2911}} 'agent_burnin_cpu_cpu': 3,
'agent_burnin_cpu_timeout': 2911,
'agent_burnin_cpu_outputfile': '/var/log/burnin.cpu'}}
mock_execute.return_value = (['out', 'err']) mock_execute.return_value = (['out', 'err'])
burnin.stress_ng_cpu(node) burnin.stress_ng_cpu(node)
mock_execute.assert_called_once_with( mock_execute.assert_called_once_with(
'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief') 'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief',
'--log-file', '/var/log/burnin.cpu')
def test_stress_ng_cpu_no_stress_ng(self, mock_execute): def test_stress_ng_cpu_no_stress_ng(self, mock_execute):
@ -103,16 +106,19 @@ class TestBurnin(base.IronicAgentTest):
def test_stress_ng_vm_non_default(self, mock_execute): def test_stress_ng_vm_non_default(self, mock_execute):
node = {'driver_info': {'agent_burnin_vm_vm': 2, node = {'driver_info': {
'agent_burnin_vm_vm-bytes': '25%', 'agent_burnin_vm_vm': 2,
'agent_burnin_vm_timeout': 120}} 'agent_burnin_vm_vm-bytes': '25%',
'agent_burnin_vm_timeout': 120,
'agent_burnin_vm_outputfile': '/var/log/burnin.vm'}}
mock_execute.return_value = (['out', 'err']) mock_execute.return_value = (['out', 'err'])
burnin.stress_ng_vm(node) burnin.stress_ng_vm(node)
mock_execute.assert_called_once_with( mock_execute.assert_called_once_with(
'stress-ng', '--vm', 2, '--vm-bytes', '25%', 'stress-ng', '--vm', 2, '--vm-bytes', '25%',
'--timeout', 120, '--metrics-brief') '--timeout', 120, '--metrics-brief',
'--log-file', '/var/log/burnin.vm')
def test_stress_ng_vm_no_stress_ng(self, mock_execute): def test_stress_ng_vm_no_stress_ng(self, mock_execute):
@ -148,8 +154,10 @@ class TestBurnin(base.IronicAgentTest):
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True) @mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
def test_fio_disk_no_default(self, mock_list, mock_execute): def test_fio_disk_no_default(self, mock_list, mock_execute):
node = {'driver_info': {'agent_burnin_fio_disk_runtime': 600, node = {'driver_info': {
'agent_burnin_fio_disk_loops': 5}} 'agent_burnin_fio_disk_runtime': 600,
'agent_burnin_fio_disk_loops': 5,
'agent_burnin_fio_disk_outputfile': '/var/log/burnin.disk'}}
mock_list.return_value = [ mock_list.return_value = [
hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True), hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True),
@ -163,8 +171,9 @@ class TestBurnin(base.IronicAgentTest):
'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1, 'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
'--ioengine', 'libaio', '--iodepth', '32', '--verify', '--ioengine', 'libaio', '--iodepth', '32', '--verify',
'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify', 'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
'--loops', 5, '--runtime', 600, '--time_based', '--name', '--loops', 5, '--runtime', 600, '--time_based', '--output-format',
'/dev/sdj', '--name', '/dev/hdaa') 'json', '--output', '/var/log/burnin.disk', '--name', '/dev/sdj',
'--name', '/dev/hdaa', )
def test__smart_test_status(self, mock_execute): def test__smart_test_status(self, mock_execute):
device = hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True) device = hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True)
@ -244,6 +253,33 @@ class TestBurnin(base.IronicAgentTest):
'--listen')] '--listen')]
mock_execute.assert_has_calls(expected_calls) mock_execute.assert_has_calls(expected_calls)
def test_fio_network_reader_w_logfile(self, mock_execute):
    """Check the fio calls of a reader node when an output file is set.

    The node starts as reader and is then run again as writer; each run
    is expected to request JSON output into the configured file name
    suffixed with the role of that run.
    """
    network_config = {'partner': 'host-002', 'role': 'reader'}
    driver_info = {
        'agent_burnin_fio_network_runtime': 600,
        'agent_burnin_fio_network_config': network_config,
        'agent_burnin_fio_network_outputfile': '/var/log/burnin.network',
    }
    mock_execute.return_value = ['out', 'err']

    burnin.fio_network({'driver_info': driver_info})

    # Arguments shared by both directions of the network test.
    fio_base = ('fio', '--ioengine', 'net', '--port', '9000',
                '--fill_device', 1, '--group_reporting',
                '--gtod_reduce', 1, '--numjobs', 16)
    as_reader = mock.call(
        *fio_base, '--name', 'reader', '--rw', 'read',
        '--hostname', 'host-002',
        '--output-format', 'json', '--output',
        '/var/log/burnin.network.reader')
    as_writer = mock.call(
        *fio_base, '--name', 'writer', '--rw', 'write',
        '--runtime', 600, '--time_based', '--listen',
        '--output-format', 'json', '--output',
        '/var/log/burnin.network.writer')
    # First the configured role, then the swapped one.
    mock_execute.assert_has_calls([as_reader, as_writer])
def test_fio_network_writer(self, mock_execute): def test_fio_network_writer(self, mock_execute):
node = {'driver_info': {'agent_burnin_fio_network_runtime': 600, node = {'driver_info': {'agent_burnin_fio_network_runtime': 600,
@ -266,6 +302,33 @@ class TestBurnin(base.IronicAgentTest):
'reader', '--rw', 'read', '--hostname', 'host-001')] 'reader', '--rw', 'read', '--hostname', 'host-001')]
mock_execute.assert_has_calls(expected_calls) mock_execute.assert_has_calls(expected_calls)
def test_fio_network_writer_w_logfile(self, mock_execute):
    """Check the fio calls of a writer node when an output file is set.

    The node starts as writer and is then run again as reader; each run
    is expected to request JSON output into the configured file name
    suffixed with the role of that run.
    """
    network_config = {'partner': 'host-001', 'role': 'writer'}
    driver_info = {
        'agent_burnin_fio_network_runtime': 600,
        'agent_burnin_fio_network_config': network_config,
        'agent_burnin_fio_network_outputfile': '/var/log/burnin.network',
    }
    mock_execute.return_value = ['out', 'err']

    burnin.fio_network({'driver_info': driver_info})

    # Arguments shared by both directions of the network test.
    fio_base = ('fio', '--ioengine', 'net', '--port', '9000',
                '--fill_device', 1, '--group_reporting',
                '--gtod_reduce', 1, '--numjobs', 16)
    as_writer = mock.call(
        *fio_base, '--name', 'writer', '--rw', 'write',
        '--runtime', 600, '--time_based', '--listen',
        '--output-format', 'json', '--output',
        '/var/log/burnin.network.writer')
    as_reader = mock.call(
        *fio_base, '--name', 'reader', '--rw', 'read',
        '--hostname', 'host-001',
        '--output-format', 'json', '--output',
        '/var/log/burnin.network.reader')
    # First the configured role, then the swapped one.
    mock_execute.assert_has_calls([as_writer, as_reader])
def test_fio_network_no_fio(self, mock_execute): def test_fio_network_no_fio(self, mock_execute):
node = {'driver_info': {'agent_burnin_fio_network_config': node = {'driver_info': {'agent_burnin_fio_network_config':
@ -303,11 +366,11 @@ class TestBurnin(base.IronicAgentTest):
{'partner': 'host-004', 'role': 'reader'}}} {'partner': 'host-004', 'role': 'reader'}}}
# mock the infinite loop # mock the infinite loop
mock_execute.side_effect = (processutils.ProcessExecutionError( mock_execute.side_effect = (processutils.ProcessExecutionError(
'Connection timeout'), 'Connection timeout', exit_code=16),
processutils.ProcessExecutionError( processutils.ProcessExecutionError(
'Connection timeout'), 'Connection timeout', exit_code=16),
processutils.ProcessExecutionError( processutils.ProcessExecutionError(
'Connection refused'), 'Connection refused', exit_code=16),
['out', 'err'], # connected! ['out', 'err'], # connected!
['out', 'err']) # reversed roles ['out', 'err']) # reversed roles

View File

@ -0,0 +1,7 @@
---
features:
- |
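Adds ``driver_info`` options to write the output of the burn-in steps
to named files: ``agent_burnin_cpu_outputfile``,
``agent_burnin_vm_outputfile``, ``agent_burnin_fio_disk_outputfile``
and ``agent_burnin_fio_network_outputfile``. For the fio network
step, the role (``reader`` or ``writer``) is appended to the file
name. This should ease collecting the output of the burn-in steps
for analysis.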