From e751218059f86d788554883ef8a5db99efce44f4 Mon Sep 17 00:00:00 2001 From: Arne Wiebalck Date: Wed, 1 Dec 2021 12:22:43 +0100 Subject: [PATCH] Burn-in: Add options for named log files In order to ease logging of the various burn-in steps, this patch proposes options to define the output files for all burn-in steps: {'agent_burnin_cpu', 'agent_burnin_vm', 'agent_burnin_fio_network', 'agent_burnin_fio_disk'}_outputfile via a node's driver-info. Story: #2007523 Task: #44102 Change-Id: I327cae5949d38e738d3c535487b3795d00ad8f1e --- ironic_python_agent/burnin.py | 40 ++++++-- ironic_python_agent/tests/unit/test_burnin.py | 91 ++++++++++++++++--- ...d-logfiles-to-burnin-4388309bf7442d53.yaml | 7 ++ 3 files changed, 117 insertions(+), 21 deletions(-) create mode 100644 releasenotes/notes/add-named-logfiles-to-burnin-4388309bf7442d53.yaml diff --git a/ironic_python_agent/burnin.py b/ironic_python_agent/burnin.py index c97eea965..c081869c6 100644 --- a/ironic_python_agent/burnin.py +++ b/ironic_python_agent/burnin.py @@ -38,9 +38,13 @@ def stress_ng_cpu(node): info = node.get('driver_info', {}) cpu = info.get('agent_burnin_cpu_cpu', 0) timeout = info.get('agent_burnin_cpu_timeout', 86400) + outputfile = info.get('agent_burnin_cpu_outputfile', None) args = ('stress-ng', '--cpu', cpu, '--timeout', timeout, '--metrics-brief') + if outputfile: + args += ('--log-file', outputfile,) + LOG.debug('Burn-in stress_ng_cpu command: %s', args) try: @@ -69,9 +73,13 @@ def stress_ng_vm(node): vm = info.get('agent_burnin_vm_vm', 0) vm_bytes = info.get('agent_burnin_vm_vm-bytes', '98%') timeout = info.get('agent_burnin_vm_timeout', 86400) + outputfile = info.get('agent_burnin_vm_outputfile', None) args = ('stress-ng', '--vm', vm, '--vm-bytes', vm_bytes, '--timeout', timeout, '--metrics-brief') + if outputfile: + args += ('--log-file', outputfile,) + LOG.debug('Burn-in stress_ng_vm command: %s', args) try: @@ -97,11 +105,14 @@ def fio_disk(node): # 4 iterations, same as badblock's default 
loops = info.get('agent_burnin_fio_disk_loops', 4) runtime = info.get('agent_burnin_fio_disk_runtime', 0) + outputfile = info.get('agent_burnin_fio_disk_outputfile', None) args = ['fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1, '--ioengine', 'libaio', '--iodepth', '32', '--verify', 'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify', '--loops', loops, '--runtime', runtime, '--time_based'] + if outputfile: + args.extend(['--output-format', 'json', '--output', outputfile]) devices = hardware.list_all_block_devices() for device in devices: @@ -119,7 +130,7 @@ def fio_disk(node): raise errors.CommandExecutionError(error_msg) -def _do_fio_network(writer, runtime, partner): +def _do_fio_network(writer, runtime, partner, outputfile): args = ['fio', '--ioengine', 'net', '--port', '9000', '--fill_device', 1, '--group_reporting', '--gtod_reduce', 1, '--numjobs', 16] @@ -129,6 +140,8 @@ def _do_fio_network(writer, runtime, partner): else: xargs = ['--name', 'reader', '--rw', 'read', '--hostname', partner] args.extend(xargs) + if outputfile: + args.extend(['--output-format', 'json', '--output', outputfile]) while True: LOG.info('Burn-in fio network command: %s', ' '.join(map(str, args))) @@ -137,13 +150,17 @@ def _do_fio_network(writer, runtime, partner): # fio reports on stdout LOG.info(out) break - except (processutils.ProcessExecutionError, OSError) as e: + except processutils.ProcessExecutionError as e: error_msg = "fio (network) failed with error %s" % e LOG.error(error_msg) - # while the writer blocks in fio, the reader fails with + if writer: + raise errors.CommandExecutionError(error_msg) + # While the writer blocks in fio, the reader fails with # 'Connection {refused, timeout}' errors if the partner - # is not ready, so we need to wait explicitly - if not writer and 'Connection' in str(e): + # is not ready, so we need to wait explicitly. Using the + # exit code accounts for both, logging to stderr as well + # as to a file. 
+ if e.exit_code == 16: LOG.info("fio (network): reader retrying in %s seconds ...", NETWORK_READER_CYCLE) time.sleep(NETWORK_READER_CYCLE) @@ -171,6 +188,7 @@ def fio_network(node): info = node.get('driver_info', {}) runtime = info.get('agent_burnin_fio_network_runtime', 21600) + outputfile = info.get('agent_burnin_fio_network_outputfile', None) # get our role and identify our partner config = info.get('agent_burnin_fio_network_config') @@ -190,6 +208,14 @@ def fio_network(node): error_msg = ("fio (network) failed to find partner") raise errors.CleaningError(error_msg) - _do_fio_network(role == 'writer', runtime, partner) + logfilename = None + if outputfile: + logfilename = outputfile + '.' + role + _do_fio_network(role == 'writer', runtime, partner, logfilename) + LOG.debug("fio (network): first direction done, swapping roles ...") - _do_fio_network(not role == 'writer', runtime, partner) + + if outputfile: + irole = "reader" if (role == "writer") else "writer" + logfilename = outputfile + '.' 
+ irole + _do_fio_network(not role == 'writer', runtime, partner, logfilename) diff --git a/ironic_python_agent/tests/unit/test_burnin.py b/ironic_python_agent/tests/unit/test_burnin.py index 2258352ec..12127576d 100644 --- a/ironic_python_agent/tests/unit/test_burnin.py +++ b/ironic_python_agent/tests/unit/test_burnin.py @@ -36,14 +36,17 @@ class TestBurnin(base.IronicAgentTest): def test_stress_ng_cpu_non_default(self, mock_execute): - node = {'driver_info': {'agent_burnin_cpu_cpu': 3, - 'agent_burnin_cpu_timeout': 2911}} + node = {'driver_info': { + 'agent_burnin_cpu_cpu': 3, + 'agent_burnin_cpu_timeout': 2911, + 'agent_burnin_cpu_outputfile': '/var/log/burnin.cpu'}} mock_execute.return_value = (['out', 'err']) burnin.stress_ng_cpu(node) mock_execute.assert_called_once_with( - 'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief') + 'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief', + '--log-file', '/var/log/burnin.cpu') def test_stress_ng_cpu_no_stress_ng(self, mock_execute): @@ -70,16 +73,19 @@ class TestBurnin(base.IronicAgentTest): def test_stress_ng_vm_non_default(self, mock_execute): - node = {'driver_info': {'agent_burnin_vm_vm': 2, - 'agent_burnin_vm_vm-bytes': '25%', - 'agent_burnin_vm_timeout': 120}} + node = {'driver_info': { + 'agent_burnin_vm_vm': 2, + 'agent_burnin_vm_vm-bytes': '25%', + 'agent_burnin_vm_timeout': 120, + 'agent_burnin_vm_outputfile': '/var/log/burnin.vm'}} mock_execute.return_value = (['out', 'err']) burnin.stress_ng_vm(node) mock_execute.assert_called_once_with( 'stress-ng', '--vm', 2, '--vm-bytes', '25%', - '--timeout', 120, '--metrics-brief') + '--timeout', 120, '--metrics-brief', + '--log-file', '/var/log/burnin.vm') def test_stress_ng_vm_no_stress_ng(self, mock_execute): @@ -115,8 +121,10 @@ class TestBurnin(base.IronicAgentTest): @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) def test_fio_disk_no_default(self, mock_list, mock_execute): - node = {'driver_info': 
{'agent_burnin_fio_disk_runtime': 600, - 'agent_burnin_fio_disk_loops': 5}} + node = {'driver_info': { + 'agent_burnin_fio_disk_runtime': 600, + 'agent_burnin_fio_disk_loops': 5, + 'agent_burnin_fio_disk_outputfile': '/var/log/burnin.disk'}} mock_list.return_value = [ hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True), @@ -130,8 +138,9 @@ class TestBurnin(base.IronicAgentTest): 'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1, '--ioengine', 'libaio', '--iodepth', '32', '--verify', 'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify', - '--loops', 5, '--runtime', 600, '--time_based', '--name', - '/dev/sdj', '--name', '/dev/hdaa') + '--loops', 5, '--runtime', 600, '--time_based', '--output-format', + 'json', '--output', '/var/log/burnin.disk', '--name', '/dev/sdj', + '--name', '/dev/hdaa', ) @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) def test_fio_disk_no_fio(self, mock_list, mock_execute): @@ -167,6 +176,33 @@ class TestBurnin(base.IronicAgentTest): '--listen')] mock_execute.assert_has_calls(expected_calls) + def test_fio_network_reader_w_logfile(self, mock_execute): + + node = {'driver_info': { + 'agent_burnin_fio_network_runtime': 600, + 'agent_burnin_fio_network_config': + {'partner': 'host-002', + 'role': 'reader'}, + 'agent_burnin_fio_network_outputfile': '/var/log/burnin.network'}} + mock_execute.return_value = (['out', 'err']) + + burnin.fio_network(node) + + expected_calls = [ + mock.call('fio', '--ioengine', 'net', '--port', '9000', + '--fill_device', 1, '--group_reporting', + '--gtod_reduce', 1, '--numjobs', 16, '--name', + 'reader', '--rw', 'read', '--hostname', 'host-002', + '--output-format', 'json', '--output', + '/var/log/burnin.network.reader'), + mock.call('fio', '--ioengine', 'net', '--port', '9000', + '--fill_device', 1, '--group_reporting', + '--gtod_reduce', 1, '--numjobs', 16, '--name', 'writer', + '--rw', 'write', '--runtime', 600, '--time_based', + '--listen', '--output-format', 'json', 
'--output', + '/var/log/burnin.network.writer')] + mock_execute.assert_has_calls(expected_calls) + def test_fio_network_writer(self, mock_execute): node = {'driver_info': {'agent_burnin_fio_network_runtime': 600, @@ -189,6 +225,33 @@ class TestBurnin(base.IronicAgentTest): 'reader', '--rw', 'read', '--hostname', 'host-001')] mock_execute.assert_has_calls(expected_calls) + def test_fio_network_writer_w_logfile(self, mock_execute): + + node = {'driver_info': { + 'agent_burnin_fio_network_runtime': 600, + 'agent_burnin_fio_network_config': + {'partner': 'host-001', + 'role': 'writer'}, + 'agent_burnin_fio_network_outputfile': '/var/log/burnin.network'}} + mock_execute.return_value = (['out', 'err']) + + burnin.fio_network(node) + + expected_calls = [ + mock.call('fio', '--ioengine', 'net', '--port', '9000', + '--fill_device', 1, '--group_reporting', + '--gtod_reduce', 1, '--numjobs', 16, '--name', 'writer', + '--rw', 'write', '--runtime', 600, '--time_based', + '--listen', '--output-format', 'json', '--output', + '/var/log/burnin.network.writer'), + mock.call('fio', '--ioengine', 'net', '--port', '9000', + '--fill_device', 1, '--group_reporting', + '--gtod_reduce', 1, '--numjobs', 16, '--name', + 'reader', '--rw', 'read', '--hostname', 'host-001', + '--output-format', 'json', '--output', + '/var/log/burnin.network.reader')] + mock_execute.assert_has_calls(expected_calls) + def test_fio_network_no_fio(self, mock_execute): node = {'driver_info': {'agent_burnin_fio_network_config': @@ -226,11 +289,11 @@ class TestBurnin(base.IronicAgentTest): {'partner': 'host-004', 'role': 'reader'}}} # mock the infinite loop mock_execute.side_effect = (processutils.ProcessExecutionError( - 'Connection timeout'), + 'Connection timeout', exit_code=16), processutils.ProcessExecutionError( - 'Connection timeout'), + 'Connection timeout', exit_code=16), processutils.ProcessExecutionError( - 'Connection refused'), + 'Connection refused', exit_code=16), ['out', 'err'], # connected! 
['out', 'err']) # reversed roles diff --git a/releasenotes/notes/add-named-logfiles-to-burnin-4388309bf7442d53.yaml b/releasenotes/notes/add-named-logfiles-to-burnin-4388309bf7442d53.yaml new file mode 100644 index 000000000..9391d69db --- /dev/null +++ b/releasenotes/notes/add-named-logfiles-to-burnin-4388309bf7442d53.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + Add options to have named output files for the burn-in logging: + {'agent_burnin_cpu', 'agent_burnin_vm', 'agent_burnin_fio_network', + 'agent_burnin_fio_disk'}_outputfile. This should ease collecting + the output of the burn-in steps for analysis.