Make logs collection a hardware manager call

This allows hardware managers to collect additional logs.

Change-Id: If082b921d4bf71c4cc41a5a72db6995b08637374
This commit is contained in:
Dmitry Tantsur 2023-01-25 13:33:29 +01:00
parent 06413e54a0
commit c26f498f49
5 changed files with 158 additions and 153 deletions

View File

@ -16,6 +16,7 @@ import abc
import binascii import binascii
import collections import collections
import functools import functools
import io
import ipaddress import ipaddress
import json import json
from multiprocessing.pool import ThreadPool from multiprocessing.pool import ThreadPool
@ -1173,6 +1174,18 @@ class HardwareManager(object, metaclass=abc.ABCMeta):
'version': getattr(self, 'HARDWARE_MANAGER_VERSION', '1.0') 'version': getattr(self, 'HARDWARE_MANAGER_VERSION', '1.0')
} }
def collect_system_logs(self, io_dict, file_list):
"""Collect logs from the system.
Implementations should update `io_dict` and `file_list` with logs
to send to Ironic and Inspector.
:param io_dict: Dictionary mapping file names to binary IO objects
with corresponding data.
:param file_list: List of full file paths to include.
"""
raise errors.IncompatibleHardwareMethodError()
class GenericHardwareManager(HardwareManager): class GenericHardwareManager(HardwareManager):
HARDWARE_MANAGER_NAME = 'generic_hardware_manager' HARDWARE_MANAGER_NAME = 'generic_hardware_manager'
@ -2886,6 +2899,64 @@ class GenericHardwareManager(HardwareManager):
""" """
return inject_files.inject_files(node, ports, files, verify_ca) return inject_files.inject_files(node, ports, files, verify_ca)
def collect_system_logs(self, io_dict, file_list):
commands = {
'df': ['df', '-a'],
'dmesg': ['dmesg'],
'iptables': ['iptables', '-L'],
'ip_addr': ['ip', 'addr'],
'lsblk': ['lsblk', '--all',
'-o%s' % ','.join(utils.LSBLK_COLUMNS)],
'lsblk-full': ['lsblk', '--all', '--bytes',
'--output-all', '--pairs'],
'lshw': ['lshw', '-quiet', '-json'],
'mdstat': ['cat', '/proc/mdstat'],
'mount': ['mount'],
'multipath': ['multipath', '-ll'],
'parted': ['parted', '-l'],
'ps': ['ps', 'au'],
}
for name, cmd in commands.items():
utils.try_collect_command_output(io_dict, name, cmd)
_collect_udev(io_dict)
def _collect_udev(io_dict):
"""Collect device properties from udev."""
try:
out, _e = il_utils.execute('lsblk', '-no', 'KNAME')
except processutils.ProcessExecutionError as exc:
LOG.warning('Could not list block devices: %s', exc)
return
context = pyudev.Context()
for kname in out.splitlines():
kname = kname.strip()
if not kname:
continue
name = os.path.join('/dev', kname)
try:
udev = pyudev.Devices.from_device_file(context, name)
except Exception as e:
LOG.warning("Device %(dev)s is inaccessible, skipping... "
"Error: %(error)s", {'dev': name, 'error': e})
continue
try:
props = dict(udev.properties)
except AttributeError: # pyudev < 0.20
props = dict(udev)
fp = io.TextIOWrapper(io.BytesIO(), encoding='utf-8')
json.dump(props, fp)
buf = fp.detach()
buf.seek(0)
io_dict[f'udev/{kname}'] = buf
def _compare_extensions(ext1, ext2): def _compare_extensions(ext1, ext2):
mgr1 = ext1.obj mgr1 = ext1.obj

View File

@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import binascii import binascii
import json
import os import os
import shutil import shutil
import stat import stat
@ -5916,3 +5917,53 @@ class TestProtectedDiskSafetyChecks(base.IronicAgentTest):
hardware.safety_check_block_device, hardware.safety_check_block_device,
{}, '/dev/foo') {}, '/dev/foo')
self.assertEqual(1, mock_execute.call_count) self.assertEqual(1, mock_execute.call_count)
@mock.patch.object(il_utils, 'execute', autospec=True)
class TestCollectSystemLogs(base.IronicAgentTest):
def setUp(self):
super().setUp()
self.hardware = hardware.GenericHardwareManager()
@mock.patch('pyudev.Context', lambda: mock.sentinel.context)
@mock.patch('pyudev.Devices.from_device_file', autospec=True)
def test__collect_udev(self, mock_from_dev, mock_execute):
mock_execute.return_value = """
fake0
fake1
fake42
""", ""
mock_from_dev.side_effect = [
mock.Mock(properties={'ID_UUID': '0'}),
RuntimeError('nope'),
{'ID_UUID': '42'}
]
result = {}
hardware._collect_udev(result)
self.assertEqual({'udev/fake0', 'udev/fake42'}, set(result))
for i in ('0', '42'):
buf = result[f'udev/fake{i}']
# Avoiding getvalue on purpose - checking that the IO is not closed
val = json.loads(buf.read().decode('utf-8'))
self.assertEqual({'ID_UUID': i}, val)
@mock.patch.object(hardware, '_collect_udev', autospec=True)
def test_collect_system_logs(self, mock_udev, mock_execute):
commands = set()
expected = {'df', 'dmesg', 'iptables', 'ip', 'lsblk',
'lshw', 'cat', 'mount', 'multipath', 'parted', 'ps'}
def fake_execute(cmd, *args, **kwargs):
commands.add(cmd)
return cmd.encode(), ''
mock_execute.side_effect = fake_execute
io_dict = {}
file_list = []
self.hardware.collect_system_logs(io_dict, file_list)
self.assertEqual(commands, expected)
self.assertGreaterEqual(len(io_dict), len(expected))

View File

@ -17,7 +17,6 @@ import base64
import errno import errno
import glob import glob
import io import io
import json
import os import os
import shutil import shutil
import subprocess import subprocess
@ -411,14 +410,12 @@ class TestUtils(ironic_agent_base.IronicAgentTest):
mock_call.side_effect = os_error mock_call.side_effect = os_error
self.assertFalse(utils.is_journalctl_present()) self.assertFalse(utils.is_journalctl_present())
@mock.patch.object(utils, '_collect_udev', autospec=True)
@mock.patch.object(utils, 'gzip_and_b64encode', autospec=True) @mock.patch.object(utils, 'gzip_and_b64encode', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
@mock.patch.object(utils, 'is_journalctl_present', autospec=True) @mock.patch.object(utils, 'is_journalctl_present', autospec=True)
@mock.patch.object(utils, 'get_command_output', autospec=True)
@mock.patch.object(utils, 'get_journalctl_output', autospec=True) @mock.patch.object(utils, 'get_journalctl_output', autospec=True)
def test_collect_system_logs_journald( def test_collect_system_logs_journald(
self, mock_logs, mock_outputs, mock_journalctl, mock_gzip_b64, self, mock_logs, mock_journalctl, mock_dispatch, mock_gzip_b64):
mock_udev):
mock_journalctl.return_value = True mock_journalctl.return_value = True
ret = 'Patrick Star' ret = 'Patrick Star'
mock_gzip_b64.return_value = ret mock_gzip_b64.return_value = ret
@ -426,28 +423,17 @@ class TestUtils(ironic_agent_base.IronicAgentTest):
logs_string = utils.collect_system_logs() logs_string = utils.collect_system_logs()
self.assertEqual(ret, logs_string) self.assertEqual(ret, logs_string)
mock_logs.assert_called_once_with(lines=None) mock_logs.assert_called_once_with(lines=None)
calls = [mock.call(['ps', 'au']), mock.call(['df', '-a']),
mock.call(['iptables', '-L']), mock.call(['ip', 'addr']),
mock.call(['lshw', '-quiet', '-json'])]
mock_outputs.assert_has_calls(calls, any_order=True)
mock_gzip_b64.assert_called_once_with( mock_gzip_b64.assert_called_once_with(
io_dict={'journal': mock.ANY, 'ps': mock.ANY, 'df': mock.ANY, io_dict=mock.ANY, file_list=[])
'iptables': mock.ANY, 'ip_addr': mock.ANY, mock_dispatch.assert_called_once_with('collect_system_logs',
'lshw': mock.ANY, 'lsblk': mock.ANY, mock.ANY, [])
'lsblk-full': mock.ANY, 'mdstat': mock.ANY,
'mount': mock.ANY, 'parted': mock.ANY,
'multipath': mock.ANY},
file_list=[])
mock_udev.assert_called_once_with(mock.ANY)
@mock.patch.object(utils, '_collect_udev', autospec=True)
@mock.patch.object(utils, 'gzip_and_b64encode', autospec=True) @mock.patch.object(utils, 'gzip_and_b64encode', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
@mock.patch.object(utils, 'is_journalctl_present', autospec=True) @mock.patch.object(utils, 'is_journalctl_present', autospec=True)
@mock.patch.object(utils, 'get_command_output', autospec=True)
@mock.patch.object(utils, 'get_journalctl_output', autospec=True) @mock.patch.object(utils, 'get_journalctl_output', autospec=True)
def test_collect_system_logs_journald_with_logfile( def test_collect_system_logs_journald_with_logfile(
self, mock_logs, mock_outputs, mock_journalctl, mock_gzip_b64, self, mock_logs, mock_journalctl, mock_dispatch, mock_gzip_b64):
mock_udev):
tmp = tempfile.NamedTemporaryFile() tmp = tempfile.NamedTemporaryFile()
self.addCleanup(lambda: tmp.close()) self.addCleanup(lambda: tmp.close())
@ -459,55 +445,32 @@ class TestUtils(ironic_agent_base.IronicAgentTest):
logs_string = utils.collect_system_logs() logs_string = utils.collect_system_logs()
self.assertEqual(ret, logs_string) self.assertEqual(ret, logs_string)
mock_logs.assert_called_once_with(lines=None) mock_logs.assert_called_once_with(lines=None)
calls = [mock.call(['ps', 'au']), mock.call(['df', '-a']),
mock.call(['iptables', '-L']), mock.call(['ip', 'addr']),
mock.call(['lshw', '-quiet', '-json'])]
mock_outputs.assert_has_calls(calls, any_order=True)
mock_gzip_b64.assert_called_once_with( mock_gzip_b64.assert_called_once_with(
io_dict={'journal': mock.ANY, 'ps': mock.ANY, 'df': mock.ANY, io_dict=mock.ANY, file_list=[tmp.name])
'iptables': mock.ANY, 'ip_addr': mock.ANY, mock_dispatch.assert_called_once_with('collect_system_logs',
'lshw': mock.ANY, 'lsblk': mock.ANY, mock.ANY, [tmp.name])
'lsblk-full': mock.ANY, 'mdstat': mock.ANY,
'mount': mock.ANY, 'parted': mock.ANY,
'multipath': mock.ANY},
file_list=[tmp.name])
mock_udev.assert_called_once_with(mock.ANY)
@mock.patch.object(utils, '_collect_udev', autospec=True)
@mock.patch.object(utils, 'gzip_and_b64encode', autospec=True) @mock.patch.object(utils, 'gzip_and_b64encode', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
@mock.patch.object(utils, 'is_journalctl_present', autospec=True) @mock.patch.object(utils, 'is_journalctl_present', autospec=True)
@mock.patch.object(utils, 'get_command_output', autospec=True)
def test_collect_system_logs_non_journald( def test_collect_system_logs_non_journald(
self, mock_outputs, mock_journalctl, mock_gzip_b64, self, mock_journalctl, mock_dispatch, mock_gzip_b64):
mock_udev):
mock_journalctl.return_value = False mock_journalctl.return_value = False
ret = 'SpongeBob SquarePants' ret = 'SpongeBob SquarePants'
mock_gzip_b64.return_value = ret mock_gzip_b64.return_value = ret
logs_string = utils.collect_system_logs() logs_string = utils.collect_system_logs()
self.assertEqual(ret, logs_string) self.assertEqual(ret, logs_string)
calls = [mock.call(['dmesg']), mock.call(['ps', 'au']),
mock.call(['df', '-a']), mock.call(['iptables', '-L']),
mock.call(['ip', 'addr']),
mock.call(['lshw', '-quiet', '-json'])]
mock_outputs.assert_has_calls(calls, any_order=True)
mock_gzip_b64.assert_called_once_with( mock_gzip_b64.assert_called_once_with(
io_dict={'dmesg': mock.ANY, 'ps': mock.ANY, 'df': mock.ANY, io_dict=mock.ANY, file_list=['/var/log'])
'iptables': mock.ANY, 'ip_addr': mock.ANY, mock_dispatch.assert_called_once_with('collect_system_logs',
'lshw': mock.ANY, 'lsblk': mock.ANY, mock.ANY, ['/var/log'])
'lsblk-full': mock.ANY, 'mdstat': mock.ANY,
'mount': mock.ANY, 'parted': mock.ANY,
'multipath': mock.ANY},
file_list=['/var/log'])
mock_udev.assert_called_once_with(mock.ANY)
@mock.patch.object(utils, '_collect_udev', autospec=True)
@mock.patch.object(utils, 'gzip_and_b64encode', autospec=True) @mock.patch.object(utils, 'gzip_and_b64encode', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
@mock.patch.object(utils, 'is_journalctl_present', autospec=True) @mock.patch.object(utils, 'is_journalctl_present', autospec=True)
@mock.patch.object(utils, 'get_command_output', autospec=True)
def test_collect_system_logs_non_journald_with_logfile( def test_collect_system_logs_non_journald_with_logfile(
self, mock_outputs, mock_journalctl, mock_gzip_b64, self, mock_journalctl, mock_dispatch, mock_gzip_b64):
mock_udev):
tmp = tempfile.NamedTemporaryFile() tmp = tempfile.NamedTemporaryFile()
self.addCleanup(lambda: tmp.close()) self.addCleanup(lambda: tmp.close())
@ -518,44 +481,10 @@ class TestUtils(ironic_agent_base.IronicAgentTest):
logs_string = utils.collect_system_logs() logs_string = utils.collect_system_logs()
self.assertEqual(ret, logs_string) self.assertEqual(ret, logs_string)
calls = [mock.call(['dmesg']), mock.call(['ps', 'au']),
mock.call(['df', '-a']), mock.call(['iptables', '-L']),
mock.call(['ip', 'addr']),
mock.call(['lshw', '-quiet', '-json'])]
mock_outputs.assert_has_calls(calls, any_order=True)
mock_gzip_b64.assert_called_once_with( mock_gzip_b64.assert_called_once_with(
io_dict={'dmesg': mock.ANY, 'ps': mock.ANY, 'df': mock.ANY, io_dict=mock.ANY, file_list=['/var/log', tmp.name])
'iptables': mock.ANY, 'ip_addr': mock.ANY, mock_dispatch.assert_called_once_with('collect_system_logs',
'lshw': mock.ANY, 'lsblk': mock.ANY, mock.ANY, ['/var/log', tmp.name])
'lsblk-full': mock.ANY, 'mdstat': mock.ANY,
'mount': mock.ANY, 'parted': mock.ANY,
'multipath': mock.ANY},
file_list=['/var/log', tmp.name])
mock_udev.assert_called_once_with(mock.ANY)
@mock.patch('pyudev.Context', lambda: mock.sentinel.context)
@mock.patch('pyudev.Devices.from_device_file', autospec=True)
@mock.patch.object(ironic_utils, 'execute', autospec=True)
def test_collect_udev(self, mock_execute, mock_from_dev):
mock_execute.return_value = """
fake0
fake1
fake42
""", ""
mock_from_dev.side_effect = [
mock.Mock(properties={'ID_UUID': '0'}),
RuntimeError('nope'),
{'ID_UUID': '42'}
]
result = {}
utils._collect_udev(result)
self.assertEqual({'udev/fake0', 'udev/fake42'}, set(result))
for i in ('0', '42'):
buf = result[f'udev/fake{i}']
# Avoiding getvalue on purpose - checking that the IO is not closed
val = json.loads(buf.read().decode('utf-8'))
self.assertEqual({'ID_UUID': i}, val)
def test_get_ssl_client_options(self): def test_get_ssl_client_options(self):
# defaults # defaults

View File

@ -33,7 +33,6 @@ from oslo_concurrency import processutils
from oslo_config import cfg from oslo_config import cfg
from oslo_log import log as logging from oslo_log import log as logging
from oslo_utils import units from oslo_utils import units
import pyudev
import requests import requests
import tenacity import tenacity
@ -63,21 +62,6 @@ LSBLK_COLUMNS = ['KNAME', 'MODEL', 'SIZE', 'ROTA',
'TYPE', 'UUID', 'PARTUUID', 'SERIAL'] 'TYPE', 'UUID', 'PARTUUID', 'SERIAL']
COLLECT_LOGS_COMMANDS = {
'ps': ['ps', 'au'],
'df': ['df', '-a'],
'iptables': ['iptables', '-L'],
'ip_addr': ['ip', 'addr'],
'lshw': ['lshw', '-quiet', '-json'],
'lsblk': ['lsblk', '--all', '-o%s' % ','.join(LSBLK_COLUMNS)],
'lsblk-full': ['lsblk', '--all', '--bytes', '--output-all', '--pairs'],
'mdstat': ['cat', '/proc/mdstat'],
'mount': ['mount'],
'parted': ['parted', '-l'],
'multipath': ['multipath', '-ll'],
}
DEVICE_EXTRACTOR = re.compile(r'^(?:(.*\d)p|(.*\D))(?:\d+)$') DEVICE_EXTRACTOR = re.compile(r'^(?:(.*\d)p|(.*\D))(?:\d+)$')
@ -540,40 +524,12 @@ def gzip_and_b64encode(io_dict=None, file_list=None):
return _encode_as_text(fp.getvalue()) return _encode_as_text(fp.getvalue())
def _collect_udev(io_dict): def try_collect_command_output(io_dict, file_name, command):
"""Collect device properties from udev.""" LOG.debug('Collecting command output: %s', command)
try: try:
out, _e = ironic_utils.execute('lsblk', '-no', 'KNAME') io_dict[file_name] = get_command_output(command)
except processutils.ProcessExecutionError as exc: except errors.CommandExecutionError:
LOG.warning('Could not list block devices: %s', exc) LOG.debug('Collecting logs from command %s has failed', command)
return
context = pyudev.Context()
for kname in out.splitlines():
kname = kname.strip()
if not kname:
continue
name = os.path.join('/dev', kname)
try:
udev = pyudev.Devices.from_device_file(context, name)
except Exception as e:
LOG.warning("Device %(dev)s is inaccessible, skipping... "
"Error: %(error)s", {'dev': name, 'error': e})
continue
try:
props = dict(udev.properties)
except AttributeError: # pyudev < 0.20
props = dict(udev)
fp = io.TextIOWrapper(io.BytesIO(), encoding='utf-8')
json.dump(props, fp)
buf = fp.detach()
buf.seek(0)
io_dict[f'udev/{kname}'] = buf
def collect_system_logs(journald_max_lines=None): def collect_system_logs(journald_max_lines=None):
@ -589,12 +545,6 @@ def collect_system_logs(journald_max_lines=None):
""" """
LOG.info('Collecting system logs and debugging information') LOG.info('Collecting system logs and debugging information')
def try_get_command_output(io_dict, file_name, command):
try:
io_dict[file_name] = get_command_output(command)
except errors.CommandExecutionError:
LOG.debug('Collecting logs from command %s has failed', command)
io_dict = {} io_dict = {}
file_list = [] file_list = []
log_locations = [CONF.log_file, CONF.log_dir] log_locations = [CONF.log_file, CONF.log_dir]
@ -604,20 +554,19 @@ def collect_system_logs(journald_max_lines=None):
if log_loc and os.path.exists(log_loc): if log_loc and os.path.exists(log_loc):
file_list.append(log_loc) file_list.append(log_loc)
else: else:
try_get_command_output(io_dict, 'dmesg', ['dmesg'])
file_list.append('/var/log') file_list.append('/var/log')
for log_loc in log_locations: for log_loc in log_locations:
if (log_loc and os.path.exists(log_loc) if (log_loc and os.path.exists(log_loc)
and not log_loc.startswith('/var/log')): and not log_loc.startswith('/var/log')):
file_list.append(log_loc) file_list.append(log_loc)
for name, cmd in COLLECT_LOGS_COMMANDS.items(): # Avoid circular imports
try_get_command_output(io_dict, name, cmd) from ironic_python_agent import hardware
try: try:
_collect_udev(io_dict) hardware.dispatch_to_all_managers('collect_system_logs',
except Exception: io_dict, file_list)
LOG.exception('Unexpected error when collecting udev properties') except errors.HardwareManagerMethodNotFound:
LOG.warning('All hardware managers failed to collect logs')
return gzip_and_b64encode(io_dict=io_dict, file_list=file_list) return gzip_and_b64encode(io_dict=io_dict, file_list=file_list)

View File

@ -0,0 +1,5 @@
---
features:
- |
Ramdisk log collection now uses the hardware managers mechanism. You can
provide a `collect_system_logs` call to extend the collection process.