From 1e69403a9e8dd6084598d6005448c8fcc4835f20 Mon Sep 17 00:00:00 2001
From: Alex Schultz
Date: Wed, 30 Nov 2016 15:34:41 -0700
Subject: [PATCH] Add overcloud support report collect command

This command is to be used by an operator to run sosreport on a specific
set of servers (or all of them) and retrieve log bundles that can be used
to debug the status of the cluster or troubleshoot issues.

Depends-On: I47c486d14c46a653c61cfd92d9f484efe0407217
Change-Id: I45699dfa6eb3e83d419c7041dbb72cc5d5e4f0ea
Implements-Blueprint: capture-environment-status-and-logs
---
 ...ment-status-and-logs-5f7f0f287d8465c5.yaml |   7 +
 setup.cfg                                     |   1 +
 tripleoclient/exceptions.py                   |  12 ++
 .../tests/v1/overcloud_support/__init__.py    |   0
 .../test_overcloud_support.py                 | 133 +++++++++++++
 tripleoclient/tests/workflows/test_support.py | 185 ++++++++++++++++++
 tripleoclient/v1/overcloud_support.py         | 107 ++++++++++
 tripleoclient/workflows/support.py            | 146 ++++++++++++++
 8 files changed, 591 insertions(+)
 create mode 100644 releasenotes/notes/capture-environment-status-and-logs-5f7f0f287d8465c5.yaml
 create mode 100644 tripleoclient/tests/v1/overcloud_support/__init__.py
 create mode 100644 tripleoclient/tests/v1/overcloud_support/test_overcloud_support.py
 create mode 100644 tripleoclient/tests/workflows/test_support.py
 create mode 100644 tripleoclient/v1/overcloud_support.py
 create mode 100644 tripleoclient/workflows/support.py

diff --git a/releasenotes/notes/capture-environment-status-and-logs-5f7f0f287d8465c5.yaml b/releasenotes/notes/capture-environment-status-and-logs-5f7f0f287d8465c5.yaml
new file mode 100644
index 000000000..3a92081c4
--- /dev/null
+++ b/releasenotes/notes/capture-environment-status-and-logs-5f7f0f287d8465c5.yaml
@@ -0,0 +1,7 @@
+---
+features:
+  - |
+    Implemented new 'openstack overcloud support report collect' command to
+    execute a log collection and retrieval against overcloud nodes. This new
+    command allows an operator to perform sosreport retrieval from all nodes
+    or specific nodes based on their server name.
diff --git a/setup.cfg b/setup.cfg
index 76df2c626..fe250f907 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -85,6 +85,7 @@ openstack.tripleoclient.v1 =
     overcloud_profiles_match = tripleoclient.v1.overcloud_profiles:MatchProfiles
     overcloud_profiles_list = tripleoclient.v1.overcloud_profiles:ListProfiles
     overcloud_raid_create = tripleoclient.v1.overcloud_raid:CreateRAID
+    overcloud_support_report_collect = tripleoclient.v1.overcloud_support:ReportExecute
     overcloud_update_clear_breakpoints = tripleoclient.v1.overcloud_update:ClearBreakpointsOvercloud
     overcloud_update_stack = tripleoclient.v1.overcloud_update:UpdateOvercloud
     overcloud_execute = tripleoclient.v1.overcloud_execute:RemoteExecute
diff --git a/tripleoclient/exceptions.py b/tripleoclient/exceptions.py
index a87988fcc..bad805d20 100644
--- a/tripleoclient/exceptions.py
+++ b/tripleoclient/exceptions.py
@@ -87,3 +87,15 @@ class WorkflowActionError(Exception):
     def __init__(self, message, action='', output=''):
         message = message.format(action, output)
         super(WorkflowActionError, self).__init__(message)
+
+
+class DownloadError(Exception):
+    """Download attempt failed"""
+
+
+class LogFetchError(Exception):
+    """Fetching logs failed"""
+
+
+class ContainerDeleteFailed(Exception):
+    """Container deletion failed"""
diff --git a/tripleoclient/tests/v1/overcloud_support/__init__.py b/tripleoclient/tests/v1/overcloud_support/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tripleoclient/tests/v1/overcloud_support/test_overcloud_support.py b/tripleoclient/tests/v1/overcloud_support/test_overcloud_support.py
new file mode 100644
index 000000000..5b6596f45
--- /dev/null
+++ b/tripleoclient/tests/v1/overcloud_support/test_overcloud_support.py
@@ -0,0 +1,133 @@
+# Copyright 2017 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+
+import mock
+
+
+from tripleoclient.tests.v1.overcloud_deploy import fakes
+from tripleoclient.v1 import overcloud_support
+
+
+class TestOvercloudSupportReport(fakes.TestDeployOvercloud):
+    """Tests for the ReportExecute command.
+
+    Every test replaces the functions of tripleoclient.workflows.support
+    with mocks, so only the argument parsing and the choice of which
+    workflow helpers get called are exercised.
+    """
+
+    def setUp(self):
+        super(TestOvercloudSupportReport, self).setUp()
+
+        self.cmd = overcloud_support.ReportExecute(self.app, None)
+        self.app.client_manager.workflow_engine = mock.Mock()
+        self.app.client_manager.tripleoclient = mock.Mock()
+        self.app.client_manager.object_store = mock.Mock()
+        self.workflow = self.app.client_manager.workflow_engine
+        self.swift = self.app.client_manager.object_store
+
+    # mock.patch decorators are applied bottom-up, so the mock arguments of
+    # each test are listed in the reverse order of the decorators.
+    @mock.patch('tripleoclient.workflows.support.download_files')
+    @mock.patch('tripleoclient.workflows.support.delete_container')
+    @mock.patch('tripleoclient.workflows.support.fetch_logs')
+    def test_action(self, fetch_logs_mock, delete_container_mock,
+                    download_files_mock):
+        # Default mode: collect, download and then delete the container.
+        arglist = ['-c', 'mycontainer', '-t', '60', 'control']
+        verifylist = [
+            ('server_name', 'control'),
+            ('container', 'mycontainer'),
+            ('timeout', 60)
+        ]
+        parsed_args = self.check_parser(self.cmd, arglist, verifylist)
+
+        self.cmd.take_action(parsed_args)
+
+        fetch_logs_mock.assert_called_once_with(self.app.client_manager,
+                                                parsed_args.container,
+                                                parsed_args.server_name,
+                                                timeout=60,
+                                                concurrency=None)
+
+        download_files_mock.assert_called_once_with(
+            self.app.client_manager, parsed_args.container,
+            parsed_args.destination)
+
+        delete_container_mock.assert_called_once_with(self.app.client_manager,
+                                                      parsed_args.container,
+                                                      timeout=60,
+                                                      concurrency=None)
+
+    @mock.patch('tripleoclient.workflows.support.download_files')
+    @mock.patch('tripleoclient.workflows.support.delete_container')
+    @mock.patch('tripleoclient.workflows.support.fetch_logs')
+    def test_action_skip_container_delete(self, fetch_logs_mock,
+                                          delete_container_mock,
+                                          download_files_mock):
+        # --skip-container-delete still collects and downloads, but the
+        # container must be left in place.
+        arglist = ['-c', 'mycontainer', '--skip-container-delete', 'control']
+        verifylist = [
+            ('server_name', 'control'),
+            ('container', 'mycontainer')
+        ]
+        parsed_args = self.check_parser(self.cmd, arglist, verifylist)
+
+        self.cmd.take_action(parsed_args)
+
+        fetch_logs_mock.assert_called_once_with(self.app.client_manager,
+                                                parsed_args.container,
+                                                parsed_args.server_name,
+                                                timeout=None,
+                                                concurrency=None)
+
+        download_files_mock.assert_called_once_with(
+            self.app.client_manager, parsed_args.container,
+            parsed_args.destination)
+
+        delete_container_mock.assert_not_called()
+
+    @mock.patch('tripleoclient.workflows.support.delete_container')
+    @mock.patch('tripleoclient.workflows.support.fetch_logs')
+    def test_action_collect_logs_only(self, fetch_logs_mock,
+                                      delete_container_mock):
+        # --collect-only runs the collection and never deletes the container.
+        arglist = ['--collect-only', '-t', '60', '-n', '10', 'control']
+        verifylist = [
+            ('server_name', 'control'),
+            ('collect_only', True),
+            ('timeout', 60),
+            ('concurrency', 10)
+        ]
+        parsed_args = self.check_parser(self.cmd, arglist, verifylist)
+
+        self.cmd.take_action(parsed_args)
+
+        fetch_logs_mock.assert_called_once_with(self.app.client_manager,
+                                                parsed_args.container,
+                                                parsed_args.server_name,
+                                                timeout=60,
+                                                concurrency=10)
+        delete_container_mock.assert_not_called()
+
+    @mock.patch('tripleoclient.workflows.support.download_files')
+    @mock.patch('tripleoclient.workflows.support.delete_container')
+    @mock.patch('tripleoclient.workflows.support.fetch_logs')
+    def test_action_download_logs_only(self, fetch_logs_mock,
+                                       delete_container_mock,
+                                       download_files_mock):
+        # --download-only skips both the collection and the deletion.
+        arglist = ['--download-only', 'control']
+        verifylist = [
+            ('server_name', 'control'),
+            ('download_only', True),
+        ]
+        parsed_args = self.check_parser(self.cmd, arglist, verifylist)
+
+        self.cmd.take_action(parsed_args)
+
+        fetch_logs_mock.assert_not_called()
+        delete_container_mock.assert_not_called()
+        download_files_mock.assert_called_once_with(
+            self.app.client_manager, parsed_args.container,
+            parsed_args.destination)
diff --git a/tripleoclient/tests/workflows/test_support.py b/tripleoclient/tests/workflows/test_support.py
new file mode 100644
index 000000000..62b11e12a
--- /dev/null
+++ b/tripleoclient/tests/workflows/test_support.py
@@ -0,0 +1,185 @@
+# Copyright 2017 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+
+import mock
+
+from tripleoclient.exceptions import DownloadError
+from tripleoclient.tests.v1.overcloud_deploy import fakes
+from tripleoclient.workflows import support
+
+
+class TestSupportFetchLogs(fakes.TestDeployOvercloud):
+    """Tests for support.fetch_logs with the workflow layer mocked out."""
+
+    def setUp(self):
+        super(TestSupportFetchLogs, self).setUp()
+        self.app.client_manager = mock.Mock()
+        self.app.client_manager.workflow_engine = self.workflow = mock.Mock()
+        self.tripleoclient = mock.Mock()
+        self.websocket = mock.Mock()
+        self.websocket.__enter__ = lambda s: self.websocket
+        self.websocket.__exit__ = lambda s, *exc: None
+        self.tripleoclient.messaging_websocket.return_value = self.websocket
+        self.app.client_manager.tripleoclient = self.tripleoclient
+
+        uuid4_patcher = mock.patch('uuid.uuid4', return_value="UUID4")
+        self.mock_uuid4 = uuid4_patcher.start()
+        # stop() has to be called on the patcher; the object returned by
+        # start() is the mock itself and its stop attribute undoes nothing.
+        self.addCleanup(uuid4_patcher.stop)
+
+    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
+    @mock.patch('tripleoclient.workflows.base.start_workflow')
+    def test_fetch_logs(self, start_wf_mock, messages_mock):
+        messages_mock.return_value = []
+        fetch_name = 'tripleo.support.v1.fetch_logs'
+        fetch_input = {
+            'server_name': 'test',
+            'container': 'test',
+            'queue_name': 'UUID4'
+        }
+        support.fetch_logs(self.app.client_manager, 'test', 'test')
+        start_wf_mock.assert_called_once_with(self.workflow,
+                                              fetch_name,
+                                              workflow_input=fetch_input)
+
+    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
+    @mock.patch('tripleoclient.workflows.base.start_workflow')
+    def test_fetch_logs_with_timeout(self, start_wf_mock, messages_mock):
+        messages_mock.return_value = []
+        fetch_name = 'tripleo.support.v1.fetch_logs'
+        fetch_input = {
+            'server_name': 'test',
+            'container': 'test',
+            'queue_name': 'UUID4',
+            'timeout': 59,
+        }
+        support.fetch_logs(self.app.client_manager, 'test', 'test', timeout=59)
+        start_wf_mock.assert_called_once_with(self.workflow,
+                                              fetch_name,
+                                              workflow_input=fetch_input)
+
+    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
+    @mock.patch('tripleoclient.workflows.base.start_workflow')
+    def test_fetch_logs_with_concurrency(self, start_wf_mock, messages_mock):
+        messages_mock.return_value = []
+        fetch_name = 'tripleo.support.v1.fetch_logs'
+        fetch_input = {
+            'server_name': 'test',
+            'container': 'test',
+            'queue_name': 'UUID4',
+            'concurrency': 10,
+        }
+        support.fetch_logs(self.app.client_manager, 'test', 'test',
+                           concurrency=10)
+        start_wf_mock.assert_called_once_with(self.workflow,
+                                              fetch_name,
+                                              workflow_input=fetch_input)
+
+
+class TestSupportDeleteContainer(fakes.TestDeployOvercloud):
+    """Tests for support.delete_container with the workflow layer mocked."""
+
+    def setUp(self):
+        super(TestSupportDeleteContainer, self).setUp()
+        self.app.client_manager = mock.Mock()
+        self.app.client_manager.workflow_engine = self.workflow = mock.Mock()
+        self.tripleoclient = mock.Mock()
+        self.websocket = mock.Mock()
+        self.websocket.__enter__ = lambda s: self.websocket
+        self.websocket.__exit__ = lambda s, *exc: None
+        self.tripleoclient.messaging_websocket.return_value = self.websocket
+        self.app.client_manager.tripleoclient = self.tripleoclient
+
+        uuid4_patcher = mock.patch('uuid.uuid4', return_value="UUID4")
+        self.mock_uuid4 = uuid4_patcher.start()
+        # stop() has to be called on the patcher; the object returned by
+        # start() is the mock itself and its stop attribute undoes nothing.
+        self.addCleanup(uuid4_patcher.stop)
+
+    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
+    @mock.patch('tripleoclient.workflows.base.start_workflow')
+    def test_delete_container(self, start_wf_mock, messages_mock):
+        messages_mock.return_value = []
+        fetch_name = 'tripleo.support.v1.delete_container'
+        fetch_input = {
+            'container': 'test',
+            'queue_name': 'UUID4'
+        }
+        support.delete_container(self.app.client_manager, 'test')
+        start_wf_mock.assert_called_once_with(self.workflow,
+                                              fetch_name,
+                                              workflow_input=fetch_input)
+
+    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
+    @mock.patch('tripleoclient.workflows.base.start_workflow')
+    def test_delete_container_with_timeout(self, start_wf_mock, messages_mock):
+        messages_mock.return_value = []
+        fetch_name = 'tripleo.support.v1.delete_container'
+        fetch_input = {
+            'container': 'test',
+            'queue_name': 'UUID4',
+            'timeout': 59,
+        }
+        support.delete_container(self.app.client_manager, 'test', timeout=59)
+        start_wf_mock.assert_called_once_with(self.workflow,
+                                              fetch_name,
+                                              workflow_input=fetch_input)
+
+    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
+    @mock.patch('tripleoclient.workflows.base.start_workflow')
+    def test_delete_container_with_concurrency(self, start_wf_mock,
+                                               messages_mock):
+        messages_mock.return_value = []
+        fetch_name = 'tripleo.support.v1.delete_container'
+        fetch_input = {
+            'container': 'test',
+            'queue_name': 'UUID4',
+            'concurrency': 10,
+        }
+        support.delete_container(self.app.client_manager, 'test',
+                                 concurrency=10)
+        start_wf_mock.assert_called_once_with(self.workflow,
+                                              fetch_name,
+                                              workflow_input=fetch_input)
+
+
+class TestDownloadContainer(fakes.TestDeployOvercloud):
+    """Tests for support.download_files against a mocked object store.
+
+    check_local_space and os.path.exists are replaced through mock.patch so
+    the replacements are undone after each test and no directory is created
+    on the machine running the tests.
+    """
+
+    def setUp(self):
+        super(TestDownloadContainer, self).setUp()
+
+        self.app.client_manager.workflow_engine = mock.Mock()
+        self.app.client_manager.tripleoclient = mock.Mock()
+        self.app.client_manager.object_store = mock.Mock()
+
+    @mock.patch('os.path.exists', return_value=True)
+    @mock.patch('tripleoclient.workflows.support.check_local_space',
+                return_value=False)
+    def test_download_files_not_enough_space(self, space_mock, exists_mock):
+        oc = self.app.client_manager.object_store
+        oc.object_list.return_value = [{'bytes': 100}]
+        self.assertRaises(DownloadError,
+                          support.download_files,
+                          self.app.client_manager,
+                          'test',
+                          'test')
+
+    @mock.patch('os.path.exists', return_value=True)
+    @mock.patch('tripleoclient.workflows.support.check_local_space',
+                return_value=True)
+    def test_download_files(self, space_mock, exists_mock):
+        oc = self.app.client_manager.object_store
+        oc.object_list.return_value = [
+            {'name': 'test1'}
+        ]
+        oc.object_save = mock.MagicMock()
+        support.download_files(self.app.client_manager, 'test', '/test')
+        oc.object_save.assert_called_with(container='test',
+                                          object='test1',
+                                          file='/test/test1')
diff --git a/tripleoclient/v1/overcloud_support.py b/tripleoclient/v1/overcloud_support.py
new file mode 100644
index 000000000..e411a1c10
--- /dev/null
+++ b/tripleoclient/v1/overcloud_support.py
@@ -0,0 +1,125 @@
+# Copyright 2017 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+
+import logging
+
+from tripleoclient.workflows import support
+
+from osc_lib.command import command
+from osc_lib.i18n import _
+
+
+class ReportExecute(command.Command):
+    """Run sosreport on selected servers."""
+
+    log = logging.getLogger(__name__ + ".ReportExecute")
+
+    def get_parser(self, prog_name):
+        """Build the argument parser for the command.
+
+        :param prog_name: program name handed to the parent parser
+        :returns: the parent parser with the report options added
+        """
+        parser = super(ReportExecute, self).get_parser(prog_name)
+        parser.add_argument('server_name',
+                            help=_('Nova server_name or partial name to match.'
+                                   ' For example "controller" will match all '
+                                   'controllers for an environment.'))
+        parser.add_argument('-c', '--container', dest='container',
+                            default='overcloud_support',
+                            help=_('Swift Container to store logs to'))
+        parser.add_argument('-o', '--output', dest='destination',
+                            default='support_logs',
+                            help=_('Output directory for the report'))
+        parser.add_argument('--skip-container-delete', dest='skip_delete',
+                            default=False,
+                            help=_('Do not delete the container after the '
+                                   'files have been downloaded. Ignored '
+                                   'if --collect-only or --download-only '
+                                   'is provided.'),
+                            action='store_true')
+        parser.add_argument('-t', '--timeout', dest='timeout', type=int,
+                            default=None,
+                            help=_('Maximum time to wait for the log '
+                                   'collection and container deletion '
+                                   'workflows to finish.'))
+        parser.add_argument('-n', '--concurrency', dest='concurrency',
+                            type=int, default=None,
+                            help=_('Number of parallel log collection and '
+                                   'object deletion tasks to run.'))
+        # --collect-only and --download-only cannot be given together
+        group = parser.add_mutually_exclusive_group(required=False)
+        group.add_argument('--collect-only', dest='collect_only',
+                           help=_('Skip log downloads, only collect logs and '
+                                  'put in the container'),
+                           default=False,
+                           action='store_true')
+        group.add_argument('--download-only', dest='download_only',
+                           help=_('Skip generation, only download from '
+                                  'the provided container'),
+                           default=False,
+                           action='store_true')
+        return parser
+
+    def take_action(self, parsed_args):
+        """Collect, download and clean up the logs as requested.
+
+        Logs are collected unless --download-only is given and downloaded
+        unless --collect-only is given. The container is deleted only when
+        both steps ran and --skip-container-delete was not passed.
+
+        :param parsed_args: parsed command line arguments
+        :raises Exception: when server_name is empty; errors raised by the
+                           support helpers are logged and re-raised
+        """
+        self.log.debug('take_action({})'.format(parsed_args))
+
+        clients = self.app.client_manager
+        container = parsed_args.container
+        server_name = parsed_args.server_name
+        destination = parsed_args.destination
+        timeout = parsed_args.timeout
+        concurrency = parsed_args.concurrency
+
+        if not server_name:
+            raise Exception(_('Please specify the server_name option.'))
+
+        if not parsed_args.download_only:
+            print(_('Starting log collection... (This may take a while)'))
+            try:
+                support.fetch_logs(clients, container, server_name,
+                                   timeout=timeout, concurrency=concurrency)
+            except Exception as err:
+                self.log.error('Unable to fetch logs, {}'.format(err))
+                # A bare raise re-raises the active exception together with
+                # its original traceback.
+                raise
+
+        if not parsed_args.collect_only:
+            try:
+                support.download_files(clients, container, destination)
+            except Exception as err:
+                self.log.error('Unable to download files, {}'.format(err))
+                raise
+
+        if not parsed_args.collect_only and not parsed_args.download_only and \
+                not parsed_args.skip_delete:
+            print(_('Deleting container') + ' {}...'.format(container))
+            try:
+                support.delete_container(clients, container, timeout=timeout,
+                                         concurrency=concurrency)
+            except Exception as err:
+                self.log.error('Unable to delete container, {}'.format(err))
+                raise
diff --git a/tripleoclient/workflows/support.py b/tripleoclient/workflows/support.py
new file mode 100644
index 000000000..0a8be5307
--- /dev/null
+++ b/tripleoclient/workflows/support.py
@@ -0,0 +1,146 @@
+# Copyright 2017 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+
+import os
+import uuid
+
+from osc_lib.i18n import _
+
+from tripleoclient.exceptions import ContainerDeleteFailed
+from tripleoclient.exceptions import DownloadError
+from tripleoclient.exceptions import LogFetchError
+from tripleoclient.workflows import base
+
+
+def check_local_space(path, object_list):
+    """Check that the filesystem holding path can fit all the objects.
+
+    :param path: existing local path on the filesystem to check
+    :param object_list: list of dicts, each holding the object size under
+                        the 'bytes' key
+    :returns: True when the space available is at least the sum of the
+              object sizes, False otherwise
+    """
+    required_space = sum(x['bytes'] for x in object_list)
+    stats = os.statvfs(path)
+    free_space = stats.f_bavail * stats.f_frsize
+    return free_space >= required_space
+
+
+def download_files(clients, container_name, destination):
+    """Download every object of a container into a local directory.
+
+    :param clients: openstack clients
+    :param container_name: name of the container to download the logs from
+    :param destination: folder to download files to, created when missing;
+                        a relative path is resolved against the current
+                        working directory
+    :raises DownloadError: when check_local_space reports that the
+                           destination cannot hold the objects
+    """
+    oc = clients.object_store
+    object_list = oc.object_list(container=container_name, all_data=True)
+
+    # Resolve the destination once so that the directory created below and
+    # the paths handed to object_save always agree. Joining with os.sep
+    # used to send the files of a relative destination such as 'logs/out'
+    # to '/logs/out' instead of the directory that had just been created.
+    destination = os.path.abspath(destination)
+
+    if not os.path.exists(destination):
+        print('Creating destination path: {}'.format(destination))
+        os.makedirs(destination)
+
+    if not check_local_space(destination, object_list):
+        raise DownloadError(_('Not enough local space to download files.'))
+
+    for data in object_list:
+        print('Downloading file: {}'.format(data['name']))
+        file_path = os.path.join(destination, data['name'])
+        oc.object_save(container=container_name,
+                       object=data['name'],
+                       file=file_path)
+
+
+def _run_workflow(clients, workflow_name, workflow_input, timeout, error):
+    """Start a support workflow and report the messages it sends back.
+
+    :param clients: openstack clients
+    :param workflow_name: name of the workflow to start
+    :param workflow_input: input of the workflow, including 'queue_name'
+    :param timeout: timeout handed to base.wait_for_messages
+    :param error: exception class raised with the text of the first message
+                  whose status is not SUCCESS
+    """
+    workflow_client = clients.workflow_engine
+    tripleoclients = clients.tripleoclient
+    queue_name = workflow_input['queue_name']
+
+    execution = base.start_workflow(
+        workflow_client,
+        workflow_name,
+        workflow_input=workflow_input
+    )
+
+    # NOTE(review): the unit tests stub __enter__/__exit__ on the object
+    # returned by messaging_websocket, so it is used as a context manager
+    # here to release it on every exit path - confirm against the client.
+    with tripleoclients.messaging_websocket(queue_name) as ws:
+        messages = base.wait_for_messages(workflow_client,
+                                          ws,
+                                          execution,
+                                          timeout)
+
+        # Consume the messages inside the block in case they are produced
+        # lazily from the open websocket.
+        for message in messages:
+            if message['status'] != 'SUCCESS':
+                raise error(message['message'])
+            if message['message']:
+                print('{}'.format(message['message']))
+
+
+def fetch_logs(clients, container, server_name, timeout=None,
+               concurrency=None):
+    """Executes fetch log action
+
+    :param clients: openstack clients
+    :param container: name of the container to put the logs
+    :param server_name: server name to restrict where logs are pulled from
+    :param timeout: timeout for the log fetch operation
+    :param concurrency: max number of concurrent log collection tasks
+    :raises LogFetchError: when a workflow message is not a SUCCESS
+    """
+
+    workflow_input = {
+        "container": container,
+        "server_name": server_name,
+        "queue_name": str(uuid.uuid4()),
+    }
+
+    # Optional settings are only sent when the caller provided them.
+    if timeout is not None:
+        workflow_input['timeout'] = timeout
+    if concurrency is not None:
+        workflow_input['concurrency'] = concurrency
+
+    _run_workflow(clients, 'tripleo.support.v1.fetch_logs', workflow_input,
+                  timeout, LogFetchError)
+
+
+def delete_container(clients, container, timeout=None, concurrency=None):
+    """Deletes container from swift
+
+    :param clients: openstack clients
+    :param container: name of the container where the logs were stored
+    :param timeout: timeout for the delete operations
+    :param concurrency: max number of object deletion tasks to run at one time
+    :raises ContainerDeleteFailed: when a workflow message is not a SUCCESS
+    """
+    workflow_input = {
+        "container": container,
+        "queue_name": str(uuid.uuid4()),
+    }
+
+    # Optional settings are only sent when the caller provided them.
+    if timeout is not None:
+        workflow_input['timeout'] = timeout
+    if concurrency is not None:
+        workflow_input['concurrency'] = concurrency
+
+    _run_workflow(clients, 'tripleo.support.v1.delete_container',
+                  workflow_input, timeout, ContainerDeleteFailed)