Add overcloud support report collect command

This command is to be used by an operator to run sosreport on a
specific set of servers (or all of them) and retrieve log bundles that
can be used to debug the status of the cluster or troubleshoot issues.

Depends-On: I47c486d14c46a653c61cfd92d9f484efe0407217
Change-Id: I45699dfa6eb3e83d419c7041dbb72cc5d5e4f0ea
Implements-Blueprint: capture-environment-status-and-logs
This commit is contained in:
Alex Schultz 2016-11-30 15:34:41 -07:00
parent 026cb4a76e
commit 1e69403a9e
8 changed files with 591 additions and 0 deletions

View File

@ -0,0 +1,7 @@
---
features:
- |
Implemented a new 'openstack overcloud support report collect' command to
execute a log collection and retrieval against overcloud nodes. This new
command allows an operator to perform sosreport retrieval from all nodes or
specific nodes based on their server name.

View File

@ -85,6 +85,7 @@ openstack.tripleoclient.v1 =
overcloud_profiles_match = tripleoclient.v1.overcloud_profiles:MatchProfiles
overcloud_profiles_list = tripleoclient.v1.overcloud_profiles:ListProfiles
overcloud_raid_create = tripleoclient.v1.overcloud_raid:CreateRAID
overcloud_support_report_collect = tripleoclient.v1.overcloud_support:ReportExecute
overcloud_update_clear_breakpoints = tripleoclient.v1.overcloud_update:ClearBreakpointsOvercloud
overcloud_update_stack = tripleoclient.v1.overcloud_update:UpdateOvercloud
overcloud_execute = tripleoclient.v1.overcloud_execute:RemoteExecute

View File

@ -87,3 +87,15 @@ class WorkflowActionError(Exception):
def __init__(self, message, action='', output=''):
    """Create the error with ``message`` formatted by ``action``/``output``.

    :param message: format string; positional field 0 receives ``action``
                    and positional field 1 receives ``output``
    :param action: value substituted for field 0 of ``message``
    :param output: value substituted for field 1 of ``message``
    """
    formatted = message.format(action, output)
    super(WorkflowActionError, self).__init__(formatted)
class DownloadError(Exception):
    """Raised when files cannot be downloaded.

    Download attempt failed.
    """
class LogFetchError(Exception):
    """Raised when collecting logs does not succeed.

    Fetching logs failed.
    """
class ContainerDeleteFailed(Exception):
    """Raised when a container cannot be removed.

    Container deletion failed.
    """

View File

@ -0,0 +1,133 @@
# Copyright 2017 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import mock
from tripleoclient.tests.v1.overcloud_deploy import fakes
from tripleoclient.v1 import overcloud_support
class TestOvercloudSupportReport(fakes.TestDeployOvercloud):
    """Check which support workflow helpers ``ReportExecute`` invokes.

    Every helper in ``tripleoclient.workflows.support`` that a test cares
    about is patched out, so only the command's argument parsing and the
    arguments it forwards are verified here.
    """

    def setUp(self):
        super(TestOvercloudSupportReport, self).setUp()

        self.cmd = overcloud_support.ReportExecute(self.app, None)
        self.app.client_manager.workflow_engine = mock.Mock()
        self.app.client_manager.tripleoclient = mock.Mock()
        self.app.client_manager.object_store = mock.Mock()
        self.workflow = self.app.client_manager.workflow_engine
        self.swift = self.app.client_manager.object_store

    def _run_command(self, arglist, verifylist):
        """Parse ``arglist``, run the command and return the parsed args."""
        parsed = self.check_parser(self.cmd, arglist, verifylist)
        self.cmd.take_action(parsed)
        return parsed

    @mock.patch('tripleoclient.workflows.support.download_files')
    @mock.patch('tripleoclient.workflows.support.delete_container')
    @mock.patch('tripleoclient.workflows.support.fetch_logs')
    def test_action(self, fetch_logs_mock, delete_container_mock,
                    download_files_mock):
        # Default flow: collect, download, then delete the container.
        parsed = self._run_command(
            ['-c', 'mycontainer', '-t', '60', 'control'],
            [
                ('server_name', 'control'),
                ('container', 'mycontainer'),
                ('timeout', 60)
            ])

        fetch_logs_mock.assert_called_once_with(
            self.app.client_manager,
            parsed.container,
            parsed.server_name,
            timeout=60,
            concurrency=None)
        download_files_mock.assert_called_once_with(
            self.app.client_manager,
            parsed.container,
            parsed.destination)
        delete_container_mock.assert_called_once_with(
            self.app.client_manager,
            parsed.container,
            timeout=60,
            concurrency=None)

    @mock.patch('tripleoclient.workflows.support.download_files')
    @mock.patch('tripleoclient.workflows.support.delete_container')
    @mock.patch('tripleoclient.workflows.support.fetch_logs')
    def test_action_skip_container_delete(self, fetch_logs_mock,
                                          delete_container_mock,
                                          download_files_mock):
        # --skip-container-delete keeps the container after the download.
        parsed = self._run_command(
            ['-c', 'mycontainer', '--skip-container-delete', 'control'],
            [
                ('server_name', 'control'),
                ('container', 'mycontainer')
            ])

        fetch_logs_mock.assert_called_once_with(
            self.app.client_manager,
            parsed.container,
            parsed.server_name,
            timeout=None,
            concurrency=None)
        download_files_mock.assert_called_once_with(
            self.app.client_manager,
            parsed.container,
            parsed.destination)
        delete_container_mock.assert_not_called()

    @mock.patch('tripleoclient.workflows.support.delete_container')
    @mock.patch('tripleoclient.workflows.support.fetch_logs')
    def test_action_collect_logs_only(self, fetch_logs_mock,
                                      delete_container_mock):
        # --collect-only stops after the collection workflow.
        parsed = self._run_command(
            ['--collect-only', '-t', '60', '-n', '10', 'control'],
            [
                ('server_name', 'control'),
                ('collect_only', True),
                ('timeout', 60),
                ('concurrency', 10)
            ])

        fetch_logs_mock.assert_called_once_with(
            self.app.client_manager,
            parsed.container,
            parsed.server_name,
            timeout=60,
            concurrency=10)
        delete_container_mock.assert_not_called()

    @mock.patch('tripleoclient.workflows.support.download_files')
    @mock.patch('tripleoclient.workflows.support.delete_container')
    @mock.patch('tripleoclient.workflows.support.fetch_logs')
    def test_action_download_logs_only(self, fetch_logs_mock,
                                       delete_container_mock,
                                       download_files_mock):
        # --download-only neither collects logs nor deletes the container.
        parsed = self._run_command(
            ['--download-only', 'control'],
            [
                ('server_name', 'control'),
                ('download_only', True),
            ])

        fetch_logs_mock.assert_not_called()
        delete_container_mock.assert_not_called()
        download_files_mock.assert_called_once_with(
            self.app.client_manager,
            parsed.container,
            parsed.destination)

View File

@ -0,0 +1,185 @@
# Copyright 2017 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import mock
from tripleoclient.exceptions import DownloadError
from tripleoclient.tests.v1.overcloud_deploy import fakes
from tripleoclient.workflows import support
class TestSupportFetchLogs(fakes.TestDeployOvercloud):
    """Verify the workflow input that ``support.fetch_logs`` submits."""

    def setUp(self):
        super(TestSupportFetchLogs, self).setUp()
        self.app.client_manager = mock.Mock()
        self.app.client_manager.workflow_engine = self.workflow = mock.Mock()
        self.tripleoclient = mock.Mock()
        # The websocket stand-in also works as a context manager.
        self.websocket = mock.Mock()
        self.websocket.__enter__ = lambda s: self.websocket
        self.websocket.__exit__ = lambda s, *exc: None
        self.tripleoclient.messaging_websocket.return_value = self.websocket
        self.app.client_manager.tripleoclient = self.tripleoclient
        # Pin the generated queue name so the workflow input is predictable.
        uuid4_patcher = mock.patch('uuid.uuid4', return_value="UUID4")
        self.mock_uuid4 = uuid4_patcher.start()
        # Register the patcher's stop(); ``self.mock_uuid4.stop`` would only
        # be an auto-created mock attribute and leave uuid.uuid4 patched.
        self.addCleanup(uuid4_patcher.stop)

    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
    @mock.patch('tripleoclient.workflows.base.start_workflow')
    def test_fetch_logs(self, start_wf_mock, messages_mock):
        messages_mock.return_value = []
        fetch_name = 'tripleo.support.v1.fetch_logs'
        fetch_input = {
            'server_name': 'test',
            'container': 'test',
            'queue_name': 'UUID4'
        }
        support.fetch_logs(self.app.client_manager, 'test', 'test')
        start_wf_mock.assert_called_once_with(self.workflow,
                                              fetch_name,
                                              workflow_input=fetch_input)

    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
    @mock.patch('tripleoclient.workflows.base.start_workflow')
    def test_fetch_logs_with_timeout(self, start_wf_mock, messages_mock):
        messages_mock.return_value = []
        fetch_name = 'tripleo.support.v1.fetch_logs'
        fetch_input = {
            'server_name': 'test',
            'container': 'test',
            'queue_name': 'UUID4',
            'timeout': 59,
        }
        support.fetch_logs(self.app.client_manager, 'test', 'test', timeout=59)
        start_wf_mock.assert_called_once_with(self.workflow,
                                              fetch_name,
                                              workflow_input=fetch_input)

    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
    @mock.patch('tripleoclient.workflows.base.start_workflow')
    def test_fetch_logs_with_concurrency(self, start_wf_mock, messages_mock):
        messages_mock.return_value = []
        fetch_name = 'tripleo.support.v1.fetch_logs'
        fetch_input = {
            'server_name': 'test',
            'container': 'test',
            'queue_name': 'UUID4',
            'concurrency': 10,
        }
        support.fetch_logs(self.app.client_manager, 'test', 'test',
                           concurrency=10)
        start_wf_mock.assert_called_once_with(self.workflow,
                                              fetch_name,
                                              workflow_input=fetch_input)
class TestSupportDeleteContainer(fakes.TestDeployOvercloud):
    """Verify the workflow input that ``support.delete_container`` submits."""

    def setUp(self):
        super(TestSupportDeleteContainer, self).setUp()
        self.app.client_manager = mock.Mock()
        self.app.client_manager.workflow_engine = self.workflow = mock.Mock()
        self.tripleoclient = mock.Mock()
        # The websocket stand-in also works as a context manager.
        self.websocket = mock.Mock()
        self.websocket.__enter__ = lambda s: self.websocket
        self.websocket.__exit__ = lambda s, *exc: None
        self.tripleoclient.messaging_websocket.return_value = self.websocket
        self.app.client_manager.tripleoclient = self.tripleoclient
        # Pin the generated queue name so the workflow input is predictable.
        uuid4_patcher = mock.patch('uuid.uuid4', return_value="UUID4")
        self.mock_uuid4 = uuid4_patcher.start()
        # Register the patcher's stop(); ``self.mock_uuid4.stop`` would only
        # be an auto-created mock attribute and leave uuid.uuid4 patched.
        self.addCleanup(uuid4_patcher.stop)

    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
    @mock.patch('tripleoclient.workflows.base.start_workflow')
    def test_delete_container(self, start_wf_mock, messages_mock):
        messages_mock.return_value = []
        fetch_name = 'tripleo.support.v1.delete_container'
        fetch_input = {
            'container': 'test',
            'queue_name': 'UUID4'
        }
        support.delete_container(self.app.client_manager, 'test')
        start_wf_mock.assert_called_once_with(self.workflow,
                                              fetch_name,
                                              workflow_input=fetch_input)

    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
    @mock.patch('tripleoclient.workflows.base.start_workflow')
    def test_delete_container_with_timeout(self, start_wf_mock, messages_mock):
        messages_mock.return_value = []
        fetch_name = 'tripleo.support.v1.delete_container'
        fetch_input = {
            'container': 'test',
            'queue_name': 'UUID4',
            'timeout': 59,
        }
        support.delete_container(self.app.client_manager, 'test', timeout=59)
        start_wf_mock.assert_called_once_with(self.workflow,
                                              fetch_name,
                                              workflow_input=fetch_input)

    @mock.patch('tripleoclient.workflows.base.wait_for_messages')
    @mock.patch('tripleoclient.workflows.base.start_workflow')
    def test_delete_container_with_concurrency(self, start_wf_mock,
                                               messages_mock):
        messages_mock.return_value = []
        fetch_name = 'tripleo.support.v1.delete_container'
        fetch_input = {
            'container': 'test',
            'queue_name': 'UUID4',
            'concurrency': 10,
        }
        support.delete_container(self.app.client_manager, 'test',
                                 concurrency=10)
        start_wf_mock.assert_called_once_with(self.workflow,
                                              fetch_name,
                                              workflow_input=fetch_input)
class TestDownloadContainer(fakes.TestDeployOvercloud):
    """Exercise ``support.download_files`` against a mocked object store."""

    def setUp(self):
        super(TestDownloadContainer, self).setUp()
        self.app.client_manager.workflow_engine = mock.Mock()
        self.app.client_manager.tripleoclient = mock.Mock()
        self.app.client_manager.object_store = mock.Mock()

    # ``check_local_space`` is patched through mock.patch.object so the real
    # function is restored after each test instead of being overwritten on
    # the module for the remainder of the test run.
    # ``os.path.exists`` is patched so download_files does not create the
    # destination directory on the real filesystem.
    @mock.patch('os.path.exists', return_value=True)
    @mock.patch.object(support, 'check_local_space', return_value=False)
    def test_download_files_not_enough_space(self, space_mock, exists_mock):
        oc = self.app.client_manager.object_store
        oc.object_list.return_value = [{'bytes': 100}]
        self.assertRaises(DownloadError,
                          support.download_files,
                          self.app.client_manager,
                          'test',
                          'test')

    @mock.patch('os.path.exists', return_value=True)
    @mock.patch.object(support, 'check_local_space', return_value=True)
    def test_download_files(self, space_mock, exists_mock):
        oc = self.app.client_manager.object_store
        oc.object_list.return_value = [
            {'name': 'test1'}
        ]
        oc.object_save = mock.MagicMock()
        support.download_files(self.app.client_manager, 'test', '/test')
        oc.object_save.assert_called_once_with(container='test',
                                               object='test1',
                                               file='/test/test1')

View File

@ -0,0 +1,107 @@
# Copyright 2017 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import logging
from tripleoclient.workflows import support
from osc_lib.command import command
from osc_lib.i18n import _
class ReportExecute(command.Command):
    """Run sosreport on selected servers."""

    log = logging.getLogger(__name__ + ".ReportExecute")

    def get_parser(self, prog_name):
        """Return the argument parser for this command.

        Adds the positional ``server_name``, the container/output/timeout/
        concurrency options, ``--skip-container-delete`` and the mutually
        exclusive ``--collect-only`` / ``--download-only`` switches.
        """
        parser = super(ReportExecute, self).get_parser(prog_name)
        parser.add_argument('server_name',
                            help=_('Nova server_name or partial name to match.'
                                   ' For example "controller" will match all '
                                   'controllers for an environment.'))
        parser.add_argument('-c', '--container', dest='container',
                            default='overcloud_support',
                            help=_('Swift Container to store logs to'))
        parser.add_argument('-o', '--output', dest='destination',
                            default='support_logs',
                            help=_('Output directory for the report'))
        parser.add_argument('--skip-container-delete', dest='skip_delete',
                            default=False,
                            help=_('Do not delete the container after the '
                                   'files have been downloaded. Ignored '
                                   'if --collect-only or --download-only '
                                   'is provided.'),
                            action='store_true')
        parser.add_argument('-t', '--timeout', dest='timeout', type=int,
                            default=None,
                            help=_('Maximum time to wait for the log '
                                   'collection and container deletion '
                                   'workflows to finish.'))
        parser.add_argument('-n', '--concurrency', dest='concurrency',
                            type=int, default=None,
                            help=_('Number of parallel log collection and '
                                   'object deletion tasks to run.'))
        # Collecting without downloading and downloading without collecting
        # cannot be requested together.
        group = parser.add_mutually_exclusive_group(required=False)
        group.add_argument('--collect-only', dest='collect_only',
                           help=_('Skip log downloads, only collect logs and '
                                  'put in the container'),
                           default=False,
                           action='store_true')
        group.add_argument('--download-only', dest='download_only',
                           help=_('Skip generation, only download from '
                                  'the provided container'),
                           default=False,
                           action='store_true')
        return parser

    def take_action(self, parsed_args):
        """Collect logs, download them and delete the container.

        The collection step is skipped with ``--download-only`` and the
        download step with ``--collect-only``. The container is deleted only
        when neither of those switches nor ``--skip-container-delete`` was
        given. A failure in any step is logged and re-raised unchanged.

        :param parsed_args: namespace produced by the parser from get_parser
        :raises Exception: if ``server_name`` is empty, or whatever the
                           failing support helper raised
        """
        self.log.debug('take_action({})'.format(parsed_args))

        clients = self.app.client_manager
        container = parsed_args.container
        server_name = parsed_args.server_name
        destination = parsed_args.destination
        timeout = parsed_args.timeout
        concurrency = parsed_args.concurrency

        if not server_name:
            raise Exception(_('Please specify the server_name option.'))

        if not parsed_args.download_only:
            print(_('Starting log collection... (This may take a while)'))
            try:
                support.fetch_logs(clients, container, server_name,
                                   timeout=timeout, concurrency=concurrency)
            except Exception as err:
                self.log.error('Unable to fetch logs, {}'.format(err))
                # Bare raise keeps the original traceback intact.
                raise

        if not parsed_args.collect_only:
            try:
                support.download_files(clients, container, destination)
            except Exception as err:
                self.log.error('Unable to download files, {}'.format(err))
                raise

        if (not parsed_args.collect_only and
                not parsed_args.download_only and
                not parsed_args.skip_delete):
            print(_('Deleting container') + ' {}...'.format(container))
            try:
                support.delete_container(clients, container, timeout=timeout,
                                         concurrency=concurrency)
            except Exception as err:
                self.log.error('Unable to delete container, {}'.format(err))
                raise

View File

@ -0,0 +1,146 @@
# Copyright 2017 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import os
import uuid
from osc_lib.i18n import _
from tripleoclient.exceptions import ContainerDeleteFailed
from tripleoclient.exceptions import DownloadError
from tripleoclient.exceptions import LogFetchError
from tripleoclient.workflows import base
def check_local_space(path, object_list):
    """Tell whether the filesystem holding ``path`` can fit ``object_list``.

    :param path: local path whose filesystem statistics are inspected
    :param object_list: iterable of mappings, each with a ``bytes`` entry
    :returns: True when the space available to the caller is at least the
              sum of the ``bytes`` entries, False otherwise
    """
    needed_bytes = 0
    for entry in object_list:
        needed_bytes += entry['bytes']

    fs_stats = os.statvfs(path)
    # Available blocks multiplied by the fragment size gives bytes.
    available_bytes = fs_stats.f_bavail * fs_stats.f_frsize
    return available_bytes >= needed_bytes
def download_files(clients, container_name, destination):
    """Download every object of a container into a local folder.

    :param clients: openstack clients
    :param container_name: name of the container holding the logs
    :param destination: folder to download files to; a relative path is
                        resolved against the current working directory
    :raises DownloadError: if check_local_space reports that the destination
                           cannot hold the listed objects
    """
    oc = clients.object_store
    object_list = oc.object_list(container=container_name, all_data=True)

    # Anchor any relative destination (including ones with a directory part
    # such as "logs/run1") to the working directory once, so the directory
    # that gets created and the paths files are saved to always agree.
    if not os.path.isabs(destination):
        destination = os.path.join(os.getcwd(), destination)

    if not os.path.exists(destination):
        print('Creating destination path: {}'.format(destination))
        os.makedirs(destination)

    if not check_local_space(destination, object_list):
        raise DownloadError(_('Not enough local space to download files.'))

    for data in object_list:
        print('Downloading file: {}'.format(data['name']))
        file_path = os.path.join(destination, data['name'])
        oc.object_save(container=container_name,
                       object=data['name'],
                       file=file_path)
def fetch_logs(clients, container, server_name, timeout=None,
               concurrency=None):
    """Executes fetch log action

    Starts the ``tripleo.support.v1.fetch_logs`` workflow and prints the
    messages it reports until the workflow is done.

    :param clients: openstack clients
    :param container: name of the container to put the logs
    :param server_name: server name to restrict where logs are pulled from
    :param timeout: timeout for the log fetch operation
    :param concurrency: max number of concurrent log collection tasks
    :raises LogFetchError: if a message reports a status other than SUCCESS
    """
    workflow_input = {
        "container": container,
        "server_name": server_name,
        "queue_name": str(uuid.uuid4()),
    }

    # Optional inputs are only sent when the caller supplied them.
    if timeout is not None:
        workflow_input['timeout'] = timeout
    if concurrency is not None:
        workflow_input['concurrency'] = concurrency

    workflow_client = clients.workflow_engine
    tripleoclients = clients.tripleoclient
    queue_name = workflow_input['queue_name']

    execution = base.start_workflow(
        workflow_client,
        'tripleo.support.v1.fetch_logs',
        workflow_input=workflow_input
    )

    # Hold the websocket in a ``with`` block so its __exit__ runs even when
    # a failure message raises below, instead of leaving it open.
    with tripleoclients.messaging_websocket(queue_name) as ws:
        messages = base.wait_for_messages(workflow_client,
                                          ws,
                                          execution,
                                          timeout)

        for message in messages:
            if message['status'] != 'SUCCESS':
                raise LogFetchError(message['message'])
            if message['message']:
                print('{}'.format(message['message']))
def delete_container(clients, container, timeout=None, concurrency=None):
    """Deletes container from swift

    Starts the ``tripleo.support.v1.delete_container`` workflow and prints
    the messages it reports until the workflow is done.

    :param clients: openstack clients
    :param container: name of the container where the logs were stored
    :param timeout: timeout for the delete operations
    :param concurrency: max number of object deletion tasks to run at one time
    :raises ContainerDeleteFailed: if a message reports a status other than
                                   SUCCESS
    """
    workflow_input = {
        "container": container,
        "queue_name": str(uuid.uuid4()),
    }

    # Optional inputs are only sent when the caller supplied them.
    if timeout is not None:
        workflow_input['timeout'] = timeout
    if concurrency is not None:
        workflow_input['concurrency'] = concurrency

    workflow_client = clients.workflow_engine
    tripleoclients = clients.tripleoclient
    queue_name = workflow_input['queue_name']

    execution = base.start_workflow(
        workflow_client,
        'tripleo.support.v1.delete_container',
        workflow_input=workflow_input
    )

    # Hold the websocket in a ``with`` block so its __exit__ runs even when
    # a failure message raises below, instead of leaving it open.
    with tripleoclients.messaging_websocket(queue_name) as ws:
        messages = base.wait_for_messages(workflow_client,
                                          ws,
                                          execution,
                                          timeout)

        for message in messages:
            if message['status'] != 'SUCCESS':
                raise ContainerDeleteFailed(message['message'])
            if message['message']:
                print('{}'.format(message['message']))