Implement "openstack stack failures list"

A command which prints failed resources in the stack to help debug
stack issues.

Failed resources are added by recursing from the top level stack into
failed nested stack resources. A failed nested stack resource is only
added to the failed list if it contains no failed resources so that the
user only sees root-cause failed resources.

As an added convenience to debugging deployment resources, deployment
outputs deploy_stdout and deploy_stderr are printed as well (the full
outputs are printed when --long is specified).

The output is a yamlish format where the key is a dot-delimited resource
name path and the value is a dict of resource information.

Change-Id: Ifa44175d8dbab2f7b62691b839e1696e3af7f4f3
Closes-Bug: #1585820
This commit is contained in:
Steve Baker 2016-02-17 11:17:53 +13:00
parent 7299dd2fde
commit 3cde580165
4 changed files with 400 additions and 0 deletions

View File

@ -81,6 +81,8 @@ def print_software_deployment_output(data, name, out=sys.stdout, long=False):
The format attempts to be valid yaml, but is primarily aimed at showing
useful information to the user in a helpful layout.
"""
if data is None:
data = {}
if name in ('deploy_stdout', 'deploy_stderr'):
output = indent_and_truncate(
data.get(name),

View File

@ -0,0 +1,136 @@
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import collections
from cliff import command
from heatclient.common import format_utils
from heatclient import exc
from heatclient.openstack.common._i18n import _
class ListStackFailures(command.Command):
    """Show information about failed stack resources."""

    def take_action(self, parsed_args):
        """Collect the failed resources of a stack and print them.

        :param parsed_args: parsed command line arguments; ``stack`` and
                            ``long`` are read
        """
        self.heat_client = self.app.client_manager.orchestration
        failures = self._build_failed_resources(parsed_args.stack)
        deployment_failures = self._build_software_deployments(failures)
        self._print_failures(failures, deployment_failures,
                             long=parsed_args.long)

    def get_parser(self, prog_name):
        """Build the argument parser: a stack positional and ``--long``."""
        parser = super(ListStackFailures, self).get_parser(prog_name)
        parser.add_argument(
            'stack',
            metavar='<stack>',
            help=_('Stack to display (name or ID)'),
        )
        parser.add_argument(
            '--long',
            action='store_true',
            default=False,
            help=_('Show full deployment logs in output'),
        )
        return parser

    def _build_failed_resources(self, stack):
        """List information about FAILED stack resources.

        Failed resources are added by recursing from the top level stack into
        failed nested stack resources. A failed nested stack resource is only
        added to the failed list if it contains no failed resources.

        :param stack: name or ID of the stack to inspect
        :returns: an ordered dict mapping the dot-delimited resource path
                  to the failed resource; empty when the stack status is
                  not FAILED
        """
        failures = collections.OrderedDict()
        s = self.heat_client.stacks.get(stack)
        if s.status != 'FAILED':
            # Return the same mapping type as the failed case so callers
            # never have to handle two different container types.
            return failures
        resources = self.heat_client.resources.list(s.id)
        self._append_failed_resources(failures, resources, [s.stack_name])
        return failures

    def _append_failed_resources(self, failures, resources, resource_path):
        """Recursively build list of failed resources.

        :param failures: ordered dict that failed resources are added to,
                         keyed by dot-delimited resource path
        :param resources: resources of the stack currently being walked
        :param resource_path: list of names leading to the current stack
        :returns: True if at least one resource was recorded for this
                  stack (directly or through a nested stack)
        """
        appended = False
        for r in resources:
            if not r.resource_status.endswith('FAILED'):
                continue
            # determine if this resource is a nested stack
            is_nested = any(link['rel'] == 'nested' for link in r.links)
            nested_appended = False
            next_resource_path = list(resource_path)
            next_resource_path.append(r.resource_name)
            if is_nested:
                try:
                    nested_resources = self.heat_client.resources.list(
                        r.physical_resource_id)
                    nested_appended = self._append_failed_resources(
                        failures, nested_resources, next_resource_path)
                except exc.HTTPNotFound:
                    # there is a failed resource but no stack
                    pass
            if not nested_appended:
                # Nothing deeper was recorded, so this resource itself is
                # the closest thing to a root cause that can be reported.
                failures['.'.join(next_resource_path)] = r
            appended = True
        return appended

    def _build_software_deployments(self, resources):
        """Build a dict of software deployments from the supplied resources.

        The key is the deployment ID.

        :param resources: mapping of resource path to failed resource, as
                          built by _build_failed_resources
        :returns: dict of deployment ID to deployment; resources which are
                  not deployments, or whose deployment cannot be found,
                  are left out
        """
        df = {}
        if not resources:
            return df
        for r in resources.values():
            if r.resource_type not in ('OS::Heat::StructuredDeployment',
                                       'OS::Heat::SoftwareDeployment'):
                continue
            try:
                sd = self.heat_client.software_deployments.get(
                    deployment_id=r.physical_resource_id)
                df[r.physical_resource_id] = sd
            except exc.HTTPNotFound:
                # Best effort: the resource is still printed, just without
                # its deployment output.
                pass
        return df

    def _print_failures(self, failures, deployment_failures, long=False):
        """Print failed resources.

        If the resource is a deployment resource, look up the deployment and
        print deploy_stdout and deploy_stderr.

        :param failures: mapping of resource path to failed resource
        :param deployment_failures: dict of deployment ID to deployment
        :param long: when True the status reason and deployment outputs
                     are printed without truncation
        """
        out = self.app.stdout
        if not failures:
            return
        for k, f in failures.items():
            out.write('%s:\n' % k)
            out.write(' resource_type: %s\n' % f.resource_type)
            out.write(' physical_resource_id: %s\n' %
                      f.physical_resource_id)
            out.write(' status: %s\n' % f.resource_status)
            reason = format_utils.indent_and_truncate(
                f.resource_status_reason,
                spaces=4,
                truncate=not long,
                truncate_prefix='...\n')
            out.write(' status_reason: |\n%s\n' % reason)
            # Deployments are keyed by the physical resource ID of the
            # deployment resource.
            df = deployment_failures.get(f.physical_resource_id)
            if df:
                for output in ('deploy_stdout', 'deploy_stderr'):
                    format_utils.print_software_deployment_output(
                        data=df.output_values, name=output, long=long,
                        out=out)

View File

@ -0,0 +1,261 @@
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import mock
from heatclient import exc
from heatclient.osc.v1 import stack_failures
from heatclient.tests.unit.osc.v1 import fakes as orchestration_fakes
class ListStackFailuresTest(orchestration_fakes.TestOrchestrationv1):
    """Unit tests for the ``stack failures list`` command."""

    def setUp(self):
        """Wire the command to mocked clients and build resource fixtures."""
        super(ListStackFailuresTest, self).setUp()
        self.cmd = stack_failures.ListStackFailures(self.app, None)
        self.cmd.heat_client = self.app.client_manager.orchestration
        self.stack_client = self.app.client_manager.orchestration.stacks
        self.resource_client = self.app.client_manager.orchestration.resources
        self.software_deployments_client = \
            self.app.client_manager.orchestration.software_deployments

        # By default stacks.get() reports a FAILED top level stack.
        self.stack = mock.MagicMock(id='123', status='FAILED',
                                    stack_name='stack')
        self.stack_client.get.return_value = self.stack

        # Failed nested stack (has a 'nested' link).
        self.failed_template_resource = mock.MagicMock(
            physical_resource_id='aaaa',
            resource_type='My::TemplateResource',
            resource_status='CREATE_FAILED',
            links=[{'rel': 'nested'}],
            resource_name='my_templateresource',
            resource_status_reason='All gone Pete Tong',
            logical_resource_id='my_templateresource',
        )
        # Failed leaf resource.
        self.failed_resource = mock.MagicMock(
            physical_resource_id='cccc',
            resource_type='OS::Nova::Server',
            resource_status='CREATE_FAILED',
            links=[],
            resource_name='my_server',
            resource_status_reason='All gone Pete Tong',
            logical_resource_id='my_server',
        )
        # Second failed nested stack.
        self.other_failed_template_resource = mock.MagicMock(
            physical_resource_id='dddd',
            resource_type='My::OtherTemplateResource',
            resource_status='CREATE_FAILED',
            links=[{'rel': 'nested'}],
            resource_name='my_othertemplateresource',
            resource_status_reason='RPC timeout',
            logical_resource_id='my_othertemplateresource',
        )
        # Resource which did not fail and must never be reported.
        self.working_resource = mock.MagicMock(
            physical_resource_id='bbbb',
            resource_type='OS::Nova::Server',
            resource_status='CREATE_COMPLETE',
            resource_name='my_server',
        )
        # Failed deployment resource and the deployment it refers to.
        self.failed_deployment_resource = mock.MagicMock(
            physical_resource_id='eeee',
            resource_type='OS::Heat::SoftwareDeployment',
            resource_status='CREATE_FAILED',
            links=[],
            resource_name='my_deployment',
            resource_status_reason='Returned deploy_statuscode 1',
            logical_resource_id='my_deployment',
        )
        self.failed_deployment = mock.MagicMock(
            id='eeee',
            output_values={
                'deploy_statuscode': '1',
                'deploy_stderr': 'It broke',
                'deploy_stdout': ('1\n2\n3\n4\n5\n6\n7\n8\n9\n10'
                                  '\n11\n12')
            },
        )
        self.software_deployments_client.get.return_value = (
            self.failed_deployment)

    def test_build_failed_none(self):
        """A stack which is not FAILED yields no failures."""
        self.stack = mock.MagicMock(id='123', status='COMPLETE',
                                    stack_name='stack')
        # The replacement stack has to be returned by stacks.get(),
        # otherwise the command still sees the FAILED stack from setUp.
        self.stack_client.get.return_value = self.stack
        failures = self.cmd._build_failed_resources('stack')
        self.assertEqual(0, len(failures))
        # The early return must not list any resources.
        self.assertFalse(self.resource_client.list.called)

    def test_build_failed_resources(self):
        """Nested failures are reported instead of their parent stack."""
        self.resource_client.list.side_effect = [[
            # resource-list stack
            self.failed_template_resource,
            self.other_failed_template_resource,
            self.working_resource,
        ], [  # resource-list aaaa
            self.failed_resource
        ], [  # resource-list dddd
        ]]
        failures = self.cmd._build_failed_resources('stack')
        expected = collections.OrderedDict()
        expected['stack.my_templateresource.my_server'] = self.failed_resource
        expected['stack.my_othertemplateresource'] = (
            self.other_failed_template_resource)
        self.assertEqual(expected, failures)

    def test_build_failed_resources_not_found(self):
        """A nested stack which cannot be listed is reported itself."""
        self.resource_client.list.side_effect = [[
            # resource-list stack
            self.failed_template_resource,
            self.other_failed_template_resource,
            self.working_resource,
        ], exc.HTTPNotFound(), [  # resource-list dddd
        ]]
        failures = self.cmd._build_failed_resources('stack')
        expected = collections.OrderedDict()
        expected['stack.my_templateresource'] = self.failed_template_resource
        expected['stack.my_othertemplateresource'] = (
            self.other_failed_template_resource)
        self.assertEqual(expected, failures)

    def test_build_software_deployments(self):
        """Only deployment resources are looked up, keyed by ID."""
        resources = {
            'stack.my_server': self.working_resource,
            'stack.my_deployment': self.failed_deployment_resource
        }
        deployments = self.cmd._build_software_deployments(resources)
        self.assertEqual({
            'eeee': self.failed_deployment
        }, deployments)

    def test_build_software_deployments_not_found(self):
        """A missing deployment is skipped rather than raising."""
        resources = {
            'stack.my_server': self.working_resource,
            'stack.my_deployment': self.failed_deployment_resource
        }
        self.software_deployments_client.get.side_effect = exc.HTTPNotFound()
        deployments = self.cmd._build_software_deployments(resources)
        self.assertEqual({}, deployments)

    def test_build_software_deployments_no_resources(self):
        """No resources means no deployments."""
        resources = {}
        self.software_deployments_client.get.side_effect = exc.HTTPNotFound()
        deployments = self.cmd._build_software_deployments(resources)
        self.assertEqual({}, deployments)

    def test_list_stack_failures(self):
        """Default output truncates the deployment stdout."""
        self.resource_client.list.side_effect = [[
            # resource-list stack
            self.failed_template_resource,
            self.other_failed_template_resource,
            self.working_resource,
            self.failed_deployment_resource
        ], [  # resource-list aaaa
            self.failed_resource
        ], [  # resource-list dddd
        ]]
        arglist = ['stack']
        parsed_args = self.check_parser(self.cmd, arglist, [])
        self.cmd.take_action(parsed_args)
        expected = '''stack.my_templateresource.my_server:
resource_type: OS::Nova::Server
physical_resource_id: cccc
status: CREATE_FAILED
status_reason: |
All gone Pete Tong
stack.my_othertemplateresource:
resource_type: My::OtherTemplateResource
physical_resource_id: dddd
status: CREATE_FAILED
status_reason: |
RPC timeout
stack.my_deployment:
resource_type: OS::Heat::SoftwareDeployment
physical_resource_id: eeee
status: CREATE_FAILED
status_reason: |
Returned deploy_statuscode 1
deploy_stdout: |
...
3
4
5
6
7
8
9
10
11
12
(truncated, view all with --long)
deploy_stderr: |
It broke
'''
        self.assertEqual(expected, self.app.stdout.make_string())

    def test_list_stack_failures_long(self):
        """With --long the deployment stdout is printed in full."""
        self.resource_client.list.side_effect = [[
            # resource-list stack
            self.failed_template_resource,
            self.other_failed_template_resource,
            self.working_resource,
            self.failed_deployment_resource
        ], [  # resource-list aaaa
            self.failed_resource
        ], [  # resource-list dddd
        ]]
        arglist = ['--long', 'stack']
        parsed_args = self.check_parser(self.cmd, arglist, [])
        self.cmd.take_action(parsed_args)
        expected = '''stack.my_templateresource.my_server:
resource_type: OS::Nova::Server
physical_resource_id: cccc
status: CREATE_FAILED
status_reason: |
All gone Pete Tong
stack.my_othertemplateresource:
resource_type: My::OtherTemplateResource
physical_resource_id: dddd
status: CREATE_FAILED
status_reason: |
RPC timeout
stack.my_deployment:
resource_type: OS::Heat::SoftwareDeployment
physical_resource_id: eeee
status: CREATE_FAILED
status_reason: |
Returned deploy_statuscode 1
deploy_stdout: |
1
2
3
4
5
6
7
8
9
10
11
12
deploy_stderr: |
It broke
'''
        self.assertEqual(expected, self.app.stdout.make_string())

View File

@ -56,6 +56,7 @@ openstack.orchestration.v1 =
stack_event_list = heatclient.osc.v1.event:ListEvent
stack_event_show = heatclient.osc.v1.event:ShowEvent
stack_environment_show = heatclient.osc.v1.stack:EnvironmentShowStack
stack_failures_list = heatclient.osc.v1.stack_failures:ListStackFailures
stack_hook_clear = heatclient.osc.v1.stack:StackHookClear
stack_hook_poll = heatclient.osc.v1.stack:StackHookPoll
stack_list = heatclient.osc.v1.stack:ListStack