Catch all errors and provide failure summary

So that the gates have a meaningful metric to pass or fail against,
write out the percentage of failed attempts (that is, attempts that
raised an exception) after receiving a termination or interrupt signal.

Along with this, change the except clause in the test loop to catch all
failures and log them, rather than aborting the program. This will allow
the program to keep running until explicitly stopped, providing more
accurate results.

Since the exceptions are being caught at the loop level, the tests
themselves can use a 'happy path,' without doing their own error
handling. This makes implementation of tests more straightforward.

Change-Id: I10436d1f3e99234aa167ab7a765e59d46d54eeb8
This commit is contained in:
Nolan Brubaker 2017-05-22 15:12:31 -04:00
parent 325076bef6
commit 77a61e8cdd

View File

@ -15,13 +15,12 @@
#
# (c) 2017, Nolan Brubaker <nolan.brubaker@rackspace.com>
# Necessary for accurate failure rate calculation
from __future__ import division
import argparse
import datetime
from keystoneauth1.identity import v3
from keystoneauth1 import session
from keystoneauth1.exceptions.connection import ConnectFailure
from keystoneauth1.exceptions.http import BadGateway
from keystoneauth1.exceptions.http import InternalServerError
from keystoneclient.v3 import client as key_client
import logging
import os
@ -29,7 +28,6 @@ import signal
import sys
import time
from glanceclient import Client
from glanceclient import exc as glance_exc
import tempfile
logger = logging.getLogger(__name__)
@ -142,13 +140,8 @@ class KeystoneTest(ServiceTest):
sess = session.Session(auth=auth)
keystone = key_client.Client(session=sess)
# Only catch the 500 errors; let connection failures be handled by
# the test_loop function
try:
keystone.projects.list()
msg = "Project list retrieved"
except InternalServerError:
msg = "Failed to get project list"
keystone.projects.list()
msg = "Project list retrieved"
return msg
@ -171,15 +164,9 @@ class GlanceTest(ServiceTest):
image = glance.images.create(name="Rolling test",
disk_format="raw",
container_format="bare")
try:
glance.images.upload(image.id, self.temp_file)
except glance_exc.HTTPInternalServerError:
# TODO: set msg and error type instead.
logger.error("Failed to upload")
return
finally:
glance.images.delete(image.id)
self.temp_file.close()
glance.images.upload(image.id, self.temp_file)
glance.images.delete(image.id)
self.temp_file.close()
msg = "Image created and deleted."
return msg
@ -204,11 +191,18 @@ class TestRunner(object):
def __init__(self):
    """Set up the stop flag, the shutdown signal handlers and the counters.

    SIGINT and SIGTERM are both routed to ``prep_exit``, which only raises
    the ``stop_now`` flag; the test loop reads that flag to decide when to
    write the summary and exit.
    """
    # Give the flag a defined value before any handler can fire, so the
    # loop's ``self.stop_now`` check never depends on a signal having
    # arrived first.
    # NOTE(review): a class-level ``stop_now`` default may exist outside
    # this view; setting it per instance is harmless either way.
    self.stop_now = False
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, self.prep_exit)
    # Number of loop iterations that ended in a caught exception.
    self.failures = 0
    # Number of loop iterations started.
    self.attempts = 0
def prep_exit(self, signum, frame):
    """Signal handler that asks the test loop to stop.

    Installed for SIGINT and SIGTERM in ``__init__``. It does not exit by
    itself: it raises the ``stop_now`` flag, which the test loop checks
    before writing the summary and calling ``sys.exit()``.

    :param signum: number of the received signal (unused).
    :param frame: stack frame active when the signal arrived (unused).
    """
    # Raise the flag first so the request is recorded before logging.
    self.stop_now = True
    logger.info("Received signal, stopping")
def write_summary(self):
    """Log the share of attempts that failed, as a whole-number percentage.

    Reads ``self.failures`` and ``self.attempts``. When no attempt has been
    recorded the rate is reported as 0 instead of raising
    ZeroDivisionError.
    """
    if self.attempts:
        # float() forces true division on Python 2 as well, so the result
        # does not depend on the file-level ``__future__`` import.
        percentage = float(self.failures) / self.attempts * 100
    else:
        # Nothing was attempted, so nothing failed.
        percentage = 0.0
    # %2.0f: minimum field width of 2 characters, rounded to no decimals.
    logger.info("%2.0f%% failure rate", percentage)
def test_loop(self, test):
"""Main loop to execute tests
@ -222,14 +216,8 @@ class TestRunner(object):
against an OpenStack service API.
"""
disconnected = None
# Has to be a tuple for python syntax reasons.
# This is currently the set needed for glance; should probably
# provide some way of letting a test say which exceptions should
# be caught for a service.
exc_list = (ConnectFailure, InternalServerError, BadGateway,
glance_exc.CommunicationError,
glance_exc.HTTPInternalServerError)
while True:
self.attempts += 1
try:
# Pause for a bit so we're not generating more data than we
# can handle
@ -262,11 +250,19 @@ class TestRunner(object):
except NotImplementedError:
pass
except (exc_list):
# Catch all exceptions not handled by the tests themselves,
# since we want to keep the loop running until explicitly stopped
except Exception as e:
self.failures += 1
if not disconnected:
disconnected = datetime.datetime.now()
# OpenStack API exceptions put their info in the 'details'
# attribute; 'message' is the standard one.
error_msg = getattr(e, 'details', e.message)
logger.error("%s", error_msg)
if self.stop_now:
self.write_summary()
sys.exit()