Make status description field more useful

If an error occurs while operating on a cluster, the cluster's
status description field is now updated with the exception message

Change-Id: I8973a4ac0d1dbab9c58787d385c12451eb67b1ef
Closes-bug: #1419136
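
For illustration only (not part of the change): a condensed, self-contained
sketch of the decorator pattern this commit introduces in the ops error
handler. DemoCluster, the status helper, and the decorated scale() function
below are simplified stand-ins, not Sahara's real conductor-backed APIs, and
rollback and deleted-cluster handling are omitted; the actual implementation
is in the diff below.

import functools


class DemoCluster(object):
    status = "Active"
    status_description = ""


def change_cluster_status(cluster, status, description=""):
    # Stand-in for the real helper, which goes through the conductor;
    # here it just mutates the object in place.
    cluster.status = status
    cluster.status_description = description


def ops_error_handler(description):
    # Parameterized decorator: the caught exception message is formatted
    # into a human-readable status description instead of being dropped.
    def decorator(f):
        @functools.wraps(f)
        def wrapper(cluster, *args, **kwargs):
            try:
                f(cluster, *args, **kwargs)
            except Exception as ex:
                change_cluster_status(
                    cluster, "Error", description.format(reason=str(ex)))
        return wrapper
    return decorator


@ops_error_handler("Scaling cluster failed for the following "
                   "reason(s): {reason}")
def scale(cluster):
    raise ValueError("quota exceeded")


cluster = DemoCluster()
scale(cluster)
print(cluster.status)               # Error
print(cluster.status_description)
# Scaling cluster failed for the following reason(s): quota exceeded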
Vitaly Gridnev 2015-02-09 15:39:09 +03:00
parent e8c8880d35
commit 5b30113fca
4 changed files with 123 additions and 42 deletions


@@ -76,10 +76,10 @@ def scale_cluster(id, data):
cluster = g.change_cluster_status(cluster, "Validating")
quotas.check_scaling(cluster, to_be_enlarged, additional)
plugin.validate_scaling(cluster, to_be_enlarged, additional)
except Exception:
except Exception as e:
with excutils.save_and_reraise_exception():
g.clean_cluster_from_empty_ng(cluster)
g.change_cluster_status(cluster, "Active")
g.change_cluster_status(cluster, "Active", six.text_type(e))
# If we are here validation is successful.
# So let's update to_be_enlarged map:
@@ -109,7 +109,7 @@ def create_cluster(values):
except Exception as e:
with excutils.save_and_reraise_exception():
g.change_cluster_status(cluster, "Error",
status_description=six.text_type(e))
six.text_type(e))
OPS.provision_cluster(cluster.id)


@@ -14,6 +14,7 @@
# limitations under the License.
import functools
import six
import uuid
from oslo_config import cfg
@@ -23,6 +24,7 @@ import oslo_messaging as messaging
from sahara import conductor as c
from sahara import context
from sahara import exceptions
from sahara.i18n import _
from sahara.i18n import _LE
from sahara.i18n import _LI
from sahara.plugins import base as plugin_base
@@ -150,48 +152,56 @@ class OpsServer(rpc_utils.RPCServer):
return INFRA.get_type_and_version()
def ops_error_handler(f):
@functools.wraps(f)
def wrapper(cluster_id, *args, **kwds):
try:
f(cluster_id, *args, **kwds)
except Exception as ex:
# something happened during cluster operation
def ops_error_handler(description):
def decorator(f):
@functools.wraps(f)
def wrapper(cluster_id, *args, **kwds):
ctx = context.ctx()
cluster = conductor.cluster_get(ctx, cluster_id)
# check if cluster still exists (it might have been removed)
if cluster is None or cluster.status == 'Deleting':
LOG.info(_LI("Cluster %s was deleted or marked for "
"deletion. Canceling current operation."),
cluster_id)
return
LOG.exception(
_LE("Error during operating cluster '%(name)s' (reason: "
"%(reason)s)"), {'name': cluster.name, 'reason': ex})
try:
# trying to rollback
if _rollback_cluster(cluster, ex):
g.change_cluster_status(cluster, "Active")
else:
g.change_cluster_status(cluster, "Error")
except Exception as rex:
# Clearing status description before executing
g.change_cluster_status_description(cluster_id, "")
f(cluster_id, *args, **kwds)
except Exception as ex:
# something happened during cluster operation
cluster = conductor.cluster_get(ctx, cluster_id)
# check if cluster still exists (it might have been
# removed during rollback)
if cluster is None:
LOG.info(_LI("Cluster with %s was deleted. Canceling "
"current operation."), cluster_id)
# check if cluster still exists (it might have been removed)
if cluster is None or cluster.status == 'Deleting':
LOG.info(_LI("Cluster id={id} was deleted or "
"marked for deletion. Canceling "
"current operation.").format(id=cluster_id))
return
msg = six.text_type(ex)
LOG.exception(
_LE("Error during rollback of cluster '%(name)s' (reason: "
"%(reason)s)"), {'name': cluster.name, 'reason': rex})
_LE("Error during operating on cluster {name} (reason: "
"{reason})").format(name=cluster.name, reason=msg))
g.change_cluster_status(cluster, "Error")
try:
# trying to rollback
desc = description.format(reason=msg)
if _rollback_cluster(cluster, ex):
g.change_cluster_status(cluster, "Active", desc)
else:
g.change_cluster_status(cluster, "Error", desc)
except Exception as rex:
cluster = conductor.cluster_get(ctx, cluster_id)
# check if cluster still exists (it might have been
# removed during rollback)
if cluster is None:
LOG.info(_LI("Cluster id={id} was deleted. Canceling "
"current operation.").format(
id=cluster_id))
return
return wrapper
LOG.exception(
_LE("Error during rollback of cluster {name} (reason:"
" {reason})").format(name=cluster.name,
reason=six.text_type(rex)))
desc = "{0}, {1}".format(msg, six.text_type(rex))
g.change_cluster_status(
cluster, "Error", description.format(reason=desc))
return wrapper
return decorator
def _rollback_cluster(cluster, reason):
@@ -223,7 +233,8 @@ def _update_sahara_info(ctx, cluster):
ctx, cluster, {'sahara_info': sahara_info})
@ops_error_handler
@ops_error_handler(
_("Creating cluster failed for the following reason(s): {reason}"))
def _provision_cluster(cluster_id):
ctx, cluster, plugin = _prepare_provisioning(cluster_id)
@@ -256,7 +267,8 @@ def _provision_cluster(cluster_id):
job_manager.run_job(je.id)
@ops_error_handler
@ops_error_handler(
_("Scaling cluster failed for the following reason(s): {reason}"))
def _provision_scaled_cluster(cluster_id, node_group_id_map):
ctx, cluster, plugin = _prepare_provisioning(cluster_id)
@@ -288,7 +300,8 @@ def _provision_scaled_cluster(cluster_id, node_group_id_map):
g.change_cluster_status(cluster, "Active")
@ops_error_handler
@ops_error_handler(
_("Terminating cluster failed for the following reason(s): {reason}"))
def terminate_cluster(cluster_id):
ctx = context.ctx()
cluster = conductor.cluster_get(ctx, cluster_id)


@@ -20,6 +20,12 @@ from sahara.service import ops
from sahara.tests.unit import base
class FakeCluster(object):
id = 'id'
status = "Some_status"
name = "Fake_cluster"
class FakeNodeGroup(object):
id = 'id'
count = 2
@@ -66,9 +72,11 @@ class FakeINFRA(object):
TestOPS.SEQUENCE.append('rollback_cluster')
class TestOPS(base.SaharaTestCase):
class TestOPS(base.SaharaWithDbTestCase):
SEQUENCE = []
@mock.patch('sahara.utils.general.change_cluster_status_description',
return_value=FakeCluster())
@mock.patch('sahara.service.ops._update_sahara_info')
@mock.patch('sahara.service.ops._prepare_provisioning',
return_value=(mock.Mock(), mock.Mock(), FakePlugin()))
@@ -80,7 +88,8 @@ class TestOPS(base.SaharaTestCase):
@mock.patch('sahara.service.edp.job_manager.run_job')
def test_provision_cluster(self, p_run_job, p_job_exec, p_create_trust,
p_conf, p_cluster_get, p_change_status,
p_prep_provisioning, p_update_sahara_info):
p_prep_provisioning, p_update_sahara_info,
p_change_cluster_status_desc):
del self.SEQUENCE[:]
ops.INFRA = FakeINFRA()
ops._provision_cluster('123')
@@ -118,3 +127,51 @@ class TestOPS(base.SaharaTestCase):
self.assertEqual(['on_terminate_cluster', 'shutdown_cluster',
'cluster_destroy'], self.SEQUENCE,
'Order of calls is wrong')
@mock.patch('sahara.utils.general.change_cluster_status_description')
@mock.patch('sahara.service.ops._prepare_provisioning')
@mock.patch('sahara.utils.general.change_cluster_status')
@mock.patch('sahara.service.ops._rollback_cluster')
@mock.patch('sahara.conductor.API.cluster_get')
def test_ops_error_handler_success_rollback(
self, p_cluster_get, p_rollback_cluster, p_change_cluster_status,
p__prepare_provisioning, p_change_cluster_status_desc):
# Test scenario: failed scaling -> success_rollback
fake_cluster = FakeCluster()
p_change_cluster_status_desc.return_value = FakeCluster()
p_rollback_cluster.return_value = True
p_cluster_get.return_value = fake_cluster
p__prepare_provisioning.side_effect = ValueError('error1')
expected = [
mock.call(fake_cluster, 'Active',
'Scaling cluster failed for the following '
'reason(s): error1')
]
ops._provision_scaled_cluster(fake_cluster.id, {'id': 1})
self.assertEqual(expected, p_change_cluster_status.call_args_list)
@mock.patch('sahara.utils.general.change_cluster_status_description')
@mock.patch('sahara.service.ops._prepare_provisioning')
@mock.patch('sahara.utils.general.change_cluster_status')
@mock.patch('sahara.service.ops._rollback_cluster')
@mock.patch('sahara.conductor.API.cluster_get')
def test_ops_error_handler_failed_rollback(
self, p_cluster_get, p_rollback_cluster, p_change_cluster_status,
p__prepare_provisioning, p_change_cluster_status_desc):
# Test scenario: failed scaling -> failed_rollback
fake_cluster = FakeCluster()
p_change_cluster_status_desc.return_value = FakeCluster()
p__prepare_provisioning.side_effect = ValueError('error1')
p_rollback_cluster.side_effect = ValueError('error2')
p_cluster_get.return_value = fake_cluster
expected = [
mock.call(
fake_cluster, 'Error', 'Scaling cluster failed for the '
'following reason(s): error1, error2')
]
ops._provision_scaled_cluster(fake_cluster.id, {'id': 1})
self.assertEqual(expected, p_change_cluster_status.call_args_list)


@@ -75,6 +75,17 @@ def natural_sort_key(s):
for text in re.split(NATURAL_SORT_RE, s)]
def change_cluster_status_description(cluster, status_description):
ctx = context.ctx()
cluster = conductor.cluster_get(ctx, cluster) if cluster else None
if cluster is None or cluster.status == "Deleting":
return cluster
return conductor.cluster_update(
ctx, cluster, {'status_description': status_description})
def change_cluster_status(cluster, status, status_description=None):
ctx = context.ctx()