Make status description field more useful
If an error occurs while operating on a cluster, the cluster's status description field will be updated with the exception message. Change-Id: I8973a4ac0d1dbab9c58787d385c12451eb67b1ef Closes-bug: #1419136
This commit is contained in:
parent
e8c8880d35
commit
5b30113fca
@ -76,10 +76,10 @@ def scale_cluster(id, data):
|
||||
cluster = g.change_cluster_status(cluster, "Validating")
|
||||
quotas.check_scaling(cluster, to_be_enlarged, additional)
|
||||
plugin.validate_scaling(cluster, to_be_enlarged, additional)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
with excutils.save_and_reraise_exception():
|
||||
g.clean_cluster_from_empty_ng(cluster)
|
||||
g.change_cluster_status(cluster, "Active")
|
||||
g.change_cluster_status(cluster, "Active", six.text_type(e))
|
||||
|
||||
# If we are here validation is successful.
|
||||
# So let's update to_be_enlarged map:
|
||||
@ -109,7 +109,7 @@ def create_cluster(values):
|
||||
except Exception as e:
|
||||
with excutils.save_and_reraise_exception():
|
||||
g.change_cluster_status(cluster, "Error",
|
||||
status_description=six.text_type(e))
|
||||
six.text_type(e))
|
||||
|
||||
OPS.provision_cluster(cluster.id)
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
import functools
|
||||
import six
|
||||
import uuid
|
||||
|
||||
from oslo_config import cfg
|
||||
@ -23,6 +24,7 @@ import oslo_messaging as messaging
|
||||
from sahara import conductor as c
|
||||
from sahara import context
|
||||
from sahara import exceptions
|
||||
from sahara.i18n import _
|
||||
from sahara.i18n import _LE
|
||||
from sahara.i18n import _LI
|
||||
from sahara.plugins import base as plugin_base
|
||||
@ -150,48 +152,56 @@ class OpsServer(rpc_utils.RPCServer):
|
||||
return INFRA.get_type_and_version()
|
||||
|
||||
|
||||
def ops_error_handler(f):
|
||||
@functools.wraps(f)
|
||||
def wrapper(cluster_id, *args, **kwds):
|
||||
try:
|
||||
f(cluster_id, *args, **kwds)
|
||||
except Exception as ex:
|
||||
# something happened during cluster operation
|
||||
def ops_error_handler(description):
|
||||
def decorator(f):
|
||||
@functools.wraps(f)
|
||||
def wrapper(cluster_id, *args, **kwds):
|
||||
ctx = context.ctx()
|
||||
cluster = conductor.cluster_get(ctx, cluster_id)
|
||||
# check if cluster still exists (it might have been removed)
|
||||
if cluster is None or cluster.status == 'Deleting':
|
||||
LOG.info(_LI("Cluster %s was deleted or marked for "
|
||||
"deletion. Canceling current operation."),
|
||||
cluster_id)
|
||||
return
|
||||
|
||||
LOG.exception(
|
||||
_LE("Error during operating cluster '%(name)s' (reason: "
|
||||
"%(reason)s)"), {'name': cluster.name, 'reason': ex})
|
||||
|
||||
try:
|
||||
# trying to rollback
|
||||
if _rollback_cluster(cluster, ex):
|
||||
g.change_cluster_status(cluster, "Active")
|
||||
else:
|
||||
g.change_cluster_status(cluster, "Error")
|
||||
except Exception as rex:
|
||||
# Clearing status description before executing
|
||||
g.change_cluster_status_description(cluster_id, "")
|
||||
f(cluster_id, *args, **kwds)
|
||||
except Exception as ex:
|
||||
# something happened during cluster operation
|
||||
cluster = conductor.cluster_get(ctx, cluster_id)
|
||||
# check if cluster still exists (it might have been
|
||||
# removed during rollback)
|
||||
if cluster is None:
|
||||
LOG.info(_LI("Cluster with %s was deleted. Canceling "
|
||||
"current operation."), cluster_id)
|
||||
# check if cluster still exists (it might have been removed)
|
||||
if cluster is None or cluster.status == 'Deleting':
|
||||
LOG.info(_LI("Cluster id={id} was deleted or "
|
||||
"marked for deletion. Canceling "
|
||||
"current operation.").format(id=cluster_id))
|
||||
return
|
||||
|
||||
msg = six.text_type(ex)
|
||||
LOG.exception(
|
||||
_LE("Error during rollback of cluster '%(name)s' (reason: "
|
||||
"%(reason)s)"), {'name': cluster.name, 'reason': rex})
|
||||
_LE("Error during operating on cluster {name} (reason: "
|
||||
"{reason})").format(name=cluster.name, reason=msg))
|
||||
|
||||
g.change_cluster_status(cluster, "Error")
|
||||
try:
|
||||
# trying to rollback
|
||||
desc = description.format(reason=msg)
|
||||
if _rollback_cluster(cluster, ex):
|
||||
g.change_cluster_status(cluster, "Active", desc)
|
||||
else:
|
||||
g.change_cluster_status(cluster, "Error", desc)
|
||||
except Exception as rex:
|
||||
cluster = conductor.cluster_get(ctx, cluster_id)
|
||||
# check if cluster still exists (it might have been
|
||||
# removed during rollback)
|
||||
if cluster is None:
|
||||
LOG.info(_LI("Cluster id={id} was deleted. Canceling "
|
||||
"current operation.").format(
|
||||
id=cluster_id))
|
||||
return
|
||||
|
||||
return wrapper
|
||||
LOG.exception(
|
||||
_LE("Error during rollback of cluster {name} (reason:"
|
||||
" {reason})").format(name=cluster.name,
|
||||
reason=six.text_type(rex)))
|
||||
desc = "{0}, {1}".format(msg, six.text_type(rex))
|
||||
g.change_cluster_status(
|
||||
cluster, "Error", description.format(reason=desc))
|
||||
return wrapper
|
||||
return decorator
|
||||
|
||||
|
||||
def _rollback_cluster(cluster, reason):
|
||||
@ -223,7 +233,8 @@ def _update_sahara_info(ctx, cluster):
|
||||
ctx, cluster, {'sahara_info': sahara_info})
|
||||
|
||||
|
||||
@ops_error_handler
|
||||
@ops_error_handler(
|
||||
_("Creating cluster failed for the following reason(s): {reason}"))
|
||||
def _provision_cluster(cluster_id):
|
||||
ctx, cluster, plugin = _prepare_provisioning(cluster_id)
|
||||
|
||||
@ -256,7 +267,8 @@ def _provision_cluster(cluster_id):
|
||||
job_manager.run_job(je.id)
|
||||
|
||||
|
||||
@ops_error_handler
|
||||
@ops_error_handler(
|
||||
_("Scaling cluster failed for the following reason(s): {reason}"))
|
||||
def _provision_scaled_cluster(cluster_id, node_group_id_map):
|
||||
ctx, cluster, plugin = _prepare_provisioning(cluster_id)
|
||||
|
||||
@ -288,7 +300,8 @@ def _provision_scaled_cluster(cluster_id, node_group_id_map):
|
||||
g.change_cluster_status(cluster, "Active")
|
||||
|
||||
|
||||
@ops_error_handler
|
||||
@ops_error_handler(
|
||||
_("Terminating cluster failed for the following reason(s): {reason}"))
|
||||
def terminate_cluster(cluster_id):
|
||||
ctx = context.ctx()
|
||||
cluster = conductor.cluster_get(ctx, cluster_id)
|
||||
|
@ -20,6 +20,12 @@ from sahara.service import ops
|
||||
from sahara.tests.unit import base
|
||||
|
||||
|
||||
class FakeCluster(object):
    """Stand-in cluster object carrying a fixed id, status and name."""

    # Fixed values; the status is deliberately not 'Deleting'.
    name = "Fake_cluster"
    status = "Some_status"
    id = 'id'
|
||||
|
||||
|
||||
class FakeNodeGroup(object):
|
||||
id = 'id'
|
||||
count = 2
|
||||
@ -66,9 +72,11 @@ class FakeINFRA(object):
|
||||
TestOPS.SEQUENCE.append('rollback_cluster')
|
||||
|
||||
|
||||
class TestOPS(base.SaharaTestCase):
|
||||
class TestOPS(base.SaharaWithDbTestCase):
|
||||
SEQUENCE = []
|
||||
|
||||
@mock.patch('sahara.utils.general.change_cluster_status_description',
|
||||
return_value=FakeCluster())
|
||||
@mock.patch('sahara.service.ops._update_sahara_info')
|
||||
@mock.patch('sahara.service.ops._prepare_provisioning',
|
||||
return_value=(mock.Mock(), mock.Mock(), FakePlugin()))
|
||||
@ -80,7 +88,8 @@ class TestOPS(base.SaharaTestCase):
|
||||
@mock.patch('sahara.service.edp.job_manager.run_job')
|
||||
def test_provision_cluster(self, p_run_job, p_job_exec, p_create_trust,
|
||||
p_conf, p_cluster_get, p_change_status,
|
||||
p_prep_provisioning, p_update_sahara_info):
|
||||
p_prep_provisioning, p_update_sahara_info,
|
||||
p_change_cluster_status_desc):
|
||||
del self.SEQUENCE[:]
|
||||
ops.INFRA = FakeINFRA()
|
||||
ops._provision_cluster('123')
|
||||
@ -118,3 +127,51 @@ class TestOPS(base.SaharaTestCase):
|
||||
self.assertEqual(['on_terminate_cluster', 'shutdown_cluster',
|
||||
'cluster_destroy'], self.SEQUENCE,
|
||||
'Order of calls is wrong')
|
||||
|
||||
@mock.patch('sahara.utils.general.change_cluster_status_description')
@mock.patch('sahara.service.ops._prepare_provisioning')
@mock.patch('sahara.utils.general.change_cluster_status')
@mock.patch('sahara.service.ops._rollback_cluster')
@mock.patch('sahara.conductor.API.cluster_get')
def test_ops_error_hadler_success_rollback(
        self, p_cluster_get, p_rollback_cluster, p_change_cluster_status,
        p__prepare_provisioning, p_change_cluster_status_desc):
    # Scenario: scaling fails, then the rollback succeeds. The only
    # recorded status change must be to 'Active', carrying the
    # scaling-failure description built from the original error.
    cluster_stub = FakeCluster()
    p_cluster_get.return_value = cluster_stub
    p_change_cluster_status_desc.return_value = FakeCluster()

    # Provisioning raises; the rollback reports success.
    p__prepare_provisioning.side_effect = ValueError('error1')
    p_rollback_cluster.return_value = True

    ops._provision_scaled_cluster(cluster_stub.id, {'id': 1})

    description = ('Scaling cluster failed for the following '
                   'reason(s): error1')
    self.assertEqual(
        [mock.call(cluster_stub, 'Active', description)],
        p_change_cluster_status.call_args_list)
|
||||
|
||||
@mock.patch('sahara.utils.general.change_cluster_status_description')
@mock.patch('sahara.service.ops._prepare_provisioning')
@mock.patch('sahara.utils.general.change_cluster_status')
@mock.patch('sahara.service.ops._rollback_cluster')
@mock.patch('sahara.conductor.API.cluster_get')
def test_ops_error_hadler_failed_rollback(
        self, p_cluster_get, p_rollback_cluster, p_change_cluster_status,
        p__prepare_provisioning, p_change_cluster_status_desc):
    # Scenario: scaling fails and the rollback fails as well. The only
    # recorded status change must be to 'Error', with a description
    # that lists both error messages in order.
    cluster_stub = FakeCluster()
    p_cluster_get.return_value = cluster_stub
    p_change_cluster_status_desc.return_value = FakeCluster()

    # Both the provisioning step and the rollback raise.
    p__prepare_provisioning.side_effect = ValueError('error1')
    p_rollback_cluster.side_effect = ValueError('error2')

    ops._provision_scaled_cluster(cluster_stub.id, {'id': 1})

    description = ('Scaling cluster failed for the '
                   'following reason(s): error1, error2')
    self.assertEqual(
        [mock.call(cluster_stub, 'Error', description)],
        p_change_cluster_status.call_args_list)
|
||||
|
@ -75,6 +75,17 @@ def natural_sort_key(s):
|
||||
for text in re.split(NATURAL_SORT_RE, s)]
|
||||
|
||||
|
||||
def change_cluster_status_description(cluster, status_description):
    """Overwrite the status description of a cluster that is still alive.

    :param cluster: value handed to ``conductor.cluster_get`` to look the
        cluster up; a falsy value skips the lookup entirely.
    :param status_description: text stored under the
        ``status_description`` key of the cluster.
    :returns: the result of ``conductor.cluster_update`` when the update
        is performed; otherwise the looked-up cluster unchanged, which is
        ``None`` when nothing was found or no lookup was made.
    """
    ctx = context.ctx()

    current = None
    if cluster:
        current = conductor.cluster_get(ctx, cluster)

    # Only touch clusters that exist and are not on their way out.
    if current is not None and current.status != "Deleting":
        return conductor.cluster_update(
            ctx, current, {'status_description': status_description})
    return current
|
||||
|
||||
|
||||
def change_cluster_status(cluster, status, status_description=None):
|
||||
ctx = context.ctx()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user