Make status description field more useful

If during operating cluster some error occurs, status
description field for cluster will be updated with
exception message

Change-Id: I8973a4ac0d1dbab9c58787d385c12451eb67b1ef
Closes-bug: #1419136
This commit is contained in:
Vitaly Gridnev
2015-02-09 15:39:09 +03:00
parent e8c8880d35
commit 5b30113fca
4 changed files with 123 additions and 42 deletions

View File

@@ -14,6 +14,7 @@
# limitations under the License.
import functools
import six
import uuid
from oslo_config import cfg
@@ -23,6 +24,7 @@ import oslo_messaging as messaging
from sahara import conductor as c
from sahara import context
from sahara import exceptions
from sahara.i18n import _
from sahara.i18n import _LE
from sahara.i18n import _LI
from sahara.plugins import base as plugin_base
@@ -150,48 +152,56 @@ class OpsServer(rpc_utils.RPCServer):
return INFRA.get_type_and_version()
def ops_error_handler(f):
@functools.wraps(f)
def wrapper(cluster_id, *args, **kwds):
try:
f(cluster_id, *args, **kwds)
except Exception as ex:
# something happened during cluster operation
def ops_error_handler(description):
def decorator(f):
@functools.wraps(f)
def wrapper(cluster_id, *args, **kwds):
ctx = context.ctx()
cluster = conductor.cluster_get(ctx, cluster_id)
# check if cluster still exists (it might have been removed)
if cluster is None or cluster.status == 'Deleting':
LOG.info(_LI("Cluster %s was deleted or marked for "
"deletion. Canceling current operation."),
cluster_id)
return
LOG.exception(
_LE("Error during operating cluster '%(name)s' (reason: "
"%(reason)s)"), {'name': cluster.name, 'reason': ex})
try:
# trying to rollback
if _rollback_cluster(cluster, ex):
g.change_cluster_status(cluster, "Active")
else:
g.change_cluster_status(cluster, "Error")
except Exception as rex:
# Clearing status description before executing
g.change_cluster_status_description(cluster_id, "")
f(cluster_id, *args, **kwds)
except Exception as ex:
# something happened during cluster operation
cluster = conductor.cluster_get(ctx, cluster_id)
# check if cluster still exists (it might have been
# removed during rollback)
if cluster is None:
LOG.info(_LI("Cluster with %s was deleted. Canceling "
"current operation."), cluster_id)
# check if cluster still exists (it might have been removed)
if cluster is None or cluster.status == 'Deleting':
LOG.info(_LI("Cluster id={id} was deleted or "
"marked for deletion. Canceling "
"current operation.").format(id=cluster_id))
return
msg = six.text_type(ex)
LOG.exception(
_LE("Error during rollback of cluster '%(name)s' (reason: "
"%(reason)s)"), {'name': cluster.name, 'reason': rex})
_LE("Error during operating on cluster {name} (reason: "
"{reason})").format(name=cluster.name, reason=msg))
g.change_cluster_status(cluster, "Error")
try:
# trying to rollback
desc = description.format(reason=msg)
if _rollback_cluster(cluster, ex):
g.change_cluster_status(cluster, "Active", desc)
else:
g.change_cluster_status(cluster, "Error", desc)
except Exception as rex:
cluster = conductor.cluster_get(ctx, cluster_id)
# check if cluster still exists (it might have been
# removed during rollback)
if cluster is None:
LOG.info(_LI("Cluster id={id} was deleted. Canceling "
"current operation.").format(
id=cluster_id))
return
return wrapper
LOG.exception(
_LE("Error during rollback of cluster {name} (reason:"
" {reason})").format(name=cluster.name,
reason=six.text_type(rex)))
desc = "{0}, {1}".format(msg, six.text_type(rex))
g.change_cluster_status(
cluster, "Error", description.format(reason=desc))
return wrapper
return decorator
def _rollback_cluster(cluster, reason):
@@ -223,7 +233,8 @@ def _update_sahara_info(ctx, cluster):
ctx, cluster, {'sahara_info': sahara_info})
@ops_error_handler
@ops_error_handler(
_("Creating cluster failed for the following reason(s): {reason}"))
def _provision_cluster(cluster_id):
ctx, cluster, plugin = _prepare_provisioning(cluster_id)
@@ -256,7 +267,8 @@ def _provision_cluster(cluster_id):
job_manager.run_job(je.id)
@ops_error_handler
@ops_error_handler(
_("Scaling cluster failed for the following reason(s): {reason}"))
def _provision_scaled_cluster(cluster_id, node_group_id_map):
ctx, cluster, plugin = _prepare_provisioning(cluster_id)
@@ -288,7 +300,8 @@ def _provision_scaled_cluster(cluster_id, node_group_id_map):
g.change_cluster_status(cluster, "Active")
@ops_error_handler
@ops_error_handler(
_("Terminating cluster failed for the following reason(s): {reason}"))
def terminate_cluster(cluster_id):
ctx = context.ctx()
cluster = conductor.cluster_get(ctx, cluster_id)