Make status description field more useful

If an error occurs while operating on a cluster, the cluster's
status description field is now updated with the exception message

Change-Id: I8973a4ac0d1dbab9c58787d385c12451eb67b1ef
Closes-bug: #1419136
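
For illustration only (not part of the change): a condensed, self-contained
sketch of the decorator pattern this commit introduces in the ops error
handler. DemoCluster, the status helper, and the decorated scale() function
below are simplified stand-ins, not Sahara's real conductor-backed APIs, and
rollback and deleted-cluster handling are omitted; the actual implementation
is in the diff below.

import functools


class DemoCluster(object):
    status = "Active"
    status_description = ""


def change_cluster_status(cluster, status, description=""):
    # Stand-in for the real helper, which goes through the conductor;
    # here it just mutates the object in place.
    cluster.status = status
    cluster.status_description = description


def ops_error_handler(description):
    # Parameterized decorator: the caught exception message is formatted
    # into a human-readable status description instead of being dropped.
    def decorator(f):
        @functools.wraps(f)
        def wrapper(cluster, *args, **kwargs):
            try:
                f(cluster, *args, **kwargs)
            except Exception as ex:
                change_cluster_status(
                    cluster, "Error", description.format(reason=str(ex)))
        return wrapper
    return decorator


@ops_error_handler("Scaling cluster failed for the following "
                   "reason(s): {reason}")
def scale(cluster):
    raise ValueError("quota exceeded")


cluster = DemoCluster()
scale(cluster)
print(cluster.status)               # Error
print(cluster.status_description)
# Scaling cluster failed for the following reason(s): quota exceeded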
Vitaly Gridnev 2015-02-09 15:39:09 +03:00
parent e8c8880d35
commit 5b30113fca
4 changed files with 123 additions and 42 deletions


@@ -76,10 +76,10 @@ def scale_cluster(id, data):
cluster = g.change_cluster_status(cluster, "Validating")
quotas.check_scaling(cluster, to_be_enlarged, additional)
plugin.validate_scaling(cluster, to_be_enlarged, additional)
except Exception:
except Exception as e:
with excutils.save_and_reraise_exception():
g.clean_cluster_from_empty_ng(cluster)
g.change_cluster_status(cluster, "Active")
g.change_cluster_status(cluster, "Active", six.text_type(e))
# If we are here validation is successful.
# So let's update to_be_enlarged map:
@@ -109,7 +109,7 @@ def create_cluster(values):
except Exception as e:
with excutils.save_and_reraise_exception():
g.change_cluster_status(cluster, "Error",
status_description=six.text_type(e))
six.text_type(e))
OPS.provision_cluster(cluster.id)


@@ -14,6 +14,7 @@
# limitations under the License.
import functools
import six
import uuid
from oslo_config import cfg
@@ -23,6 +24,7 @@ import oslo_messaging as messaging
from sahara import conductor as c
from sahara import context
from sahara import exceptions
from sahara.i18n import _
from sahara.i18n import _LE
from sahara.i18n import _LI
from sahara.plugins import base as plugin_base
@@ -150,48 +152,56 @@ class OpsServer(rpc_utils.RPCServer):
return INFRA.get_type_and_version()
def ops_error_handler(f):
@functools.wraps(f)
def wrapper(cluster_id, *args, **kwds):
try:
f(cluster_id, *args, **kwds)
except Exception as ex:
# something happened during cluster operation
def ops_error_handler(description):
def decorator(f):
@functools.wraps(f)
def wrapper(cluster_id, *args, **kwds):
ctx = context.ctx()
cluster = conductor.cluster_get(ctx, cluster_id)
# check if cluster still exists (it might have been removed)
if cluster is None or cluster.status == 'Deleting':
LOG.info(_LI("Cluster %s was deleted or marked for "
"deletion. Canceling current operation."),
cluster_id)
return
LOG.exception(
_LE("Error during operating cluster '%(name)s' (reason: "
"%(reason)s)"), {'name': cluster.name, 'reason': ex})
try:
# trying to rollback
if _rollback_cluster(cluster, ex):
g.change_cluster_status(cluster, "Active")
else:
g.change_cluster_status(cluster, "Error")
except Exception as rex:
# Clearing status description before executing
g.change_cluster_status_description(cluster_id, "")
f(cluster_id, *args, **kwds)
except Exception as ex:
# something happened during cluster operation
cluster = conductor.cluster_get(ctx, cluster_id)
# check if cluster still exists (it might have been
# removed during rollback)
if cluster is None:
LOG.info(_LI("Cluster with %s was deleted. Canceling "
"current operation."), cluster_id)
# check if cluster still exists (it might have been removed)
if cluster is None or cluster.status == 'Deleting':
LOG.info(_LI("Cluster id={id} was deleted or "
"marked for deletion. Canceling "
"current operation.").format(id=cluster_id))
return
msg = six.text_type(ex)
LOG.exception(
_LE("Error during rollback of cluster '%(name)s' (reason: "
"%(reason)s)"), {'name': cluster.name, 'reason': rex})
_LE("Error during operating on cluster {name} (reason: "
"{reason})").format(name=cluster.name, reason=msg))
g.change_cluster_status(cluster, "Error")
try:
# trying to rollback
desc = description.format(reason=msg)
if _rollback_cluster(cluster, ex):
g.change_cluster_status(cluster, "Active", desc)
else:
g.change_cluster_status(cluster, "Error", desc)
except Exception as rex:
cluster = conductor.cluster_get(ctx, cluster_id)
# check if cluster still exists (it might have been
# removed during rollback)
if cluster is None:
LOG.info(_LI("Cluster id={id} was deleted. Canceling "
"current operation.").format(
id=cluster_id))
return
return wrapper
LOG.exception(
_LE("Error during rollback of cluster {name} (reason:"
" {reason})").format(name=cluster.name,
reason=six.text_type(rex)))
desc = "{0}, {1}".format(msg, six.text_type(rex))
g.change_cluster_status(
cluster, "Error", description.format(reason=desc))
return wrapper
return decorator
def _rollback_cluster(cluster, reason):
@@ -223,7 +233,8 @@ def _update_sahara_info(ctx, cluster):
ctx, cluster, {'sahara_info': sahara_info})
@ops_error_handler
@ops_error_handler(
_("Creating cluster failed for the following reason(s): {reason}"))
def _provision_cluster(cluster_id):
ctx, cluster, plugin = _prepare_provisioning(cluster_id)
@@ -256,7 +267,8 @@ def _provision_cluster(cluster_id):
job_manager.run_job(je.id)
@ops_error_handler
@ops_error_handler(
_("Scaling cluster failed for the following reason(s): {reason}"))
def _provision_scaled_cluster(cluster_id, node_group_id_map):
ctx, cluster, plugin = _prepare_provisioning(cluster_id)
@@ -288,7 +300,8 @@ def _provision_scaled_cluster(cluster_id, node_group_id_map):
g.change_cluster_status(cluster, "Active")
@ops_error_handler
@ops_error_handler(
_("Terminating cluster failed for the following reason(s): {reason}"))
def terminate_cluster(cluster_id):
ctx = context.ctx()
cluster = conductor.cluster_get(ctx, cluster_id)


@@ -20,6 +20,12 @@ from sahara.service import ops
from sahara.tests.unit import base
class FakeCluster(object):
id = 'id'
status = "Some_status"
name = "Fake_cluster"
class FakeNodeGroup(object):
id = 'id'
count = 2
@@ -66,9 +72,11 @@ class FakeINFRA(object):
TestOPS.SEQUENCE.append('rollback_cluster')
class TestOPS(base.SaharaTestCase):
class TestOPS(base.SaharaWithDbTestCase):
SEQUENCE = []
@mock.patch('sahara.utils.general.change_cluster_status_description',
return_value=FakeCluster())
@mock.patch('sahara.service.ops._update_sahara_info')
@mock.patch('sahara.service.ops._prepare_provisioning',
return_value=(mock.Mock(), mock.Mock(), FakePlugin()))
@@ -80,7 +88,8 @@ class TestOPS(base.SaharaTestCase):
@mock.patch('sahara.service.edp.job_manager.run_job')
def test_provision_cluster(self, p_run_job, p_job_exec, p_create_trust,
p_conf, p_cluster_get, p_change_status,
p_prep_provisioning, p_update_sahara_info):
p_prep_provisioning, p_update_sahara_info,
p_change_cluster_status_desc):
del self.SEQUENCE[:]
ops.INFRA = FakeINFRA()
ops._provision_cluster('123')
@@ -118,3 +127,51 @@ class TestOPS(base.SaharaTestCase):
self.assertEqual(['on_terminate_cluster', 'shutdown_cluster',
'cluster_destroy'], self.SEQUENCE,
'Order of calls is wrong')
@mock.patch('sahara.utils.general.change_cluster_status_description')
@mock.patch('sahara.service.ops._prepare_provisioning')
@mock.patch('sahara.utils.general.change_cluster_status')
@mock.patch('sahara.service.ops._rollback_cluster')
@mock.patch('sahara.conductor.API.cluster_get')
def test_ops_error_handler_success_rollback(
self, p_cluster_get, p_rollback_cluster, p_change_cluster_status,
p__prepare_provisioning, p_change_cluster_status_desc):
# Test scenario: failed scaling -> success_rollback
fake_cluster = FakeCluster()
p_change_cluster_status_desc.return_value = FakeCluster()
p_rollback_cluster.return_value = True
p_cluster_get.return_value = fake_cluster
p__prepare_provisioning.side_effect = ValueError('error1')
expected = [
mock.call(fake_cluster, 'Active',
'Scaling cluster failed for the following '
'reason(s): error1')
]
ops._provision_scaled_cluster(fake_cluster.id, {'id': 1})
self.assertEqual(expected, p_change_cluster_status.call_args_list)
@mock.patch('sahara.utils.general.change_cluster_status_description')
@mock.patch('sahara.service.ops._prepare_provisioning')
@mock.patch('sahara.utils.general.change_cluster_status')
@mock.patch('sahara.service.ops._rollback_cluster')
@mock.patch('sahara.conductor.API.cluster_get')
def test_ops_error_handler_failed_rollback(
self, p_cluster_get, p_rollback_cluster, p_change_cluster_status,
p__prepare_provisioning, p_change_cluster_status_desc):
# Test scenario: failed scaling -> failed_rollback
fake_cluster = FakeCluster()
p_change_cluster_status_desc.return_value = FakeCluster()
p__prepare_provisioning.side_effect = ValueError('error1')
p_rollback_cluster.side_effect = ValueError('error2')
p_cluster_get.return_value = fake_cluster
expected = [
mock.call(
fake_cluster, 'Error', 'Scaling cluster failed for the '
'following reason(s): error1, error2')
]
ops._provision_scaled_cluster(fake_cluster.id, {'id': 1})
self.assertEqual(expected, p_change_cluster_status.call_args_list)


@@ -75,6 +75,17 @@ def natural_sort_key(s):
for text in re.split(NATURAL_SORT_RE, s)]
def change_cluster_status_description(cluster, status_description):
ctx = context.ctx()
cluster = conductor.cluster_get(ctx, cluster) if cluster else None
if cluster is None or cluster.status == "Deleting":
return cluster
return conductor.cluster_update(
ctx, cluster, {'status_description': status_description})
def change_cluster_status(cluster, status, status_description=None):
ctx = context.ctx()