Merge "Added several checks on deleted cluster to prevent error logs"

2014-06-16 09:45:58 +00:00 · 2014-06-16 09:45:58 +00:00 · 9b7818a958
commit 9b7818a958
parent 0faaebc435 d429c22a85
4 changed files with 69 additions and 0 deletions
--- a/sahara/service/direct_engine.py
+++ b/sahara/service/direct_engine.py
@ -56,10 +56,18 @@ class DirectEngine(e.Engine):

            self._await_active(cluster, instances)

+            if not g.check_cluster_exists(cluster):
+                LOG.info(g.format_cluster_deleted_message(cluster))
+                return
+
            self._assign_floating_ips(instances)

            self._await_networks(cluster, instances)

+            if not g.check_cluster_exists(cluster):
+                LOG.info(g.format_cluster_deleted_message(cluster))
+                return
+
            cluster = conductor.cluster_get(ctx, cluster)

            # attach volumes
@ -73,6 +81,10 @@ class DirectEngine(e.Engine):
            self._configure_instances(cluster)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
+                if not g.check_cluster_exists(cluster):
+                    LOG.info(g.format_cluster_deleted_message(cluster))
+                    return
+
                self._log_operation_exception(
                    "Can't start cluster '%s' (reason: %s)", cluster, ex)

@ -98,10 +110,18 @@ class DirectEngine(e.Engine):

            self._await_active(cluster, instances)

+            if not g.check_cluster_exists(cluster):
+                LOG.info(g.format_cluster_deleted_message(cluster))
+                return []
+
            self._assign_floating_ips(instances)

            self._await_networks(cluster, instances)

+            if not g.check_cluster_exists(cluster):
+                LOG.info(g.format_cluster_deleted_message(cluster))
+                return []
+
            cluster = conductor.cluster_get(ctx, cluster)

            volumes.attach_to_instances(
@ -109,6 +129,10 @@ class DirectEngine(e.Engine):

        except Exception as ex:
            with excutils.save_and_reraise_exception():
+                if not g.check_cluster_exists(cluster):
+                    LOG.info(g.format_cluster_deleted_message(cluster))
+                    return []
+
                self._log_operation_exception(
                    "Can't scale cluster '%s' (reason: %s)", cluster, ex)

--- a/sahara/service/heat_engine.py
+++ b/sahara/service/heat_engine.py
@ -51,6 +51,9 @@ class HeatEngine(e.Engine):
            launcher.launch_instances(ctx, cluster, target_count)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
+                if not g.check_cluster_exists(cluster):
+                    LOG.info(g.format_cluster_deleted_message(cluster))
+                    return
                self._log_operation_exception(
                    "Can't start cluster '%s' (reason: %s)", cluster, ex)

@ -83,6 +86,9 @@ class HeatEngine(e.Engine):
            launcher.launch_instances(ctx, cluster, target_count)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
+                if not g.check_cluster_exists(cluster):
+                    LOG.info(g.format_cluster_deleted_message(cluster))
+                    return
                self._log_operation_exception(
                    "Can't scale cluster '%s' (reason: %s)", cluster, ex)

@ -92,6 +98,9 @@ class HeatEngine(e.Engine):
                    self._rollback_cluster_scaling(
                        ctx, cluster, rollback_count, target_count)
                except Exception:
+                    if not g.check_cluster_exists(cluster):
+                        LOG.info(g.format_cluster_deleted_message(cluster))
+                        return
                    # if something fails during the rollback, we stop
                    # doing anything further
                    cluster = conductor.cluster_update(ctx, cluster,
@ -196,6 +205,10 @@ class _CreateLauncher(HeatEngine):

        self._await_networks(cluster, instances)

+        if not g.check_cluster_exists(cluster):
+            LOG.info(g.format_cluster_deleted_message(cluster))
+            return
+
        # prepare all instances
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": self.STAGES[2]})
--- a/sahara/service/ops.py
+++ b/sahara/service/ops.py
@ -129,30 +129,48 @@ def _provision_cluster(cluster_id):
    cluster = conductor.cluster_get(ctx, cluster_id)
    INFRA.create_cluster(cluster)

+    if not g.check_cluster_exists(cluster):
+        LOG.info(g.format_cluster_deleted_message(cluster))
+        return
+
    # configure cluster
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Configuring"})
    LOG.info(g.format_cluster_status(cluster))
    try:
        plugin.configure_cluster(cluster)
    except Exception as ex:
+        if not g.check_cluster_exists(cluster):
+            LOG.info(g.format_cluster_deleted_message(cluster))
+            return
        LOG.exception("Can't configure cluster '%s' (reason: %s)",
                      cluster.name, ex)
        cluster = conductor.cluster_update(ctx, cluster, {"status": "Error"})
        LOG.info(g.format_cluster_status(cluster))
        return

+    if not g.check_cluster_exists(cluster):
+        LOG.info(g.format_cluster_deleted_message(cluster))
+        return
+
    # starting prepared and configured cluster
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Starting"})
    LOG.info(g.format_cluster_status(cluster))
    try:
        plugin.start_cluster(cluster)
    except Exception as ex:
+        if not g.check_cluster_exists(cluster):
+            LOG.info(g.format_cluster_deleted_message(cluster))
+            return
        LOG.exception("Can't start services for cluster '%s' (reason: %s)",
                      cluster.name, ex)
        cluster = conductor.cluster_update(ctx, cluster, {"status": "Error"})
        LOG.info(g.format_cluster_status(cluster))
        return

+    if not g.check_cluster_exists(cluster):
+        LOG.info(g.format_cluster_deleted_message(cluster))
+        return
+
    # cluster is now up and ready
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
    LOG.info(g.format_cluster_status(cluster))
@ -198,6 +216,9 @@ def _provision_scaled_cluster(cluster_id, node_group_id_map):
            instances = g.get_instances(cluster, instances)
            plugin.scale_cluster(cluster, instances)
        except Exception as ex:
+            if not g.check_cluster_exists(cluster):
+                LOG.info(g.format_cluster_deleted_message(cluster))
+                return
            LOG.exception("Can't scale cluster '%s' (reason: %s)",
                          cluster.name, ex)
            cluster = conductor.cluster_update(ctx, cluster,
@ -205,6 +226,10 @@ def _provision_scaled_cluster(cluster_id, node_group_id_map):
            LOG.info(g.format_cluster_status(cluster))
            return

+    if not g.check_cluster_exists(cluster):
+        LOG.info(g.format_cluster_deleted_message(cluster))
+        return
+
    cluster = conductor.cluster_update(ctx, cluster, {"status": "Active"})
    LOG.info(g.format_cluster_status(cluster))

--- a/sahara/utils/general.py
+++ b/sahara/utils/general.py
@ -65,6 +65,13 @@ def format_cluster_status(cluster):
    return msg % ("Unknown", "Unknown")


+def format_cluster_deleted_message(cluster):
+    msg = "Cluster %s (id=%s) was deleted. Canceling current operation."
+    if cluster:
+        return msg % (cluster.name, cluster.id)
+    return msg % ("Unknown", "Unknown")
+
+
 def check_cluster_exists(cluster):
    ctx = context.ctx()
    # check if cluster still exists (it might have been removed)