From 3fb86557d790963bad086334422fe9ec11786077 Mon Sep 17 00:00:00 2001
From: wanghao
Date: Tue, 26 May 2015 17:25:26 +0800
Subject: [PATCH] Fix weird change of volume status in re-scheduling

When creating a volume fails with an exception, Cinder re-schedules
the creation request; by default it retries 3 times. But during
re-scheduling the volume's status used to change like this:

creating ----> error ----> creating ----> error ----> error/available

This is confusing to the user: while the volume is being re-scheduled
its status should stay 'creating', and only become 'error' or
'available' at the end.

Change-Id: I7a2d40585b5ef515a9400349a1397cba63278c78
Closes-Bug: #1445601
---
 cinder/tests/unit/test_volume.py             | 39 ++++++++++++++
 cinder/volume/flows/manager/create_volume.py | 54 +++++++++++++-------
 2 files changed, 74 insertions(+), 19 deletions(-)

diff --git a/cinder/tests/unit/test_volume.py b/cinder/tests/unit/test_volume.py
index 9f90f9ccb8d..e61331fcad4 100644
--- a/cinder/tests/unit/test_volume.py
+++ b/cinder/tests/unit/test_volume.py
@@ -5040,6 +5040,45 @@ class VolumeTestCase(BaseVolumeTestCase):
         ret_flag = self.volume.driver.secure_file_operations_enabled()
         self.assertFalse(ret_flag)
 
+    @mock.patch('cinder.volume.flows.common.make_pretty_name')
+    @mock.patch('cinder.scheduler.rpcapi.SchedulerAPI.create_volume')
+    @mock.patch('cinder.volume.flows.manager.create_volume.'
+                'CreateVolumeFromSpecTask.execute')
+    def test_create_volume_raise_rescheduled_exception(self, mock_execute,
+                                                       mock_reschedule,
+                                                       mock_make_name):
+        # Create source volume
+        mock_execute.side_effect = exception.DriverNotInitialized()
+        mock_reschedule.return_value = None
+        test_vol = tests_utils.create_volume(self.context,
+                                             **self.volume_params)
+        test_vol_id = test_vol['id']
+        self.assertRaises(exception.DriverNotInitialized,
+                          self.volume.create_volume,
+                          self.context, test_vol_id,
+                          {'volume_properties': self.volume_params},
+                          {'retry': {'num_attempts': 1, 'host': []}})
+        self.assertTrue(mock_reschedule.called)
+        volume = db.volume_get(context.get_admin_context(), test_vol_id)
+        self.assertEqual('creating', volume['status'])
+
+    @mock.patch('cinder.volume.flows.manager.create_volume.'
+                'CreateVolumeFromSpecTask.execute')
+    def test_create_volume_raise_unrescheduled_exception(self, mock_execute):
+        # Create source volume
+        test_vol = tests_utils.create_volume(self.context,
+                                             **self.volume_params)
+        test_vol_id = test_vol['id']
+        mock_execute.side_effect = exception.VolumeNotFound(
+            volume_id=test_vol_id)
+        self.assertRaises(exception.VolumeNotFound,
+                          self.volume.create_volume,
+                          self.context, test_vol_id,
+                          {'volume_properties': self.volume_params},
+                          {'retry': {'num_attempts': 1, 'host': []}})
+        volume = db.volume_get(context.get_admin_context(), test_vol_id)
+        self.assertEqual('error', volume['status'])
+
 
 class CopyVolumeToImageTestCase(BaseVolumeTestCase):
     def fake_local_path(self, volume):
diff --git a/cinder/volume/flows/manager/create_volume.py b/cinder/volume/flows/manager/create_volume.py
index 1d9f898bdf2..8e58e0e347f 100644
--- a/cinder/volume/flows/manager/create_volume.py
+++ b/cinder/volume/flows/manager/create_volume.py
@@ -54,11 +54,13 @@ class OnFailureRescheduleTask(flow_utils.CinderTask):
     this volume elsewhere.
""" - def __init__(self, reschedule_context, db, scheduler_rpcapi): + def __init__(self, reschedule_context, db, scheduler_rpcapi, + do_reschedule): requires = ['filter_properties', 'image_id', 'request_spec', 'snapshot_id', 'volume_id', 'context'] super(OnFailureRescheduleTask, self).__init__(addons=[ACTION], requires=requires) + self.do_reschedule = do_reschedule self.scheduler_rpcapi = scheduler_rpcapi self.db = db self.reschedule_context = reschedule_context @@ -125,15 +127,16 @@ class OnFailureRescheduleTask(flow_utils.CinderTask): """Actions that happen before the rescheduling attempt occur here.""" try: - # Reset the volume state. + # Update volume's timestamp and host. # # NOTE(harlowja): this is awkward to be done here, shouldn't # this happen at the scheduler itself and not before it gets # sent to the scheduler? (since what happens if it never gets # there??). It's almost like we need a status of 'on-the-way-to # scheduler' in the future. + # We don't need to update the volume's status to creating, since + # we haven't changed it to error. update = { - 'status': 'creating', 'scheduled_at': timeutils.utcnow(), 'host': None } @@ -141,12 +144,20 @@ class OnFailureRescheduleTask(flow_utils.CinderTask): {'update': update, 'volume_id': volume_id}) self.db.volume_update(context, volume_id, update) except exception.CinderException: - # Don't let resetting the status cause the rescheduling to fail. - LOG.exception(_LE("Volume %s: resetting 'creating' " - "status failed."), + # Don't let updating the state cause the rescheduling to fail. + LOG.exception(_LE("Volume %s: update volume state failed."), volume_id) def revert(self, context, result, flow_failures, **kwargs): + volume_id = kwargs['volume_id'] + + # If do not want to be rescheduled, just set the volume's status to + # error and return. + if not self.do_reschedule: + common.error_out_volume(context, self.db, volume_id) + LOG.error(_LE("Volume %s: create failed"), volume_id) + return + # NOTE(dulek): Revert is occurring and manager need to know if # rescheduling happened. We're injecting this information into # exception that will be caught there. This is ugly and we need @@ -154,12 +165,14 @@ class OnFailureRescheduleTask(flow_utils.CinderTask): cause = list(flow_failures.values())[0] cause.exception.rescheduled = False - # Check if we have a cause which can tell us not to reschedule. + # Check if we have a cause which can tell us not to reschedule and + # set the volume's status to error. for failure in flow_failures.values(): if failure.check(*self.no_reschedule_types): + common.error_out_volume(context, self.db, volume_id) + LOG.error(_LE("Volume %s: create failed"), volume_id) return - volume_id = kwargs['volume_id'] # Use a different context when rescheduling. 
         if self.reschedule_context:
             context = self.reschedule_context
@@ -178,10 +191,11 @@ class ExtractVolumeRefTask(flow_utils.CinderTask):
 
     default_provides = 'volume_ref'
 
-    def __init__(self, db, host):
+    def __init__(self, db, host, set_error=True):
         super(ExtractVolumeRefTask, self).__init__(addons=[ACTION])
         self.db = db
         self.host = host
+        self.set_error = set_error
 
     def execute(self, context, volume_id):
         # NOTE(harlowja): this will fetch the volume from the database, if
@@ -195,9 +209,9 @@ class ExtractVolumeRefTask(flow_utils.CinderTask):
     def revert(self, context, volume_id, result, **kwargs):
         if isinstance(result, ft.Failure):
             return
-
-        common.error_out_volume(context, self.db, volume_id)
-        LOG.error(_LE("Volume %s: create failed"), volume_id)
+        if self.set_error:
+            common.error_out_volume(context, self.db, volume_id)
+            LOG.error(_LE("Volume %s: create failed"), volume_id)
 
 
 class ExtractVolumeSpecTask(flow_utils.CinderTask):
@@ -623,9 +637,6 @@ class CreateVolumeFromSpecTask(flow_utils.CinderTask):
             driver_name = self.driver.__class__.__name__
             LOG.exception(_LE("Unable to create volume. "
                               "Volume driver %s not initialized"), driver_name)
-            # NOTE(flaper87): Set the error status before
-            # raising any exception.
-            self.db.volume_update(context, volume_id, dict(status='error'))
             raise exception.DriverNotInitialized()
 
         create_type = volume_spec.pop('type', None)
@@ -755,12 +766,17 @@ def get_flow(context, db, driver, scheduler_rpcapi, host, volume_id,
         'cgsnapshot_id': cgsnapshot_id,
     }
 
-    volume_flow.add(ExtractVolumeRefTask(db, host))
+    volume_flow.add(ExtractVolumeRefTask(db, host, set_error=False))
 
     retry = filter_properties.get('retry', None)
-    if allow_reschedule and request_spec and retry:
-        volume_flow.add(OnFailureRescheduleTask(reschedule_context,
-                                                db, scheduler_rpcapi))
+
+    # Always add OnFailureRescheduleTask and handle the volume's status
+    # change when the task flow is reverted. That also means the revert
+    # of ExtractVolumeRefTask does not need to error out the volume.
+    do_reschedule = allow_reschedule and request_spec and retry
+    volume_flow.add(OnFailureRescheduleTask(reschedule_context, db,
                                            scheduler_rpcapi,
                                            do_reschedule))
    LOG.debug("Volume reschedule parameters: %(allow)s "
              "retry: %(retry)s", {'allow': allow_reschedule,
                                   'retry': retry})
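
Note (not part of the patch): the snippet below is a minimal, standalone
Python sketch of the status handling that OnFailureRescheduleTask.revert()
implements after this change. The volume only moves to 'error' when
rescheduling is disabled (do_reschedule is false) or the failure type is one
of the no_reschedule_types; otherwise it stays 'creating' for the next
attempt. The names revert_status and NO_RESCHEDULE_TYPES are illustrative
only and do not exist in Cinder.

    # Standalone illustration only -- not Cinder code.
    NO_RESCHEDULE_TYPES = (LookupError,)  # stand-in for e.g. VolumeNotFound

    def revert_status(current_status, failure, do_reschedule):
        """Return the volume status after a failed create attempt."""
        if not do_reschedule or isinstance(failure, NO_RESCHEDULE_TYPES):
            # Mirrors common.error_out_volume() in the patched revert().
            return 'error'
        # A reschedule will be attempted, so the status stays 'creating'.
        return current_status

    if __name__ == '__main__':
        # Reschedulable failure: status stays 'creating' between attempts.
        assert revert_status('creating', RuntimeError(), True) == 'creating'
        # Failure type that forbids rescheduling: straight to 'error'.
        assert revert_status('creating', LookupError(), True) == 'error'
        # Rescheduling disabled (do_reschedule=False): straight to 'error'.
        assert revert_status('creating', RuntimeError(), False) == 'error'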