Merge "Fix weird change of volume status in re-scheduling"

Jenkins 2015-06-26 04:49:00 +00:00 committed by Gerrit Code Review
commit bff548c690
2 changed files with 74 additions and 19 deletions

@@ -5040,6 +5040,45 @@ class VolumeTestCase(BaseVolumeTestCase):
ret_flag = self.volume.driver.secure_file_operations_enabled()
self.assertFalse(ret_flag)

@mock.patch('cinder.volume.flows.common.make_pretty_name')
@mock.patch('cinder.scheduler.rpcapi.SchedulerAPI.create_volume')
@mock.patch('cinder.volume.flows.manager.create_volume.'
'CreateVolumeFromSpecTask.execute')
def test_create_volume_raise_rescheduled_exception(self, mock_execute,
mock_reschedule,
mock_make_name):
# Create source volume
mock_execute.side_effect = exception.DriverNotInitialized()
mock_reschedule.return_value = None
test_vol = tests_utils.create_volume(self.context,
**self.volume_params)
test_vol_id = test_vol['id']
self.assertRaises(exception.DriverNotInitialized,
self.volume.create_volume,
self.context, test_vol_id,
{'volume_properties': self.volume_params},
{'retry': {'num_attempts': 1, 'host': []}})
self.assertTrue(mock_reschedule.called)
volume = db.volume_get(context.get_admin_context(), test_vol_id)
self.assertEqual('creating', volume['status'])

@mock.patch('cinder.volume.flows.manager.create_volume.'
'CreateVolumeFromSpecTask.execute')
def test_create_volume_raise_unrescheduled_exception(self, mock_execute):
# Create source volume
test_vol = tests_utils.create_volume(self.context,
**self.volume_params)
test_vol_id = test_vol['id']
mock_execute.side_effect = exception.VolumeNotFound(
volume_id=test_vol_id)
self.assertRaises(exception.VolumeNotFound,
self.volume.create_volume,
self.context, test_vol_id,
{'volume_properties': self.volume_params},
{'retry': {'num_attempts': 1, 'host': []}})
volume = db.volume_get(context.get_admin_context(), test_vol_id)
self.assertEqual('error', volume['status'])

class CopyVolumeToImageTestCase(BaseVolumeTestCase):
def fake_local_path(self, volume):
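The two new tests pin down the intended status outcomes: a reschedulable
failure (DriverNotInitialized) leaves the volume in 'creating', while a
non-reschedulable one (VolumeNotFound) errors it out. A minimal,
self-contained sketch of that contract (the dict-based db and the
create_volume helper below are illustrative stand-ins, not Cinder's API):

    class DriverNotInitialized(Exception):
        """Reschedulable failure: the volume should stay in 'creating'."""

    class VolumeNotFound(Exception):
        """Non-reschedulable failure: the volume should end in 'error'."""

    RESCHEDULABLE_TYPES = (DriverNotInitialized,)

    def create_volume(db, volume_id, failure):
        try:
            raise failure
        except Exception as exc:
            # Only non-reschedulable failures flip the status to 'error';
            # otherwise the volume stays 'creating' for the next attempt.
            if not isinstance(exc, RESCHEDULABLE_TYPES):
                db[volume_id] = 'error'
            raise

    db = {'vol-1': 'creating', 'vol-2': 'creating'}
    for vid, exc in (('vol-1', DriverNotInitialized()),
                     ('vol-2', VolumeNotFound())):
        try:
            create_volume(db, vid, exc)
        except Exception:
            pass
    print(db)  # {'vol-1': 'creating', 'vol-2': 'error'}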

@@ -54,11 +54,13 @@ class OnFailureRescheduleTask(flow_utils.CinderTask):
this volume elsewhere.
"""
def __init__(self, reschedule_context, db, scheduler_rpcapi):
def __init__(self, reschedule_context, db, scheduler_rpcapi,
do_reschedule):
requires = ['filter_properties', 'image_id', 'request_spec',
'snapshot_id', 'volume_id', 'context']
super(OnFailureRescheduleTask, self).__init__(addons=[ACTION],
requires=requires)
self.do_reschedule = do_reschedule
self.scheduler_rpcapi = scheduler_rpcapi
self.db = db
self.reschedule_context = reschedule_context
@@ -125,15 +127,16 @@ class OnFailureRescheduleTask(flow_utils.CinderTask):
"""Actions that happen before the rescheduling attempt occur here."""
try:
# Reset the volume state.
# Update volume's timestamp and host.
#
# NOTE(harlowja): this is awkward to be done here, shouldn't
# this happen at the scheduler itself and not before it gets
# sent to the scheduler? (since what happens if it never gets
# there??). It's almost like we need a status of 'on-the-way-to
# scheduler' in the future.
# We don't need to set the volume's status back to 'creating' here,
# since it was never changed to 'error' in the first place.
update = {
'status': 'creating',
'scheduled_at': timeutils.utcnow(),
'host': None
}
@@ -141,12 +144,20 @@ class OnFailureRescheduleTask(flow_utils.CinderTask):
{'update': update, 'volume_id': volume_id})
self.db.volume_update(context, volume_id, update)
except exception.CinderException:
# Don't let resetting the status cause the rescheduling to fail.
LOG.exception(_LE("Volume %s: resetting 'creating' "
"status failed."),
# Don't let updating the state cause the rescheduling to fail.
LOG.exception(_LE("Volume %s: update volume state failed."),
volume_id)

def revert(self, context, result, flow_failures, **kwargs):
volume_id = kwargs['volume_id']
# If rescheduling is not wanted, just set the volume's status to
# error and return.
if not self.do_reschedule:
common.error_out_volume(context, self.db, volume_id)
LOG.error(_LE("Volume %s: create failed"), volume_id)
return
# NOTE(dulek): Revert is occurring and the manager needs to know if
# rescheduling happened. We're injecting this information into the
# exception that will be caught there. This is ugly and we need
@@ -154,12 +165,14 @@ class OnFailureRescheduleTask(flow_utils.CinderTask):
cause = list(flow_failures.values())[0]
cause.exception.rescheduled = False
# Check if we have a cause which can tell us not to reschedule.
# Check if we have a cause which tells us not to reschedule and,
# if so, set the volume's status to error.
for failure in flow_failures.values():
if failure.check(*self.no_reschedule_types):
common.error_out_volume(context, self.db, volume_id)
LOG.error(_LE("Volume %s: create failed"), volume_id)
return
volume_id = kwargs['volume_id']
# Use a different context when rescheduling.
if self.reschedule_context:
context = self.reschedule_context
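Condensed, the revert path now decides among three outcomes in a fixed
order. A simplified sketch of that decision (a paraphrase of the code
above with plain types, not an additional API):

    def revert_decision(do_reschedule, failure_types, no_reschedule_types):
        """Return True when the volume should be rescheduled."""
        # 1. Rescheduling was never requested: error the volume out.
        if not do_reschedule:
            return False
        # 2. One of the failures forbids rescheduling: error out too.
        if any(issubclass(f, no_reschedule_types) for f in failure_types):
            return False
        # 3. Otherwise leave the status at 'creating' and let the
        # scheduler pick a new host.
        return True

    # e.g. revert_decision(True, (ValueError,), (ValueError,)) -> False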
@@ -178,10 +191,11 @@ class ExtractVolumeRefTask(flow_utils.CinderTask):
default_provides = 'volume_ref'
def __init__(self, db, host):
def __init__(self, db, host, set_error=True):
super(ExtractVolumeRefTask, self).__init__(addons=[ACTION])
self.db = db
self.host = host
self.set_error = set_error

def execute(self, context, volume_id):
# NOTE(harlowja): this will fetch the volume from the database, if
@@ -195,9 +209,9 @@ class ExtractVolumeRefTask(flow_utils.CinderTask):
def revert(self, context, volume_id, result, **kwargs):
if isinstance(result, ft.Failure):
return
common.error_out_volume(context, self.db, volume_id)
LOG.error(_LE("Volume %s: create failed"), volume_id)
if self.set_error:
common.error_out_volume(context, self.db, volume_id)
LOG.error(_LE("Volume %s: create failed"), volume_id)

class ExtractVolumeSpecTask(flow_utils.CinderTask):
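Since get_flow() below now constructs this task with set_error=False,
the revert leaves status handling entirely to OnFailureRescheduleTask.
A tiny sketch of the guard, with a plain dict standing in for Cinder's
database layer:

    def extract_ref_revert(db, volume_id, set_error=True):
        if set_error:
            # Legacy behavior; with set_error=False this becomes a no-op
            # and OnFailureRescheduleTask alone owns the final status.
            db[volume_id] = 'error'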
@@ -623,9 +637,6 @@ class CreateVolumeFromSpecTask(flow_utils.CinderTask):
driver_name = self.driver.__class__.__name__
LOG.exception(_LE("Unable to create volume. "
"Volume driver %s not initialized"), driver_name)
# NOTE(flaper87): Set the error status before
# raising any exception.
self.db.volume_update(context, volume_id, dict(status='error'))
raise exception.DriverNotInitialized()
create_type = volume_spec.pop('type', None)
@@ -755,12 +766,17 @@ def get_flow(context, db, driver, scheduler_rpcapi, host, volume_id,
'cgsnapshot_id': cgsnapshot_id,
}
volume_flow.add(ExtractVolumeRefTask(db, host))
volume_flow.add(ExtractVolumeRefTask(db, host, set_error=False))
retry = filter_properties.get('retry', None)
if allow_reschedule and request_spec and retry:
volume_flow.add(OnFailureRescheduleTask(reschedule_context,
db, scheduler_rpcapi))
# Always add OnFailureRescheduleTask and let it handle any change to
# the volume's status when the flow is reverted. This also means
# ExtractVolumeRefTask no longer needs a revert of its own.
do_reschedule = allow_reschedule and request_spec and retry
volume_flow.add(OnFailureRescheduleTask(reschedule_context, db,
scheduler_rpcapi,
do_reschedule))
LOG.debug("Volume reschedule parameters: %(allow)s "
"retry: %(retry)s", {'allow': allow_reschedule, 'retry': retry})