Merge "long flashcopy operation may block volume service"

Jenkins 2013-10-10 15:57:44 +00:00 committed by Gerrit Code Review
commit 934c8f3fa3
4 changed files with 95 additions and 48 deletions


@@ -39,6 +39,7 @@ from cinder.volume import configuration as conf
from cinder.volume.drivers import storwize_svc
from cinder.volume import volume_types
from eventlet import greenthread
LOG = logging.getLogger(__name__)
@@ -1165,7 +1166,7 @@ port_speed!N/A
'no'])
for d in to_delete:
del self._fcmappings_list[k]
del self._fcmappings_list[d]
return self._print_info_cmd(rows=rows, **kwargs)
@@ -1485,6 +1486,8 @@ class StorwizeSVCDriverTestCase(test.TestCase):
self.driver.do_setup(None)
self.driver.check_for_setup_error()
self.stubs.Set(storwize_svc.time, 'sleep', lambda s: None)
self.stubs.Set(greenthread, 'sleep', lambda *x, **y: None)
self.stubs.Set(storwize_svc, 'CHECK_FCMAPPING_INTERVAL', 0)
def _set_flag(self, flag, value):
group = self.driver.configuration.config_group
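The stubs above keep the new polling path from slowing the unit tests: greenthread.sleep becomes a no-op and CHECK_FCMAPPING_INTERVAL is forced to 0, so each pass of the looping call runs back-to-back. A minimal sketch of the same stubbing idea using only the standard library (the real test uses the self.stubs helper from its test base class; the poller below is purely illustrative):

import time
from unittest import mock

def wait_for_copy(poll_interval, checks=3):
    # Stand-in for the driver's polling: sleep between backend checks.
    for _ in range(checks):
        time.sleep(poll_interval)
    return True

# Neutralize the sleep, just as the test stubs greenthread.sleep above, so a
# 300-second production interval costs nothing under test.
with mock.patch.object(time, 'sleep', lambda *args, **kwargs: None):
    assert wait_for_copy(300) is True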


@@ -51,6 +51,7 @@ from cinder import context
from cinder import exception
from cinder.openstack.common import excutils
from cinder.openstack.common import log as logging
from cinder.openstack.common import loopingcall
from cinder.openstack.common import processutils
from cinder.openstack.common import strutils
from cinder import utils
@@ -112,6 +113,8 @@ storwize_svc_opts = [
CONF = cfg.CONF
CONF.register_opts(storwize_svc_opts)
CHECK_FCMAPPING_INTERVAL = 300
class StorwizeSVCDriver(san.SanDriver):
"""IBM Storwize V7000 and SVC iSCSI/FC volume driver.
@@ -1243,55 +1246,73 @@ class StorwizeSVCDriver(san.SanDriver):
return True
def _ensure_vdisk_no_fc_mappings(self, name, allow_snaps=True):
# Ensure vdisk has no FlashCopy mappings
mapping_ids = self._get_vdisk_fc_mappings(name)
while len(mapping_ids):
wait_for_copy = False
for map_id in mapping_ids:
attrs = self._get_flashcopy_mapping_attributes(map_id)
if not attrs:
continue
source = attrs['source_vdisk_name']
target = attrs['target_vdisk_name']
copy_rate = attrs['copy_rate']
status = attrs['status']
"""Ensure vdisk has no flashcopy mappings."""
timer = loopingcall.FixedIntervalLoopingCall(
self._check_vdisk_fc_mappings, name, allow_snaps)
# Create a timer greenthread. The default volume service heart
# beat is every 10 seconds. The flashcopy usually takes hours
# before it finishes. Don't set the sleep interval shorter
# than the heartbeat. Otherwise volume service heartbeat
# will not be serviced.
LOG.debug(_('Calling _ensure_vdisk_no_fc_mappings: vdisk %s')
% name)
ret = timer.start(interval=CHECK_FCMAPPING_INTERVAL).wait()
timer.stop()
return ret
if copy_rate == '0':
# Case #2: A vdisk that has snapshots
if source == name:
if not allow_snaps:
return False
ssh_cmd = ['svctask', 'chfcmap', '-copyrate', '50',
'-autodelete', 'on', map_id]
out, err = self._run_ssh(ssh_cmd)
wait_for_copy = True
# Case #3: A snapshot
else:
msg = (_('Vdisk %(name)s not involved in '
'mapping %(src)s -> %(tgt)s') %
{'name': name, 'src': source, 'tgt': target})
self._driver_assert(target == name, msg)
if status in ['copying', 'prepared']:
self._run_ssh(['svctask', 'stopfcmap', map_id])
elif status in ['stopping', 'preparing']:
wait_for_copy = True
else:
self._run_ssh(['svctask', 'rmfcmap', '-force',
map_id])
# Case 4: Copy in progress - wait and will autodelete
def _check_vdisk_fc_mappings(self, name, allow_snaps=True):
"""FlashCopy mapping check helper."""
LOG.debug(_('Loopcall: _check_vdisk_fc_mappings(), vdisk %s') % name)
mapping_ids = self._get_vdisk_fc_mappings(name)
wait_for_copy = False
for map_id in mapping_ids:
attrs = self._get_flashcopy_mapping_attributes(map_id)
if not attrs:
continue
source = attrs['source_vdisk_name']
target = attrs['target_vdisk_name']
copy_rate = attrs['copy_rate']
status = attrs['status']
if copy_rate == '0':
# Case #2: A vdisk that has snapshots. Return
# False if snapshot is not allowed.
if source == name:
if not allow_snaps:
raise loopingcall.LoopingCallDone(retvalue=False)
ssh_cmd = ['svctask', 'chfcmap', '-copyrate', '50',
'-autodelete', 'on', map_id]
out, err = self._run_ssh(ssh_cmd)
wait_for_copy = True
# Case #3: A snapshot
else:
if status == 'prepared':
msg = (_('Vdisk %(name)s not involved in '
'mapping %(src)s -> %(tgt)s') %
{'name': name, 'src': source, 'tgt': target})
self._driver_assert(target == name, msg)
if status in ['copying', 'prepared']:
self._run_ssh(['svctask', 'stopfcmap', map_id])
self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
elif status == 'idle_or_copied':
# Prepare failed
self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
else:
# Need to wait for the fcmap to change to
# stopped state before remove fcmap
wait_for_copy = True
if wait_for_copy:
time.sleep(5)
mapping_ids = self._get_vdisk_fc_mappings(name)
return True
elif status in ['stopping', 'preparing']:
wait_for_copy = True
else:
self._run_ssh(['svctask', 'rmfcmap', '-force',
map_id])
# Case 4: Copy in progress - wait and will autodelete
else:
if status == 'prepared':
self._run_ssh(['svctask', 'stopfcmap', map_id])
self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
elif status == 'idle_or_copied':
# Prepare failed
self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
else:
wait_for_copy = True
if not wait_for_copy or not len(mapping_ids):
raise loopingcall.LoopingCallDone(retvalue=True)
def _delete_vdisk(self, name, force):
"""Deletes existing vdisks.


@@ -64,6 +64,8 @@ from cinder.volume import volume_types
from cinder.taskflow import states
from eventlet.greenpool import GreenPool
LOG = logging.getLogger(__name__)
QUOTAS = quota.QUOTAS
@@ -76,6 +78,10 @@ volume_manager_opts = [
default=300,
help='Timeout for creating the volume to migrate to '
'when performing volume migration (seconds)'),
cfg.BoolOpt('volume_service_inithost_offload',
default=False,
help='Offload pending volume delete during '
'volume service startup'),
]
CONF = cfg.CONF
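The new boolean option follows the usual oslo.config pattern: define a BoolOpt with a default and help string, register it on the global CONF object, and read it back as an attribute. A small self-contained sketch using the present-day package name (assumption: the API matches what this tree imports as oslo.config; everything except the option itself is illustrative):

from oslo_config import cfg

demo_opts = [
    cfg.BoolOpt('volume_service_inithost_offload',
                default=False,
                help='Offload pending volume delete during '
                     'volume service startup'),
]

CONF = cfg.CONF
CONF.register_opts(demo_opts)
CONF([], project='demo')  # parse an empty command line so values can be read
print(CONF.volume_service_inithost_offload)  # False unless overridden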
@@ -144,6 +150,8 @@ class VolumeManager(manager.SchedulerDependentManager):
*args, **kwargs)
self.configuration = Configuration(volume_manager_opts,
config_group=service_name)
self._tp = GreenPool()
if not volume_driver:
# Get from configuration, which will get the default
# if its not using the multi backend
@@ -165,6 +173,9 @@ class VolumeManager(manager.SchedulerDependentManager):
configuration=self.configuration,
db=self.db)
def _add_to_threadpool(self, func, *args, **kwargs):
self._tp.spawn_n(func, *args, **kwargs)
def init_host(self):
"""Do any initialization that needs to be run if this is a
standalone service.
@@ -208,7 +219,15 @@ class VolumeManager(manager.SchedulerDependentManager):
for volume in volumes:
if volume['status'] == 'deleting':
LOG.info(_('Resuming delete on volume: %s') % volume['id'])
self.delete_volume(ctxt, volume['id'])
if CONF.volume_service_inithost_offload:
# Offload all the pending volume delete operations to the
# threadpool to prevent the main volume service thread
# from being blocked.
self._add_to_threadpool(self.delete_volume, ctxt,
volume['id'])
else:
# By default, delete volumes sequentially
self.delete_volume(ctxt, volume['id'])
# collect and publish service capabilities
self.publish_service_capabilities(ctxt)
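With the flag enabled, init_host hands each pending delete to the manager's GreenPool instead of running them one after another, so a backlog of slow deletes no longer stalls startup; the callable and its arguments are passed to _add_to_threadpool separately, and spawn_n returns immediately. A minimal self-contained sketch of the same pattern (the helper name mirrors the diff, but the surrounding class and the fake delete are hypothetical):

import eventlet
from eventlet.greenpool import GreenPool

class _Manager(object):
    def __init__(self):
        self._tp = GreenPool()

    def _add_to_threadpool(self, func, *args, **kwargs):
        # spawn_n queues func on a pooled green thread and returns at once,
        # so the caller is never blocked by a slow operation.
        self._tp.spawn_n(func, *args, **kwargs)

def slow_delete(volume_id):
    eventlet.sleep(0.1)  # stands in for a long backend delete
    print('deleted %s' % volume_id)

mgr = _Manager()
for vid in ('vol-1', 'vol-2', 'vol-3'):
    mgr._add_to_threadpool(slow_delete, vid)
mgr._tp.waitall()  # demo only; the real service keeps handling requests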


@@ -1760,6 +1760,10 @@
# performing volume migration (seconds) (integer value)
#migration_create_volume_timeout_secs=300
# Offload pending volume delete during volume service startup
# (boolean value)
#volume_service_inithost_offload=false
#
# Options defined in cinder.volume.utils
@@ -1770,4 +1774,4 @@
#volume_dd_blocksize=1M
# Total option count: 381
# Total option count: 382
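Operators who want the offload behaviour turn it on in cinder.conf; it defaults to false, so existing deployments keep the sequential delete path. A minimal example (assuming the option is read from the [DEFAULT] group, as the generated sample above implies):

[DEFAULT]
# Run pending volume deletes found at service startup on the volume
# manager's green thread pool instead of sequentially.
volume_service_inithost_offload = true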