Merge "long flashcopy operation may block volume service"

Jenkins 2013-10-10 15:57:44 +00:00 committed by Gerrit Code Review
commit 934c8f3fa3
4 changed files with 95 additions and 48 deletions


@@ -39,6 +39,7 @@ from cinder.volume import configuration as conf
from cinder.volume.drivers import storwize_svc
from cinder.volume import volume_types
from eventlet import greenthread
LOG = logging.getLogger(__name__)
@@ -1165,7 +1166,7 @@ port_speed!N/A
'no'])
for d in to_delete:
del self._fcmappings_list[k]
del self._fcmappings_list[d]
return self._print_info_cmd(rows=rows, **kwargs)
@@ -1485,6 +1486,8 @@ class StorwizeSVCDriverTestCase(test.TestCase):
self.driver.do_setup(None)
self.driver.check_for_setup_error()
self.stubs.Set(storwize_svc.time, 'sleep', lambda s: None)
self.stubs.Set(greenthread, 'sleep', lambda *x, **y: None)
self.stubs.Set(storwize_svc, 'CHECK_FCMAPPING_INTERVAL', 0)
def _set_flag(self, flag, value):
group = self.driver.configuration.config_group
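The stubs above keep the new polling path from slowing the unit tests: greenthread.sleep becomes a no-op and CHECK_FCMAPPING_INTERVAL is forced to 0, so each pass of the looping call runs back-to-back. A minimal sketch of the same stubbing idea using only the standard library (the real test uses the self.stubs helper from its test base class; the poller below is purely illustrative):

import time
from unittest import mock

def wait_for_copy(poll_interval, checks=3):
    # Stand-in for the driver's polling: sleep between backend checks.
    for _ in range(checks):
        time.sleep(poll_interval)
    return True

# Neutralize the sleep, just as the test stubs greenthread.sleep above, so a
# 300-second production interval costs nothing under test.
with mock.patch.object(time, 'sleep', lambda *args, **kwargs: None):
    assert wait_for_copy(300) is True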


@@ -51,6 +51,7 @@ from cinder import context
from cinder import exception
from cinder.openstack.common import excutils
from cinder.openstack.common import log as logging
from cinder.openstack.common import loopingcall
from cinder.openstack.common import processutils
from cinder.openstack.common import strutils
from cinder import utils
@@ -112,6 +113,8 @@ storwize_svc_opts = [
CONF = cfg.CONF
CONF.register_opts(storwize_svc_opts)
CHECK_FCMAPPING_INTERVAL = 300
class StorwizeSVCDriver(san.SanDriver):
"""IBM Storwize V7000 and SVC iSCSI/FC volume driver.
@@ -1243,55 +1246,73 @@ class StorwizeSVCDriver(san.SanDriver):
return True
def _ensure_vdisk_no_fc_mappings(self, name, allow_snaps=True):
# Ensure vdisk has no FlashCopy mappings
mapping_ids = self._get_vdisk_fc_mappings(name)
while len(mapping_ids):
wait_for_copy = False
for map_id in mapping_ids:
attrs = self._get_flashcopy_mapping_attributes(map_id)
if not attrs:
continue
source = attrs['source_vdisk_name']
target = attrs['target_vdisk_name']
copy_rate = attrs['copy_rate']
status = attrs['status']
"""Ensure vdisk has no flashcopy mappings."""
timer = loopingcall.FixedIntervalLoopingCall(
self._check_vdisk_fc_mappings, name, allow_snaps)
# Create a timer greenthread. The default volume service heart
# beat is every 10 seconds. The flashcopy usually takes hours
# before it finishes. Don't set the sleep interval shorter
# than the heartbeat. Otherwise volume service heartbeat
# will not be serviced.
LOG.debug(_('Calling _ensure_vdisk_no_fc_mappings: vdisk %s')
% name)
ret = timer.start(interval=CHECK_FCMAPPING_INTERVAL).wait()
timer.stop()
return ret
if copy_rate == '0':
# Case #2: A vdisk that has snapshots
if source == name:
if not allow_snaps:
return False
ssh_cmd = ['svctask', 'chfcmap', '-copyrate', '50',
'-autodelete', 'on', map_id]
out, err = self._run_ssh(ssh_cmd)
wait_for_copy = True
# Case #3: A snapshot
else:
msg = (_('Vdisk %(name)s not involved in '
'mapping %(src)s -> %(tgt)s') %
{'name': name, 'src': source, 'tgt': target})
self._driver_assert(target == name, msg)
if status in ['copying', 'prepared']:
self._run_ssh(['svctask', 'stopfcmap', map_id])
elif status in ['stopping', 'preparing']:
wait_for_copy = True
else:
self._run_ssh(['svctask', 'rmfcmap', '-force',
map_id])
# Case 4: Copy in progress - wait and will autodelete
def _check_vdisk_fc_mappings(self, name, allow_snaps=True):
"""FlashCopy mapping check helper."""
LOG.debug(_('Loopcall: _check_vdisk_fc_mappings(), vdisk %s') % name)
mapping_ids = self._get_vdisk_fc_mappings(name)
wait_for_copy = False
for map_id in mapping_ids:
attrs = self._get_flashcopy_mapping_attributes(map_id)
if not attrs:
continue
source = attrs['source_vdisk_name']
target = attrs['target_vdisk_name']
copy_rate = attrs['copy_rate']
status = attrs['status']
if copy_rate == '0':
# Case #2: A vdisk that has snapshots. Return
# False if snapshot is not allowed.
if source == name:
if not allow_snaps:
raise loopingcall.LoopingCallDone(retvalue=False)
ssh_cmd = ['svctask', 'chfcmap', '-copyrate', '50',
'-autodelete', 'on', map_id]
out, err = self._run_ssh(ssh_cmd)
wait_for_copy = True
# Case #3: A snapshot
else:
if status == 'prepared':
msg = (_('Vdisk %(name)s not involved in '
'mapping %(src)s -> %(tgt)s') %
{'name': name, 'src': source, 'tgt': target})
self._driver_assert(target == name, msg)
if status in ['copying', 'prepared']:
self._run_ssh(['svctask', 'stopfcmap', map_id])
self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
elif status == 'idle_or_copied':
# Prepare failed
self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
else:
# Need to wait for the fcmap to change to
# stopped state before remove fcmap
wait_for_copy = True
if wait_for_copy:
time.sleep(5)
mapping_ids = self._get_vdisk_fc_mappings(name)
return True
elif status in ['stopping', 'preparing']:
wait_for_copy = True
else:
self._run_ssh(['svctask', 'rmfcmap', '-force',
map_id])
# Case 4: Copy in progress - wait and will autodelete
else:
if status == 'prepared':
self._run_ssh(['svctask', 'stopfcmap', map_id])
self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
elif status == 'idle_or_copied':
# Prepare failed
self._run_ssh(['svctask', 'rmfcmap', '-force', map_id])
else:
wait_for_copy = True
if not wait_for_copy or not len(mapping_ids):
raise loopingcall.LoopingCallDone(retvalue=True)
def _delete_vdisk(self, name, force):
"""Deletes existing vdisks.


@@ -64,6 +64,8 @@ from cinder.volume import volume_types
from cinder.taskflow import states
from eventlet.greenpool import GreenPool
LOG = logging.getLogger(__name__)
QUOTAS = quota.QUOTAS
@@ -76,6 +78,10 @@ volume_manager_opts = [
default=300,
help='Timeout for creating the volume to migrate to '
'when performing volume migration (seconds)'),
cfg.BoolOpt('volume_service_inithost_offload',
default=False,
help='Offload pending volume delete during '
'volume service startup'),
]
CONF = cfg.CONF
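The new boolean option follows the usual oslo.config pattern: define a BoolOpt with a default and help string, register it on the global CONF object, and read it back as an attribute. A small self-contained sketch using the present-day package name (assumption: the API matches what this tree imports as oslo.config; everything except the option itself is illustrative):

from oslo_config import cfg

demo_opts = [
    cfg.BoolOpt('volume_service_inithost_offload',
                default=False,
                help='Offload pending volume delete during '
                     'volume service startup'),
]

CONF = cfg.CONF
CONF.register_opts(demo_opts)
CONF([], project='demo')  # parse an empty command line so values can be read
print(CONF.volume_service_inithost_offload)  # False unless overridden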
@@ -144,6 +150,8 @@ class VolumeManager(manager.SchedulerDependentManager):
*args, **kwargs)
self.configuration = Configuration(volume_manager_opts,
config_group=service_name)
self._tp = GreenPool()
if not volume_driver:
# Get from configuration, which will get the default
# if its not using the multi backend
@@ -165,6 +173,9 @@ class VolumeManager(manager.SchedulerDependentManager):
configuration=self.configuration,
db=self.db)
def _add_to_threadpool(self, func, *args, **kwargs):
self._tp.spawn_n(func, *args, **kwargs)
def init_host(self):
"""Do any initialization that needs to be run if this is a
standalone service.
@@ -208,7 +219,15 @@ class VolumeManager(manager.SchedulerDependentManager):
for volume in volumes:
if volume['status'] == 'deleting':
LOG.info(_('Resuming delete on volume: %s') % volume['id'])
self.delete_volume(ctxt, volume['id'])
if CONF.volume_service_inithost_offload:
# Offload all the pending volume delete operations to the
# threadpool to prevent the main volume service thread
# from being blocked.
self._add_to_threadpool(self.delete_volume, ctxt,
volume['id'])
else:
# By default, delete volumes sequentially
self.delete_volume(ctxt, volume['id'])
# collect and publish service capabilities
self.publish_service_capabilities(ctxt)
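With the flag enabled, init_host hands each pending delete to the manager's GreenPool instead of running them one after another, so a backlog of slow deletes no longer stalls startup; the callable and its arguments are passed to _add_to_threadpool separately, and spawn_n returns immediately. A minimal self-contained sketch of the same pattern (the helper name mirrors the diff, but the surrounding class and the fake delete are hypothetical):

import eventlet
from eventlet.greenpool import GreenPool

class _Manager(object):
    def __init__(self):
        self._tp = GreenPool()

    def _add_to_threadpool(self, func, *args, **kwargs):
        # spawn_n queues func on a pooled green thread and returns at once,
        # so the caller is never blocked by a slow operation.
        self._tp.spawn_n(func, *args, **kwargs)

def slow_delete(volume_id):
    eventlet.sleep(0.1)  # stands in for a long backend delete
    print('deleted %s' % volume_id)

mgr = _Manager()
for vid in ('vol-1', 'vol-2', 'vol-3'):
    mgr._add_to_threadpool(slow_delete, vid)
mgr._tp.waitall()  # demo only; the real service keeps handling requests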


@@ -1760,6 +1760,10 @@
# performing volume migration (seconds) (integer value)
#migration_create_volume_timeout_secs=300
# Offload pending volume delete during volume service startup
# (boolean value)
#volume_service_inithost_offload=false
#
# Options defined in cinder.volume.utils
@@ -1770,4 +1774,4 @@
#volume_dd_blocksize=1M
# Total option count: 381
# Total option count: 382
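Operators who want the offload behaviour turn it on in cinder.conf; it defaults to false, so existing deployments keep the sequential delete path. A minimal example (assuming the option is read from the [DEFAULT] group, as the generated sample above implies):

[DEFAULT]
# Run pending volume deletes found at service startup on the volume
# manager's green thread pool instead of sequentially.
volume_service_inithost_offload = true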