Fix concurrency issues in container driver

Previously, the container driver failed in concurrent environments
when deleting shares and share servers.
Fix this by adding a shared external lock around those operations.

Change-Id: I543fd1226b3d99e06c736f4d925991d0bda7ea6d
Closes-Bug: 1613676
Alexey Ovchinnikov 2016-09-07 15:56:18 +03:00 committed by Valeriy Ponomaryov
parent b9b3b96249
commit a915b75d00
8 changed files with 111 additions and 15 deletions
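For context on the locking pattern used below: the decorators added in the driver rely on manila's utils.synchronized helper, a thin wrapper around oslo.concurrency's lockutils. The following minimal sketch is illustrative only and not part of this change; the delete() function and the lock_path value are made-up examples showing how an external (file-based) lock serializes concurrent callers the way the added decorators do.

from oslo_concurrency import lockutils


# Hypothetical stand-in for ContainerShareDriver.delete_share. The lock name
# mirrors the one used in this change; lock_path here is an example value.
@lockutils.synchronized('container_driver_delete_share_lock', external=True,
                        lock_path='/tmp/manila-locks')
def delete(share_id):
    # external=True takes a file-based lock, so concurrent manila-share
    # workers on the same host serialize on this critical section instead
    # of racing while tearing down shares and share servers.
    print("deleting %s" % share_id)


delete("fake-share-id")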


@@ -212,7 +212,8 @@ elif [[ "$DRIVER" == "dummy" ]]; then
     iniset $TEMPEST_CONFIG share create_networks_when_multitenancy_enabled False
     iniset $TEMPEST_CONFIG share multi_backend True
 elif [[ "$DRIVER" == "container" ]]; then
-    MANILA_TEMPEST_CONCURRENCY=1
+    MANILA_TESTS="(^manila_tempest_tests.tests.api)(?=.*\[.*\bbackend\b.*\])"
+    MANILA_TEMPEST_CONCURRENCY=8
     RUN_MANILA_CG_TESTS=False
     RUN_MANILA_MANAGE_TESTS=False
     iniset $TEMPEST_CONFIG share run_host_assisted_migration_tests False


@@ -157,7 +157,7 @@ elif [[ "$DRIVER" == "zfsonlinux" ]]; then
     save_configuration "MANILA_ZFSONLINUX_USE_SSH" "True"
 elif [[ "$DRIVER" == "container" ]]; then
     save_configuration "SHARE_DRIVER" "manila.share.drivers.container.driver.ContainerShareDriver"
-    save_configuration "SHARE_BACKING_FILE_SIZE" "32000M"
+    save_configuration "SHARE_BACKING_FILE_SIZE" "64000M"
     save_configuration "MANILA_DEFAULT_SHARE_TYPE_EXTRA_SPECS" "'snapshot_support=false'"
 fi


@@ -132,6 +132,7 @@ class ContainerShareDriver(driver.ShareDriver, driver.ExecuteMixin):
         location = self._get_helper(share).create_share(server_id)
         return location

+    @utils.synchronized('container_driver_delete_share_lock', external=True)
     def delete_share(self, context, share, share_server=None):
         LOG.debug("Deleting share %(share)s on server '%(server)s'." %
                   {"server": share_server["id"],
@@ -143,10 +144,21 @@
             server_id,
             ["umount", "/shares/%s" % share.share_id]
         )
-        self.container.execute(
-            server_id,
-            ["rm", "-fR", "/shares/%s" % share.share_id]
-        )
+        # (aovchinnikov): bug 1621784 manifests itself here as well as in
+        # storage helper. There is a chance that we won't be able to remove
+        # this directory, despite the fact that it is not shared anymore and
+        # already contains nothing. In such case the driver should not fail
+        # share deletion, but issue a warning.
+        try:
+            self.container.execute(
+                server_id,
+                ["rm", "-fR", "/shares/%s" % share.share_id]
+            )
+        except exception.ProcessExecutionError as e:
+            LOG.warning(_LW("Failed to remove /shares/%(share)s directory in "
+                            "container %(cont)s."), {"share": share.share_id,
+                                                     "cont": server_id})
+            LOG.error(e)
         self.storage.remove_storage(share)
         LOG.debug("Deletion of share %s is completed!", share.share_id)
@@ -243,6 +255,7 @@
         LOG.debug("Now container %(id)s should be accessible from network "
                   "%(subnet)s by address %(ip)s." % msg_helper)

+    @utils.synchronized("container_driver_teardown_lock", external=True)
     def _teardown_server(self, *args, **kwargs):
         server_id = self._get_container_name(kwargs["server_details"]["id"])
         self.container.stop_container(server_id)
@@ -259,9 +272,14 @@
                 continue
             elif container_id[0] == server_id:
                 LOG.debug("Deleting veth %s.", veth)
-                self._execute("ovs-vsctl", "--", "del-port",
-                              self.configuration.container_ovs_bridge_name,
-                              veth, run_as_root=True)
+                try:
+                    self._execute("ovs-vsctl", "--", "del-port",
+                                  self.configuration.container_ovs_bridge_name,
+                                  veth, run_as_root=True)
+                except exception.ProcessExecutionError as e:
+                    LOG.warning(_LW("Failed to delete port %s: port "
+                                    "vanished."), veth)
+                    LOG.error(e)

     def _get_veth_state(self):
         result = self._execute("brctl", "show",


@@ -17,9 +17,10 @@ import os
 import re

 from oslo_config import cfg
+from oslo_log import log

 from manila import exception
-from manila.i18n import _
+from manila.i18n import _, _LW
 from manila.share import driver
@@ -34,6 +35,7 @@ lv_opts = [
 ]

 CONF.register_opts(lv_opts)
+LOG = log.getLogger(__name__)


 class LVMHelper(driver.ExecuteMixin):
@@ -79,8 +81,21 @@
                       run_as_root=True)

     def remove_storage(self, share):
-        self._execute("lvremove", "-f", "--autobackup", "n",
-                      self._get_lv_device(share), run_as_root=True)
+        to_remove = self._get_lv_device(share)
+        try:
+            self._execute("umount", to_remove, run_as_root=True)
+        except exception.ProcessExecutionError as e:
+            LOG.warning(_LW("Failed to umount helper directory %s."),
+                        to_remove)
+            LOG.error(e)
+        # (aovchinnikov): bug 1621784 manifests itself in jamming logical
+        # volumes, so try removing once and issue warning until it is fixed.
+        try:
+            self._execute("lvremove", "-f", "--autobackup", "n",
+                          to_remove, run_as_root=True)
+        except exception.ProcessExecutionError as e:
+            LOG.warning(_LW("Failed to remove logical volume %s.") % to_remove)
+            LOG.error(e)

     def extend_share(self, share, new_size, share_server=None):
         lv_device = self._get_lv_device(share)


@@ -119,7 +119,7 @@ class ContainerShareDriverTestCase(test.TestCase):
         self._driver.create_share(self._context, self.share,
                                   {'id': 'fake'}))

-    def test_delete_share(self):
+    def test_delete_share_ok(self):
         helper = mock.Mock()
         self.mock_object(self._driver, "_get_helper",
                          mock.Mock(return_value=helper))
@@ -133,6 +133,19 @@
             ['rm', '-fR', '/shares/fakeshareid']
         )

+    def test_delete_share_rm_fails(self):
+        def fake_execute(*args):
+            if 'rm' in args[1]:
+                raise exception.ProcessExecutionError()
+        self.mock_object(driver.LOG, "warning")
+        self.mock_object(self._driver, "_get_helper")
+        self.mock_object(self._driver.container, "execute", fake_execute)
+        self.mock_object(self._driver.storage, 'remove_storage')
+
+        self._driver.delete_share(self._context, self.share, {'id': 'fake'})
+
+        self.assertTrue(driver.LOG.warning.called)
+
     def test_extend_share(self):
         share = cont_fakes.fake_share()
         actual_arguments = []
@@ -193,7 +206,7 @@
         setattr(self._driver.configuration.local_conf,
                 'network_api_class',
                 'manila.share.drivers.container.driver.ContainerShareDriver')
-        driver.LOG.warning = mock.Mock()
+        self.mock_object(driver.LOG, "warning")
         self._driver.check_for_setup_error()
@@ -239,6 +252,26 @@
         self.assertEqual(expected_arguments.sort(), actual_arguments.sort())

+    @ddt.data(['veth0000000'], ['veth0000000' * 2])
+    def test__teardown_server_veth_disappeared_mysteriously(self,
+                                                            list_of_veths):
+        def fake_ovs_execute(*args, **kwargs):
+            if len(args) == 3:
+                return list_of_veths
+            if len(args) == 4:
+                return ('fake:manila_b5afb5c1_6011_43c4_8a37_29820e6951a7', '')
+            if 'del-port' in args:
+                raise exception.ProcessExecutionError()
+            else:
+                return 0
+        self.mock_object(driver.LOG, "warning")
+        self.mock_object(self._driver, "_execute", fake_ovs_execute)
+
+        self._driver._teardown_server(
+            server_details={"id": "b5afb5c1-6011-43c4-8a37-29820e6951a7"})
+
+        self.assertTrue(driver.LOG.warning.called)
+
     @ddt.data(['veth0000000'], ['veth0000000' * 2])
     def test__teardown_server_check_continuation(self, list_of_veths):
         def fake_ovs_execute(*args, **kwargs):


@@ -216,7 +216,7 @@ class DockerCIFSHelperTestCase(test.TestCase):
         self.DockerCIFSHelper._get_existing_users = mock.Mock()
         self.DockerCIFSHelper._get_existing_users.side_effect = TypeError
         self.DockerCIFSHelper._set_users = mock.Mock()
-        protocol_helper.LOG.warning = mock.Mock()
+        self.mock_object(protocol_helper.LOG, "warning")

         self.DockerCIFSHelper._deny_access("fake_share", "fake_server_id",
                                            "fake_user2", "rw")


@@ -85,6 +85,7 @@ class LVMHelperTestCase(test.TestCase):
     def test_remove_storage(self):
         actual_arguments = []
         expected_arguments = [
+            ('umount', '/dev/manila_docker_volumes/fakeshareid'),
             ('lvremove', '-f', '--autobackup', 'n',
              '/dev/manila_docker_volumes/fakeshareid')
         ]
@@ -96,6 +97,30 @@
         self.assertEqual(expected_arguments, actual_arguments)

+    def test_remove_storage_umount_failed(self):
+        def fake_execute(*args, **kwargs):
+            if 'umount' in args:
+                raise exception.ProcessExecutionError()
+        self.mock_object(storage_helper.LOG, "warning")
+        self.mock_object(self.LVMHelper, "_execute", fake_execute)
+
+        self.LVMHelper.remove_storage(self.share)
+
+        self.assertTrue(storage_helper.LOG.warning.called)
+
+    def test_remove_storage_lvremove_failed(self):
+        def fake_execute(*args, **kwargs):
+            if 'lvremove' in args:
+                raise exception.ProcessExecutionError()
+        self.mock_object(storage_helper.LOG, "warning")
+        self.mock_object(self.LVMHelper, "_execute", fake_execute)
+
+        self.LVMHelper.remove_storage(self.share)
+
+        self.assertTrue(storage_helper.LOG.warning.called)
+
     def test_extend_share(self):
         actual_arguments = []
         expected_arguments = [


@@ -0,0 +1,4 @@
+---
+fixes:
+  - Container driver. Fixed concurrent deletion of shares and share
+    servers by adding a shared external lock.