Make NBD retry logic more generic, add retry to loop.

Other device implementations require retry logic as well. This
change pushes the retry logic up one layer so that its easy to
implement for the other drivers that will benefit from it, and
then adds retry to the loop driver.

This change also adds some unit test coverage for the loop
driver.

Change-Id: Iab0d42d5075e9d50b4e7eb8c7fcef12cae281b40
This commit is contained in:
Michael Still 2012-12-15 18:32:20 +11:00
parent 30ddc85a10
commit a246c5576d
5 changed files with 154 additions and 18 deletions

View File

@ -0,0 +1,105 @@
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2012 Michael Still
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import fixtures
from testtools.matchers import Equals
from testtools.matchers import MatchesListwise
from nova import test
from nova import utils
from nova.virt.disk.mount import loop
def _fake_noop(*args, **kwargs):
return
def _fake_trycmd_losetup_works(*args, **kwargs):
return '/dev/loop0', ''
def _fake_trycmd_losetup_fails(*args, **kwards):
return '', 'doh'
class LoopTestCase(test.TestCase):
def test_get_dev(self):
tempdir = self.useFixture(fixtures.TempDir()).path
l = loop.LoopMount(None, tempdir)
self.useFixture(fixtures.MonkeyPatch('nova.utils.trycmd',
_fake_trycmd_losetup_works))
self.useFixture(fixtures.MonkeyPatch('nova.utils.execute',
_fake_noop))
# No error logged, device consumed
self.assertTrue(l.get_dev())
self.assertTrue(l.linked)
self.assertEquals('', l.error)
self.assertEquals('/dev/loop0', l.device)
# Free
l.unget_dev()
self.assertFalse(l.linked)
self.assertEquals('', l.error)
self.assertEquals(None, l.device)
def test_inner_get_dev_fails(self):
tempdir = self.useFixture(fixtures.TempDir()).path
l = loop.LoopMount(None, tempdir)
self.useFixture(fixtures.MonkeyPatch('nova.utils.trycmd',
_fake_trycmd_losetup_fails))
# No error logged, device consumed
self.assertFalse(l._inner_get_dev())
self.assertFalse(l.linked)
self.assertNotEquals('', l.error)
self.assertEquals(None, l.device)
# Free
l.unget_dev()
self.assertFalse(l.linked)
self.assertEquals(None, l.device)
def test_get_dev_timeout(self):
tempdir = self.useFixture(fixtures.TempDir()).path
l = loop.LoopMount(None, tempdir)
self.useFixture(fixtures.MonkeyPatch('time.sleep', _fake_noop))
self.useFixture(fixtures.MonkeyPatch('nova.utils.trycmd',
_fake_trycmd_losetup_fails))
self.useFixture(fixtures.MonkeyPatch(('nova.virt.disk.mount.api.'
'MAX_DEVICE_WAIT'), -10))
# Always fail to get a device
def fake_get_dev_fails():
return False
l._inner_get_dev = fake_get_dev_fails
# Fail to get a device
self.assertFalse(l.get_dev())
def test_unget_dev(self):
tempdir = self.useFixture(fixtures.TempDir()).path
l = loop.LoopMount(None, tempdir)
self.useFixture(fixtures.MonkeyPatch('nova.utils.execute',
_fake_noop))
# This just checks that a free of something we don't have doesn't
# throw an exception
l.unget_dev()

View File

@ -268,8 +268,8 @@ class NbdTestCase(test.TestCase):
self.fake_exists_one))
self.useFixture(fixtures.MonkeyPatch('nova.utils.trycmd',
self.fake_trycmd_creates_pid))
self.useFixture(fixtures.MonkeyPatch(('nova.virt.disk.mount.nbd.'
'MAX_NBD_WAIT'), -10))
self.useFixture(fixtures.MonkeyPatch(('nova.virt.disk.mount.api.'
'MAX_DEVICE_WAIT'), -10))
# No error logged, device consumed
self.assertFalse(n.get_dev())

View File

@ -16,6 +16,7 @@
"""Support for mounting virtual image files"""
import os
import time
from nova.openstack.common import importutils
from nova.openstack.common import log as logging
@ -23,6 +24,8 @@ from nova import utils
LOG = logging.getLogger(__name__)
MAX_DEVICE_WAIT = 30
class Mount(object):
"""Standard mounting operations, that can be overridden by subclasses.
@ -102,6 +105,26 @@ class Mount(object):
self.linked = True
return True
def _get_dev_retry_helper(self):
"""Some implementations need to retry their get_dev."""
# NOTE(mikal): This method helps implement retries. The implementation
# simply calls _get_dev_retry_helper from their get_dev, and implements
# _inner_get_dev with their device acquistion logic. The NBD
# implementation has an example.
start_time = time.time()
device = self._inner_get_dev()
while not device:
LOG.info(_('Device allocation failed. Will retry in 2 seconds.'))
time.sleep(2)
if time.time() - start_time > MAX_DEVICE_WAIT:
LOG.warn(_('Device allocation failed after repeated retries.'))
return False
device = self._inner_get_dev()
return True
def _inner_get_dev(self):
raise NotImplementedError()
def unget_dev(self):
"""Release the block device from the file system namespace."""
self.linked = False

View File

@ -26,11 +26,14 @@ class LoopMount(api.Mount):
"""loop back support for raw images."""
mode = 'loop'
def get_dev(self):
def _inner_get_dev(self):
out, err = utils.trycmd('losetup', '--find', '--show', self.image,
run_as_root=True)
if err:
self.error = _('Could not attach image to loopback: %s') % err
LOG.info(_('Loop mount error: %s'), self.error)
self.linked = False
self.device = None
return False
self.device = out.strip()
@ -38,9 +41,22 @@ class LoopMount(api.Mount):
self.linked = True
return True
def get_dev(self):
# NOTE(mikal): the retry is required here in case we are low on loop
# devices. Note however that modern kernels will use more loop devices
# if they exist. If you're seeing lots of retries, consider adding
# more devices.
return self._get_dev_retry_helper()
def unget_dev(self):
if not self.linked:
return
# NOTE(mikal): On some kernels, losetup -d will intermittently fail,
# thus leaking a loop device unless the losetup --detach is retried:
# https://lkml.org/lkml/2012/9/28/62
LOG.debug(_("Release loop device %s"), self.device)
utils.execute('losetup', '--detach', self.device, run_as_root=True)
utils.execute('losetup', '--detach', self.device, run_as_root=True,
attempts=3)
self.linked = False
self.device = None

View File

@ -37,7 +37,6 @@ CONF = cfg.CONF
CONF.register_opts(nbd_opts)
NBD_DEVICE_RE = re.compile('nbd[0-9]+')
MAX_NBD_WAIT = 30
class NbdMount(api.Mount):
@ -89,6 +88,7 @@ class NbdMount(api.Mount):
run_as_root=True)
if err:
self.error = _('qemu-nbd error: %s') % err
LOG.info(_('NBD mount error: %s'), self.error)
return False
# NOTE(vish): this forks into another process, so give it a chance
@ -100,12 +100,15 @@ class NbdMount(api.Mount):
break
time.sleep(1)
else:
self.error = _('nbd device %s did not show up') % device
LOG.info(_('NBD mount error: %s'), self.error)
# Cleanup
_out, err = utils.trycmd('qemu-nbd', '-d', device,
run_as_root=True)
if err:
LOG.warn(_('Detaching from erroneous nbd device returned '
'error: %s'), err)
self.error = _('nbd device %s did not show up') % device
return False
self.error = ''
@ -114,18 +117,7 @@ class NbdMount(api.Mount):
def get_dev(self):
"""Retry requests for NBD devices."""
start_time = time.time()
device = self._inner_get_dev()
while not device:
LOG.info(_('nbd device allocation failed. Will retry in 2 '
'seconds.'))
time.sleep(2)
if time.time() - start_time > MAX_NBD_WAIT:
LOG.warn(_('nbd device allocation failed after repeated '
'retries.'))
return False
device = self._inner_get_dev()
return True
return self._get_dev_retry_helper()
def unget_dev(self):
if not self.linked: