Merge "Make libvirt able to trigger a backend image copy when needed"
This commit is contained in:
commit
4ae5b61c4f
|
@ -940,6 +940,50 @@ Create sparse logical volumes (with virtualsize) if this flag is set to True.
|
|||
cfg.StrOpt('images_rbd_ceph_conf',
|
||||
default='', # default determined by librados
|
||||
help='Path to the ceph configuration file to use'),
|
||||
cfg.StrOpt('images_rbd_glance_store_name',
|
||||
default='',
|
||||
help="""
|
||||
The name of the Glance store that represents the rbd cluster in use by
|
||||
this node. If set, this will allow Nova to request that Glance copy an
|
||||
image from an existing non-local store into the one named by this option
|
||||
before booting so that proper Copy-on-Write behavior is maintained.
|
||||
|
||||
Related options:
|
||||
|
||||
* images_type - must be set to ``rbd``
|
||||
* images_rbd_glance_copy_poll_interval - controls the status poll frequency
|
||||
* images_rbd_glance_copy_timeout - controls the overall copy timeout
|
||||
"""),
|
||||
cfg.IntOpt('images_rbd_glance_copy_poll_interval',
|
||||
default=15,
|
||||
help="""
|
||||
The interval in seconds with which to poll Glance after asking for it
|
||||
to copy an image to the local rbd store. This affects how often we ask
|
||||
Glance to report on copy completion, and thus should be short enough that
|
||||
we notice quickly, but not too aggressive that we generate undue load on
|
||||
the Glance server.
|
||||
|
||||
Related options:
|
||||
|
||||
* images_type - must be set to ``rbd``
|
||||
* images_rbd_glance_store_name - must be set to a store name
|
||||
"""),
|
||||
cfg.IntOpt('images_rbd_glance_copy_timeout',
|
||||
default=600,
|
||||
help="""
|
||||
The overall maximum time we will wait for Glance to complete an image
|
||||
copy to our local rbd store. This should be long enough to allow large
|
||||
images to be copied over the network link between our local store and the
|
||||
one where images typically reside. The downside of setting this too long
|
||||
is just to catch the case where the image copy is stalled or proceeding too
|
||||
slowly to be useful. Actual errors will be reported by Glance and noticed
|
||||
according to the poll interval.
|
||||
|
||||
Related options:
|
||||
* images_type - must be set to ``rbd``
|
||||
* images_rbd_glance_store_name - must be set to a store name
|
||||
* images_rbd_glance_copy_poll_interval - controls the failure time-to-notice
|
||||
"""),
|
||||
cfg.StrOpt('hw_disk_discard',
|
||||
choices=('ignore', 'unmap'),
|
||||
help="""
|
||||
|
|
|
@ -25,6 +25,7 @@ import fixtures
|
|||
import mock
|
||||
from oslo_concurrency import lockutils
|
||||
from oslo_config import fixture as config_fixture
|
||||
from oslo_service import loopingcall
|
||||
from oslo_utils import imageutils
|
||||
from oslo_utils import units
|
||||
from oslo_utils import uuidutils
|
||||
|
@ -1768,6 +1769,230 @@ class RbdTestCase(_ImageTestCase, test.NoDBTestCase):
|
|||
mock_destroy.assert_called_once_with(image.rbd_name,
|
||||
pool=image.driver.pool)
|
||||
|
||||
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
|
||||
def test_copy_to_store(self, mock_imgapi):
|
||||
# Test copy_to_store() happy path where we ask for the image
|
||||
# to be copied, it goes into progress and then completes.
|
||||
self.flags(images_rbd_glance_copy_poll_interval=0,
|
||||
group='libvirt')
|
||||
self.flags(images_rbd_glance_store_name='store',
|
||||
group='libvirt')
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
mock_imgapi.get.side_effect = [
|
||||
# Simulate a race between starting the copy and the first poll
|
||||
{'stores': []},
|
||||
# Second poll shows it in progress
|
||||
{'os_glance_importing_to_stores': ['store'],
|
||||
'stores': []},
|
||||
# Third poll shows it has also been copied to a non-local store
|
||||
{'os_glance_importing_to_stores': ['store'],
|
||||
'stores': ['other']},
|
||||
# Should-be-last poll shows it complete
|
||||
{'os_glance_importing_to_stores': [],
|
||||
'stores': ['other', 'store']},
|
||||
]
|
||||
image.copy_to_store(self.CONTEXT, {'id': 'foo'})
|
||||
mock_imgapi.copy_image_to_store.assert_called_once_with(
|
||||
self.CONTEXT, 'foo', 'store')
|
||||
self.assertEqual(4, mock_imgapi.get.call_count)
|
||||
|
||||
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
|
||||
def test_copy_to_store_race_with_existing(self, mock_imgapi):
|
||||
# Test copy_to_store() where we race to ask Glance to do the
|
||||
# copy with another node. One of us will get a BadRequest, which
|
||||
# should not cause us to fail. If our desired store is now
|
||||
# in progress, continue to wait like we would have if we had
|
||||
# won the race.
|
||||
self.flags(images_rbd_glance_copy_poll_interval=0,
|
||||
group='libvirt')
|
||||
self.flags(images_rbd_glance_store_name='store',
|
||||
group='libvirt')
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
|
||||
mock_imgapi.copy_image_to_store.side_effect = (
|
||||
exception.ImageBadRequest(image_id='foo',
|
||||
response='already in progress'))
|
||||
# Make the first poll indicate that the image has already
|
||||
# been copied
|
||||
mock_imgapi.get.return_value = {'stores': ['store', 'other']}
|
||||
|
||||
# Despite the (expected) exception from the copy, we should
|
||||
# not raise here if the subsequent poll works.
|
||||
image.copy_to_store(self.CONTEXT, {'id': 'foo'})
|
||||
|
||||
mock_imgapi.get.assert_called_once_with(self.CONTEXT,
|
||||
'foo',
|
||||
include_locations=True)
|
||||
mock_imgapi.copy_image_to_store.assert_called_once_with(
|
||||
self.CONTEXT, 'foo', 'store')
|
||||
|
||||
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
|
||||
def test_copy_to_store_import_impossible(self, mock_imgapi):
|
||||
# Test copy_to_store() where Glance tells us that the image
|
||||
# is not copy-able for some reason (like it is not active yet
|
||||
# or some other workflow reason).
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
mock_imgapi.copy_image_to_store.side_effect = (
|
||||
exception.ImageImportImpossible(image_id='foo',
|
||||
reason='because tests'))
|
||||
self.assertRaises(exception.ImageUnacceptable,
|
||||
image.copy_to_store,
|
||||
self.CONTEXT, {'id': 'foo'})
|
||||
|
||||
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
|
||||
def test_copy_to_store_import_failed_other_reason(self, mock_imgapi):
|
||||
# Test copy_to_store() where some unexpected failure gets raised.
|
||||
# We should bubble that up so it gets all the way back to the caller
|
||||
# of the clone() itself, which can handle it independent of one of
|
||||
# the image-specific exceptions.
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
mock_imgapi.copy_image_to_store.side_effect = test.TestingException
|
||||
# Make sure any other exception makes it through, as those are already
|
||||
# expected failures by the callers of the imagebackend code.
|
||||
self.assertRaises(test.TestingException,
|
||||
image.copy_to_store,
|
||||
self.CONTEXT, {'id': 'foo'})
|
||||
|
||||
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
|
||||
def test_copy_to_store_import_failed_in_progress(self, mock_imgapi):
|
||||
# Test copy_to_store() in the situation where we ask for the copy,
|
||||
# things start to look good (in progress) and later get reported
|
||||
# as failed.
|
||||
self.flags(images_rbd_glance_copy_poll_interval=0,
|
||||
group='libvirt')
|
||||
self.flags(images_rbd_glance_store_name='store',
|
||||
group='libvirt')
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
mock_imgapi.get.side_effect = [
|
||||
# First poll shows it in progress
|
||||
{'os_glance_importing_to_stores': ['store'],
|
||||
'stores': []},
|
||||
# Second poll shows it failed
|
||||
{'os_glance_failed_import': ['store'],
|
||||
'stores': []},
|
||||
]
|
||||
exc = self.assertRaises(exception.ImageUnacceptable,
|
||||
image.copy_to_store,
|
||||
self.CONTEXT, {'id': 'foo'})
|
||||
self.assertIn('unsuccessful because', str(exc))
|
||||
|
||||
@mock.patch.object(loopingcall.FixedIntervalWithTimeoutLoopingCall,
|
||||
'start')
|
||||
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
|
||||
def test_copy_to_store_import_failed_timeout(self, mock_imgapi,
|
||||
mock_timer_start):
|
||||
# Test copy_to_store() simulating the case where we timeout waiting
|
||||
# for Glance to do the copy.
|
||||
self.flags(images_rbd_glance_store_name='store',
|
||||
group='libvirt')
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
mock_timer_start.side_effect = loopingcall.LoopingCallTimeOut()
|
||||
exc = self.assertRaises(exception.ImageUnacceptable,
|
||||
image.copy_to_store,
|
||||
self.CONTEXT, {'id': 'foo'})
|
||||
self.assertIn('timed out', str(exc))
|
||||
mock_imgapi.copy_image_to_store.assert_called_once_with(
|
||||
self.CONTEXT, 'foo', 'store')
|
||||
|
||||
@mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
|
||||
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
|
||||
def test_clone_copy_to_store(self, mock_imgapi, mock_driver_):
|
||||
# Call image.clone() in a way that will cause it to fall through
|
||||
# the locations check to the copy-to-store behavior, and assert
|
||||
# that after the copy, we recurse (without becoming infinite) and
|
||||
# do the check again.
|
||||
self.flags(images_rbd_glance_store_name='store', group='libvirt')
|
||||
fake_image = {
|
||||
'id': 'foo',
|
||||
'disk_format': 'raw',
|
||||
'locations': ['fake'],
|
||||
}
|
||||
mock_imgapi.get.return_value = fake_image
|
||||
mock_driver = mock_driver_.return_value
|
||||
mock_driver.is_cloneable.side_effect = [False, True]
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
with mock.patch.object(image, 'copy_to_store') as mock_copy:
|
||||
image.clone(self.CONTEXT, 'foo')
|
||||
mock_copy.assert_called_once_with(self.CONTEXT, fake_image)
|
||||
mock_driver.is_cloneable.assert_has_calls([
|
||||
# First call is the initial check
|
||||
mock.call('fake', fake_image),
|
||||
# Second call with the same location must be because we
|
||||
# recursed after the copy-to-store operation
|
||||
mock.call('fake', fake_image)])
|
||||
|
||||
@mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
|
||||
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
|
||||
def test_clone_copy_to_store_failed(self, mock_imgapi, mock_driver_):
|
||||
# Call image.clone() in a way that will cause it to fall through
|
||||
# the locations check to the copy-to-store behavior, but simulate
|
||||
# some situation where we didn't actually copy the image and the
|
||||
# recursed check does not succeed. Assert that we do not copy again,
|
||||
# nor recurse again, and raise the expected error.
|
||||
self.flags(images_rbd_glance_store_name='store', group='libvirt')
|
||||
fake_image = {
|
||||
'id': 'foo',
|
||||
'disk_format': 'raw',
|
||||
'locations': ['fake'],
|
||||
}
|
||||
mock_imgapi.get.return_value = fake_image
|
||||
mock_driver = mock_driver_.return_value
|
||||
mock_driver.is_cloneable.side_effect = [False, False]
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
with mock.patch.object(image, 'copy_to_store') as mock_copy:
|
||||
self.assertRaises(exception.ImageUnacceptable,
|
||||
image.clone, self.CONTEXT, 'foo')
|
||||
mock_copy.assert_called_once_with(self.CONTEXT, fake_image)
|
||||
mock_driver.is_cloneable.assert_has_calls([
|
||||
# First call is the initial check
|
||||
mock.call('fake', fake_image),
|
||||
# Second call with the same location must be because we
|
||||
# recursed after the copy-to-store operation
|
||||
mock.call('fake', fake_image)])
|
||||
|
||||
@mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
|
||||
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
|
||||
def test_clone_without_needed_copy(self, mock_imgapi, mock_driver_):
|
||||
# Call image.clone() in a way that will cause it to pass the locations
|
||||
# check the first time. Assert that we do not call copy-to-store
|
||||
# nor recurse.
|
||||
self.flags(images_rbd_glance_store_name='store', group='libvirt')
|
||||
fake_image = {
|
||||
'id': 'foo',
|
||||
'disk_format': 'raw',
|
||||
'locations': ['fake'],
|
||||
}
|
||||
mock_imgapi.get.return_value = fake_image
|
||||
mock_driver = mock_driver_.return_value
|
||||
mock_driver.is_cloneable.return_value = True
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
with mock.patch.object(image, 'copy_to_store') as mock_copy:
|
||||
image.clone(self.CONTEXT, 'foo')
|
||||
mock_copy.assert_not_called()
|
||||
mock_driver.is_cloneable.assert_called_once_with('fake', fake_image)
|
||||
|
||||
@mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
|
||||
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
|
||||
def test_clone_copy_not_configured(self, mock_imgapi, mock_driver_):
|
||||
# Call image.clone() in a way that will cause it to fail the locations
|
||||
# check the first time. Assert that if the store name is not configured
|
||||
# we do not try to copy-to-store and just raise the original exception
|
||||
# indicating that the image is not reachable.
|
||||
fake_image = {
|
||||
'id': 'foo',
|
||||
'disk_format': 'raw',
|
||||
'locations': ['fake'],
|
||||
}
|
||||
mock_imgapi.get.return_value = fake_image
|
||||
mock_driver = mock_driver_.return_value
|
||||
mock_driver.is_cloneable.return_value = False
|
||||
image = self.image_class(self.INSTANCE, self.NAME)
|
||||
with mock.patch.object(image, 'copy_to_store') as mock_copy:
|
||||
self.assertRaises(exception.ImageUnacceptable,
|
||||
image.clone, self.CONTEXT, 'foo')
|
||||
mock_copy.assert_not_called()
|
||||
mock_driver.is_cloneable.assert_called_once_with('fake', fake_image)
|
||||
|
||||
|
||||
class PloopTestCase(_ImageTestCase, test.NoDBTestCase):
|
||||
SIZE = 1024
|
||||
|
|
|
@ -25,6 +25,7 @@ from castellan import key_manager
|
|||
from oslo_concurrency import processutils
|
||||
from oslo_log import log as logging
|
||||
from oslo_serialization import jsonutils
|
||||
from oslo_service import loopingcall
|
||||
from oslo_utils import excutils
|
||||
from oslo_utils import fileutils
|
||||
from oslo_utils import strutils
|
||||
|
@ -958,7 +959,77 @@ class Rbd(Image):
|
|||
def is_shared_block_storage():
|
||||
return True
|
||||
|
||||
def clone(self, context, image_id_or_uri):
|
||||
def copy_to_store(self, context, image_meta):
|
||||
store_name = CONF.libvirt.images_rbd_glance_store_name
|
||||
image_id = image_meta['id']
|
||||
try:
|
||||
IMAGE_API.copy_image_to_store(context, image_id, store_name)
|
||||
except exception.ImageBadRequest:
|
||||
# NOTE(danms): This means that we raced with another node to start
|
||||
# the copy. Fall through to polling the image for completion
|
||||
pass
|
||||
except exception.ImageImportImpossible as exc:
|
||||
# NOTE(danms): This means we can not do this operation at all,
|
||||
# so fold this into the kind of imagebackend failure that is
|
||||
# expected by our callers
|
||||
raise exception.ImageUnacceptable(image_id=image_id,
|
||||
reason=str(exc))
|
||||
|
||||
def _wait_for_copy():
|
||||
image = IMAGE_API.get(context, image_id, include_locations=True)
|
||||
if store_name in image.get('os_glance_failed_import', []):
|
||||
# Our store is reported as failed
|
||||
raise loopingcall.LoopingCallDone('failed import')
|
||||
elif (store_name not in image.get('os_glance_importing_to_stores',
|
||||
[]) and
|
||||
store_name in image['stores']):
|
||||
# No longer importing and our store is listed in the stores
|
||||
raise loopingcall.LoopingCallDone()
|
||||
else:
|
||||
LOG.debug('Glance reports copy of image %(image)s to '
|
||||
'rbd store %(store)s is still in progress',
|
||||
{'image': image_id,
|
||||
'store': store_name})
|
||||
return True
|
||||
|
||||
LOG.info('Asking glance to copy image %(image)s to our '
|
||||
'rbd store %(store)s',
|
||||
{'image': image_id,
|
||||
'store': store_name})
|
||||
|
||||
timer = loopingcall.FixedIntervalWithTimeoutLoopingCall(_wait_for_copy)
|
||||
|
||||
# NOTE(danms): We *could* do something more complicated like try
|
||||
# to scale our polling interval based on image size. The problem with
|
||||
# that is that we do not get progress indication from Glance, so if
|
||||
# we scale our interval to something long, and happen to poll right
|
||||
# near the end of the copy, we will wait another long interval before
|
||||
# realizing that the copy is complete. A simple interval per compute
|
||||
# allows an operator to set this short on central/fast/inexpensive
|
||||
# computes, and longer on nodes that are remote/slow/expensive across
|
||||
# a slower link.
|
||||
interval = CONF.libvirt.images_rbd_glance_copy_poll_interval
|
||||
timeout = CONF.libvirt.images_rbd_glance_copy_timeout
|
||||
try:
|
||||
result = timer.start(interval=interval, timeout=timeout).wait()
|
||||
except loopingcall.LoopingCallTimeOut:
|
||||
raise exception.ImageUnacceptable(
|
||||
image_id=image_id,
|
||||
reason='Copy to store %(store)s timed out' % {
|
||||
'store': store_name})
|
||||
|
||||
if result is not True:
|
||||
raise exception.ImageUnacceptable(
|
||||
image_id=image_id,
|
||||
reason=('Copy to store %(store)s unsuccessful '
|
||||
'because: %(reason)s') % {'store': store_name,
|
||||
'reason': result})
|
||||
|
||||
LOG.info('Image %(image)s copied to rbd store %(store)s',
|
||||
{'image': image_id,
|
||||
'store': store_name})
|
||||
|
||||
def clone(self, context, image_id_or_uri, copy_to_store=True):
|
||||
image_meta = IMAGE_API.get(context, image_id_or_uri,
|
||||
include_locations=True)
|
||||
locations = image_meta['locations']
|
||||
|
@ -975,6 +1046,12 @@ class Rbd(Image):
|
|||
LOG.debug('Selected location: %(loc)s', {'loc': location})
|
||||
return self.driver.clone(location, self.rbd_name)
|
||||
|
||||
# Not clone-able in our ceph, so try to get glance to copy it for us
|
||||
# and then retry
|
||||
if CONF.libvirt.images_rbd_glance_store_name and copy_to_store:
|
||||
self.copy_to_store(context, image_meta)
|
||||
return self.clone(context, image_id_or_uri, copy_to_store=False)
|
||||
|
||||
reason = _('No image locations are accessible')
|
||||
raise exception.ImageUnacceptable(image_id=image_id_or_uri,
|
||||
reason=reason)
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
---
|
||||
features:
|
||||
- |
|
||||
The libvirt RBD image backend module can now handle a Glance
|
||||
multistore environment where multiple RBD clusters are in use
|
||||
across a single Nova/Glance deployment, configured as independent
|
||||
Glance stores. In the case where an instance is booted with an
|
||||
image that does not exist in the RBD cluster that Nova is
|
||||
configured to use, Nova can ask Glance to copy the image from
|
||||
whatever store it is currently in to the one that represents its
|
||||
RBD cluster. To enable this feature, set
|
||||
``[libvirt]/images_rbd_glance_store_name`` to tell Nova the Glance
|
||||
store name of the RBD cluster it uses.
|
Loading…
Reference in New Issue