Check quota before creating volume snapshots

When creating a snapshot of a volume-backed instance, we create a
snapshot of every volume BDM associated with the instance. The default
volume snapshot quota limit in Cinder is 10, so if you have a
volume-backed instance with several volumes attached and snapshot it a
few times, the volume snapshot creation is likely to fail at some point
with an OverLimit error from Cinder. This can leave orphaned volume
snapshots in Cinder that the user then has to clean up.
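
As a rough illustration (purely hypothetical code, not Nova's), snapshotting
each attached volume in a loop and only discovering the quota problem part-way
through is exactly what leaves orphaned snapshots behind:

class OverLimit(Exception):
    """Stand-in for the over-quota error Cinder returns."""


class FakeCinder:
    """Toy volume service with a project-wide snapshot quota."""

    def __init__(self, snapshots_used=8, max_snapshots=10):
        self.snapshots_used = snapshots_used
        self.max_snapshots = max_snapshots
        self.created = []

    def create_snapshot(self, volume_id):
        if self.snapshots_used >= self.max_snapshots:
            raise OverLimit('snapshot quota exceeded')
        self.snapshots_used += 1
        snap_id = 'snap-for-%s' % volume_id
        self.created.append(snap_id)
        return snap_id


cinder = FakeCinder()
volumes = ['vol-1', 'vol-2', 'vol-3']  # several volumes on one server

try:
    for vol in volumes:
        cinder.create_snapshot(vol)
except OverLimit:
    # Some snapshots were created before the quota error; without extra
    # cleanup logic they are now orphaned.
    print('Over quota; orphaned snapshots: %s' % cinder.created)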

This change makes the snapshot operation a bit more robust by
first checking the quota limit and current usage for the given
project before attempting to create any volume snapshots.

It's not fool-proof since racing snapshot requests for the same
project could still push us over the limit, but it's a simple
improvement that avoids the issue in the general case.
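
In essence, the added check boils down to something like the following
standalone sketch. The limits dict keys mirror what Cinder's absolute-limits
API reports; get_absolute_limits and create_snapshot here are hypothetical
stand-ins rather than the real Nova/Cinder interfaces:

class OverQuota(Exception):
    pass


def snapshot_volumes(volume_ids, get_absolute_limits, create_snapshot):
    limits = get_absolute_limits()
    used = limits['totalSnapshotsUsed']
    maximum = limits['maxTotalSnapshots']
    # -1 means snapshots are unlimited, so skip the check entirely.
    if maximum > -1 and len(volume_ids) + used > maximum:
        # Refuse up front so no snapshot is created and later orphaned.
        raise OverQuota('%d used + %d requested exceeds limit of %d'
                        % (used, len(volume_ids), maximum))
    return [create_snapshot(vol) for vol in volume_ids]


# Example: the request is accepted or rejected before any snapshot exists.
snaps = snapshot_volumes(
    ['vol-1', 'vol-2'],
    get_absolute_limits=lambda: {'totalSnapshotsUsed': 3,
                                 'maxTotalSnapshots': 10},
    create_snapshot=lambda vol: 'snap-for-%s' % vol)
print(snaps)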

Change-Id: I4e7b46deb43c0c2430b480f1a498a52fc4a9daf0
Related-Bug: #1731986
Matt Riedemann 2017-11-15 12:45:08 -05:00
parent ad389244ba
commit 289d2703c7
3 changed files with 92 additions and 30 deletions

nova/compute/api.py

@@ -2812,6 +2812,40 @@ class API(base.Base):
        if instance.root_device_name:
            properties['root_device_name'] = instance.root_device_name

        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
            context, instance.uuid)

        mapping = []  # list of BDM dicts that can go into the image properties
        # Do some up-front filtering of the list of BDMs from
        # which we are going to create snapshots.
        volume_bdms = []
        for bdm in bdms:
            if bdm.no_device:
                continue
            if bdm.is_volume:
                # These will be handled below.
                volume_bdms.append(bdm)
            else:
                mapping.append(bdm.get_image_mapping())

        # Check limits in Cinder before creating snapshots to avoid going over
        # quota in the middle of a list of volumes. This is a best-effort check
        # but concurrently running snapshot requests from the same project
        # could still fail to create volume snapshots if they go over limit.
        if volume_bdms:
            limits = self.volume_api.get_absolute_limits(context)
            total_snapshots_used = limits['totalSnapshotsUsed']
            max_snapshots = limits['maxTotalSnapshots']
            # -1 means there is unlimited quota for snapshots
            if (max_snapshots > -1 and
                    len(volume_bdms) + total_snapshots_used > max_snapshots):
                LOG.debug('Unable to create volume snapshots for instance. '
                          'Currently has %s snapshots, requesting %s new '
                          'snapshots, with a limit of %s.',
                          total_snapshots_used, len(volume_bdms),
                          max_snapshots, instance=instance)
                raise exception.OverQuota(overs='snapshots')

        quiesced = False
        if instance.vm_state == vm_states.ACTIVE:
            try:
@@ -2830,35 +2864,24 @@ class API(base.Base):
                             {'reason': err},
                             instance=instance)

        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
            context, instance.uuid)

        @wrap_instance_event(prefix='api')
        def snapshot_instance(self, context, instance, bdms):
            mapping = []
            try:
                for bdm in bdms:
                    if bdm.no_device:
                        continue
                    if bdm.is_volume:
                        # create snapshot based on volume_id
                        volume = self.volume_api.get(context, bdm.volume_id)
                        # NOTE(yamahata): Should we wait for snapshot creation?
                        # Linux LVM snapshot creation completes in short time,
                        # it doesn't matter for now.
                        name = _('snapshot for %s') % image_meta['name']
                        LOG.debug('Creating snapshot from volume %s.',
                                  volume['id'], instance=instance)
                        snapshot = self.volume_api.create_snapshot_force(
                            context, volume['id'],
                            name, volume['display_description'])
                        mapping_dict = block_device.snapshot_from_bdm(
                            snapshot['id'], bdm)
                        mapping_dict = mapping_dict.get_image_mapping()
                    else:
                        mapping_dict = bdm.get_image_mapping()
                for bdm in volume_bdms:
                    # create snapshot based on volume_id
                    volume = self.volume_api.get(context, bdm.volume_id)
                    # NOTE(yamahata): Should we wait for snapshot creation?
                    # Linux LVM snapshot creation completes in
                    # short time, it doesn't matter for now.
                    name = _('snapshot for %s') % image_meta['name']
                    LOG.debug('Creating snapshot from volume %s.',
                              volume['id'], instance=instance)
                    snapshot = self.volume_api.create_snapshot_force(
                        context, volume['id'],
                        name, volume['display_description'])
                    mapping_dict = block_device.snapshot_from_bdm(
                        snapshot['id'], bdm)
                    mapping_dict = mapping_dict.get_image_mapping()
                    mapping.append(mapping_dict)
                return mapping
            # NOTE(tasker): No error handling is done in the above for loop.

nova/tests/unit/api/openstack/compute/test_server_actions.py

@@ -982,6 +982,10 @@ class ServerActionsControllerTestV21(test.TestCase):
        snapshot = dict(id=_fake_id('d'))
        with test.nested(
            mock.patch.object(
                self.controller.compute_api.volume_api, 'get_absolute_limits',
                return_value={'totalSnapshotsUsed': 0,
                              'maxTotalSnapshots': 10}),
            mock.patch.object(self.controller.compute_api.compute_rpcapi,
                              'quiesce_instance',
                              side_effect=exception.InstanceQuiesceNotSupported(
@@ -991,7 +995,7 @@
            mock.patch.object(self.controller.compute_api.volume_api,
                              'create_snapshot_force',
                              return_value=snapshot),
        ) as (mock_quiesce, mock_vol_get, mock_vol_create):
        ) as (mock_get_limits, mock_quiesce, mock_vol_get, mock_vol_create):
            if mock_vol_create_side_effect:
                mock_vol_create.side_effect = mock_vol_create_side_effect
@@ -1086,6 +1090,10 @@
        snapshot = dict(id=_fake_id('d'))
        with test.nested(
            mock.patch.object(
                self.controller.compute_api.volume_api, 'get_absolute_limits',
                return_value={'totalSnapshotsUsed': 0,
                              'maxTotalSnapshots': 10}),
            mock.patch.object(self.controller.compute_api.compute_rpcapi,
                              'quiesce_instance',
                              side_effect=exception.InstanceQuiesceNotSupported(
@@ -1095,7 +1103,7 @@
            mock.patch.object(self.controller.compute_api.volume_api,
                              'create_snapshot_force',
                              return_value=snapshot),
        ) as (mock_quiesce, mock_vol_get, mock_vol_create):
        ) as (mock_get_limits, mock_quiesce, mock_vol_get, mock_vol_create):
            response = self.controller._action_create_image(self.req,
                                                            FAKE_UUID, body=body)

nova/tests/unit/compute/test_compute_api.py

@@ -2930,7 +2930,7 @@ class _ComputeAPIUnitTestMixIn(object):
    def _test_snapshot_volume_backed(self, quiesce_required, quiesce_fails,
                                     vm_state=vm_states.ACTIVE,
                                     snapshot_fails=False):
                                     snapshot_fails=False, limits=None):
        fake_sys_meta = {'image_min_ram': '11',
                         'image_min_disk': '22',
                         'image_container_format': 'ami',
@@ -2989,6 +2989,11 @@
        def fake_unquiesce_instance(context, instance, mapping=None):
            quiesced[1] = True

        def fake_get_absolute_limits(context):
            if limits is not None:
                return limits
            return {"totalSnapshotsUsed": 0, "maxTotalSnapshots": 10}

        self.stub_out('nova.objects.BlockDeviceMappingList'
                      '.get_by_instance_uuid',
                      fake_bdm_list_get_by_instance_uuid)
@@ -3037,6 +3042,12 @@
             'destination_type': 'volume', 'delete_on_termination': False,
             'tag': None})

        limits_patcher = mock.patch.object(
            self.compute_api.volume_api, 'get_absolute_limits',
            side_effect=fake_get_absolute_limits)
        limits_patcher.start()
        self.addCleanup(limits_patcher.stop)

        with test.nested(
            mock.patch.object(compute_api.API, '_record_action_start'),
            mock.patch.object(compute_utils, 'EventReporter')) as (
@@ -3081,7 +3092,9 @@
             'guest_format': 'swap', 'delete_on_termination': True,
             'tag': None})
        instance_bdms.append(bdm)
        expect_meta['properties']['block_device_mapping'].append(
        # The non-volume image mapping will go at the front of the list
        # because the volume BDMs are processed separately.
        expect_meta['properties']['block_device_mapping'].insert(0,
            {'guest_format': 'swap', 'boot_index': -1, 'no_device': False,
             'image_id': None, 'volume_id': None, 'disk_bus': None,
             'volume_size': None, 'source_type': 'blank',
@@ -3127,6 +3140,24 @@
                                          quiesce_fails=False,
                                          snapshot_fails=True)

    def test_snapshot_volume_backed_unlimited_quota(self):
        """Tests that there is unlimited quota on volume snapshots so we
        don't perform a quota check.
        """
        limits = {'maxTotalSnapshots': -1, 'totalSnapshotsUsed': 0}
        self._test_snapshot_volume_backed(
            quiesce_required=False, quiesce_fails=False, limits=limits)

    def test_snapshot_volume_backed_over_quota_before_snapshot(self):
        """Tests that the up-front check on quota fails before actually
        attempting to snapshot any volumes.
        """
        limits = {'maxTotalSnapshots': 1, 'totalSnapshotsUsed': 1}
        self.assertRaises(exception.OverQuota,
                          self._test_snapshot_volume_backed,
                          quiesce_required=False, quiesce_fails=False,
                          limits=limits)

    def test_snapshot_volume_backed_with_quiesce_skipped(self):
        self._test_snapshot_volume_backed(False, True)