Fix allocated_capacity_gb race on create volume

Because of the stats reporting mechanism used in Cinder, and because we
don't update allocated_capacity_gb in the volume service until a
volume's creation has completed successfully, there is a race between
the value the scheduler tracks and the value the volume service
reports, which can lead the scheduler to provision more volumes than it
should allow.

This patch fixes the race by assuming that the create volume operation
on the volume service will succeed: the allocated_capacity_gb value is
increased as soon as the operation starts, and the increase is reverted
if the creation is rescheduled.

Closes-bug: #1751057
Change-Id: Ia321a08d102a9a4f535e46158764cb7f9bc81a9d
Author: Gorka Eguileor  2018-02-22 14:35:30 +01:00
Parent: b8076c55d5
Commit: bf6df89a9e
2 changed files with 35 additions and 27 deletions
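
The idea of the fix can be illustrated outside of Cinder with a short,
self-contained Python sketch. Everything below (PoolStats, create_volume,
do_create) is an illustrative stand-in, not the actual VolumeManager code:
the pool counter is bumped optimistically as soon as the create request
starts and is rolled back only on the reschedule path.

    # Minimal sketch of the "increment early, revert on reschedule"
    # bookkeeping pattern; names are illustrative, not Cinder's API.
    class PoolStats(object):
        def __init__(self):
            self.pools = {}

        def update_allocated(self, pool, size_gb, decrement=False):
            delta = -size_gb if decrement else size_gb
            try:
                self.pools[pool]['allocated_capacity_gb'] += delta
            except KeyError:
                # First volume seen for this pool; never start below zero.
                self.pools[pool] = {'allocated_capacity_gb': max(delta, 0)}

    def create_volume(stats, pool, size_gb, do_create):
        # Count the capacity as soon as the request starts, so the
        # scheduler never works from a stale, too-low value while the
        # backend is still creating the volume.
        stats.update_allocated(pool, size_gb)
        try:
            do_create()
        except Exception:
            # The creation is being rescheduled to another backend: undo
            # the optimistic increment so this backend's stats stay right.
            stats.update_allocated(pool, size_gb, decrement=True)
            raise

With this ordering, the window in which the scheduler can read an
under-reported allocated_capacity_gb shrinks from the whole creation time
to practically nothing.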

@@ -233,7 +233,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
         # NOTE(dulek): Volume should be rescheduled as we passed request_spec
         # and filter_properties, assert that it wasn't counted in
         # allocated_capacity tracking.
-        self.assertEqual({}, self.volume.stats['pools'])
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 0}},
+                         self.volume.stats['pools'])

         # NOTE(dulek): As we've rescheduled, make sure delete_volume was
         # called.
@@ -260,7 +261,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
         # NOTE(dulek): Volume should be rescheduled as we passed request_spec
         # and filter_properties, assert that it wasn't counted in
         # allocated_capacity tracking.
-        self.assertEqual({}, self.volume.stats['pools'])
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 0}},
+                         self.volume.stats['pools'])

         db.volume_destroy(context.get_admin_context(), volume_id)
@@ -317,6 +319,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
         self.assert_notify_called(mock_notify,
                                   (['INFO', 'volume.create.start'],
                                    ['INFO', 'volume.create.end']))
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 1}},
+                         self.volume.stats['pools'])

         self.volume.delete_volume(self.context, volume)
         vol = db.volume_get(context.get_admin_context(read_deleted='yes'),
@@ -328,6 +332,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
                                    ['INFO', 'volume.create.end'],
                                    ['INFO', 'volume.delete.start'],
                                    ['INFO', 'volume.delete.end']))
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 0}},
+                         self.volume.stats['pools'])

         self.assertRaises(exception.NotFound,
                           db.volume_get,
@@ -2757,6 +2763,10 @@ class VolumeTestCase(base.BaseVolumeTestCase):
         self.assertTrue(mock_reschedule.called)
         volume = db.volume_get(context.get_admin_context(), test_vol_id)
         self.assertEqual('creating', volume['status'])
+        # We increase the stats on entering the create method, but we must
+        # have cleared them on reschedule.
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 0}},
+                         self.volume.stats['pools'])

     @mock.patch('cinder.volume.flows.manager.create_volume.'
                 'CreateVolumeFromSpecTask.execute')
@@ -2774,6 +2784,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
                          {'retry': {'num_attempts': 1, 'host': []}})
         volume = db.volume_get(context.get_admin_context(), test_vol_id)
         self.assertEqual('error', volume['status'])
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 1}},
+                         self.volume.stats['pools'])

     def test_cascade_delete_volume_with_snapshots(self):
         """Test volume deletion with dependent snapshots."""

@@ -586,6 +586,12 @@ class VolumeManager(manager.CleanableManager,
         # Make sure the host in the DB matches our own when clustered
         self._set_resource_host(volume)

+        # Update our allocated capacity counter early to minimize race
+        # conditions with the scheduler.
+        self._update_allocated_capacity(volume)
+        # We lose the host value if we reschedule, so keep it here
+        original_host = volume.host
+
         context_elevated = context.elevated()
         if filter_properties is None:
             filter_properties = {}
@@ -658,10 +664,12 @@ class VolumeManager(manager.CleanableManager,
             except tfe.NotFound:
                 pass

-            if not rescheduled:
-                # NOTE(dulek): Volume wasn't rescheduled so we need to update
-                # volume stats as these are decremented on delete.
-                self._update_allocated_capacity(volume)
+            if rescheduled:
+                # NOTE(geguileo): Volume was rescheduled so we need to update
+                # volume stats because the volume wasn't created here.
+                # Volume.host is None now, so we pass the original host value.
+                self._update_allocated_capacity(volume, decrement=True,
+                                                host=original_host)

         shared_targets = (
             1
@@ -844,20 +852,7 @@ class VolumeManager(manager.CleanableManager,
         if reservations:
             QUOTAS.commit(context, reservations, project_id=project_id)

-        pool = vol_utils.extract_host(volume.host, 'pool')
-        if pool is None:
-            # Legacy volume, put them into default pool
-            pool = self.driver.configuration.safe_get(
-                'volume_backend_name') or vol_utils.extract_host(
-                    volume.host, 'pool', True)
-        size = volume.size
-
-        try:
-            self.stats['pools'][pool]['allocated_capacity_gb'] -= size
-        except KeyError:
-            self.stats['pools'][pool] = dict(
-                allocated_capacity_gb=-size)
-
+        self._update_allocated_capacity(volume, decrement=True)
         self.publish_service_capabilities(context)

         msg = "Deleted volume successfully."
@@ -3265,21 +3260,22 @@ class VolumeManager(manager.CleanableManager,
             self.db.volume_update(context, vol['id'], update)

-    def _update_allocated_capacity(self, vol):
+    def _update_allocated_capacity(self, vol, decrement=False, host=None):
         # Update allocated capacity in volume stats
-        pool = vol_utils.extract_host(vol['host'], 'pool')
+        host = host or vol['host']
+        pool = vol_utils.extract_host(host, 'pool')
         if pool is None:
             # Legacy volume, put them into default pool
             pool = self.driver.configuration.safe_get(
-                'volume_backend_name') or vol_utils.extract_host(
-                    vol['host'], 'pool', True)
+                'volume_backend_name') or vol_utils.extract_host(host, 'pool',
+                                                                  True)

+        vol_size = -vol['size'] if decrement else vol['size']
         try:
-            self.stats['pools'][pool]['allocated_capacity_gb'] += (
-                vol['size'])
+            self.stats['pools'][pool]['allocated_capacity_gb'] += vol_size
         except KeyError:
             self.stats['pools'][pool] = dict(
-                allocated_capacity_gb=vol['size'])
+                allocated_capacity_gb=max(vol_size, 0))

     def delete_group(self, context, group):
         """Deletes group and the volumes in the group."""