Fix allocated_capacity_gb race on create volume
Because of the stat reporting mechanism used in Cinder, and because we don't update allocated_capacity_gb in the volume service until the volume's creation has completed successfully, there is a race between the scheduler's value and the volume service's value that may lead the scheduler to provision more volumes than it should allow.

This patch fixes this by assuming that the create volume operation on the volume service will succeed: the allocated_capacity_gb value is increased when the operation starts, and the increase is reverted if the creation is rescheduled.

Closes-bug: #1751057
Change-Id: Ia321a08d102a9a4f535e46158764cb7f9bc81a9d
parent b8076c55d5
commit bf6df89a9e
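In short, the patch switches to optimistic accounting: the requested capacity is counted as soon as the volume service starts the create, and it is handed back only if the flow reschedules the volume to another backend. The snippet below is a minimal, standalone sketch of that pattern; the CapacityTracker class and its method names are invented for illustration and are not part of Cinder.

    # Minimal sketch (not the actual Cinder code) of the accounting pattern
    # this patch adopts: count the capacity as soon as the create request is
    # accepted, and give it back only if the volume is rescheduled elsewhere.
    from collections import defaultdict


    class CapacityTracker:
        def __init__(self):
            # pool name -> allocated capacity in GiB
            self.allocated_gb = defaultdict(int)

        def start_create(self, pool, size_gb):
            # Optimistically assume the create will succeed, so the
            # scheduler's view and ours diverge for as little time as possible.
            self.allocated_gb[pool] += size_gb

        def reschedule(self, pool, size_gb):
            # The volume will be built on another backend; undo the increment.
            self.allocated_gb[pool] -= size_gb


    tracker = CapacityTracker()
    tracker.start_create('_pool0', 10)   # counted before the driver even runs
    tracker.reschedule('_pool0', 10)     # reverted because creation moved away
    assert tracker.allocated_gb['_pool0'] == 0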
@@ -233,7 +233,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
         # NOTE(dulek): Volume should be rescheduled as we passed request_spec
         # and filter_properties, assert that it wasn't counted in
         # allocated_capacity tracking.
-        self.assertEqual({}, self.volume.stats['pools'])
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 0}},
+                         self.volume.stats['pools'])

         # NOTE(dulek): As we've rescheduled, make sure delete_volume was
         # called.
@@ -260,7 +261,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
         # NOTE(dulek): Volume should be rescheduled as we passed request_spec
         # and filter_properties, assert that it wasn't counted in
         # allocated_capacity tracking.
-        self.assertEqual({}, self.volume.stats['pools'])
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 0}},
+                         self.volume.stats['pools'])

         db.volume_destroy(context.get_admin_context(), volume_id)

@@ -317,6 +319,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
         self.assert_notify_called(mock_notify,
                                   (['INFO', 'volume.create.start'],
                                    ['INFO', 'volume.create.end']))
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 1}},
+                         self.volume.stats['pools'])

         self.volume.delete_volume(self.context, volume)
         vol = db.volume_get(context.get_admin_context(read_deleted='yes'),
@@ -328,6 +332,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
                                    ['INFO', 'volume.create.end'],
                                    ['INFO', 'volume.delete.start'],
                                    ['INFO', 'volume.delete.end']))
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 0}},
+                         self.volume.stats['pools'])

         self.assertRaises(exception.NotFound,
                           db.volume_get,
@@ -2757,6 +2763,10 @@ class VolumeTestCase(base.BaseVolumeTestCase):
         self.assertTrue(mock_reschedule.called)
         volume = db.volume_get(context.get_admin_context(), test_vol_id)
         self.assertEqual('creating', volume['status'])
+        # We increase the stats on entering the create method, but we must
+        # have cleared them on reschedule.
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 0}},
+                         self.volume.stats['pools'])

     @mock.patch('cinder.volume.flows.manager.create_volume.'
                 'CreateVolumeFromSpecTask.execute')
@@ -2774,6 +2784,8 @@ class VolumeTestCase(base.BaseVolumeTestCase):
             {'retry': {'num_attempts': 1, 'host': []}})
         volume = db.volume_get(context.get_admin_context(), test_vol_id)
         self.assertEqual('error', volume['status'])
+        self.assertEqual({'_pool0': {'allocated_capacity_gb': 1}},
+                         self.volume.stats['pools'])

     def test_cascade_delete_volume_with_snapshots(self):
         """Test volume deletion with dependent snapshots."""
@@ -586,6 +586,12 @@ class VolumeManager(manager.CleanableManager,
         # Make sure the host in the DB matches our own when clustered
         self._set_resource_host(volume)

+        # Update our allocated capacity counter early to minimize race
+        # conditions with the scheduler.
+        self._update_allocated_capacity(volume)
+        # We lose the host value if we reschedule, so keep it here
+        original_host = volume.host
+
         context_elevated = context.elevated()
         if filter_properties is None:
             filter_properties = {}
@@ -658,10 +664,12 @@ class VolumeManager(manager.CleanableManager,
             except tfe.NotFound:
                 pass

-            if not rescheduled:
-                # NOTE(dulek): Volume wasn't rescheduled so we need to update
-                # volume stats as these are decremented on delete.
-                self._update_allocated_capacity(volume)
+            if rescheduled:
+                # NOTE(geguileo): Volume was rescheduled so we need to update
+                # volume stats because the volume wasn't created here.
+                # Volume.host is None now, so we pass the original host value.
+                self._update_allocated_capacity(volume, decrement=True,
+                                                host=original_host)

         shared_targets = (
             1
@@ -844,20 +852,7 @@ class VolumeManager(manager.CleanableManager,
         if reservations:
             QUOTAS.commit(context, reservations, project_id=project_id)

-        pool = vol_utils.extract_host(volume.host, 'pool')
-        if pool is None:
-            # Legacy volume, put them into default pool
-            pool = self.driver.configuration.safe_get(
-                'volume_backend_name') or vol_utils.extract_host(
-                    volume.host, 'pool', True)
-        size = volume.size
-
-        try:
-            self.stats['pools'][pool]['allocated_capacity_gb'] -= size
-        except KeyError:
-            self.stats['pools'][pool] = dict(
-                allocated_capacity_gb=-size)
-
+        self._update_allocated_capacity(volume, decrement=True)
         self.publish_service_capabilities(context)

         msg = "Deleted volume successfully."
@@ -3265,21 +3260,22 @@ class VolumeManager(manager.CleanableManager,

         self.db.volume_update(context, vol['id'], update)

-    def _update_allocated_capacity(self, vol):
+    def _update_allocated_capacity(self, vol, decrement=False, host=None):
         # Update allocated capacity in volume stats
-        pool = vol_utils.extract_host(vol['host'], 'pool')
+        host = host or vol['host']
+        pool = vol_utils.extract_host(host, 'pool')
         if pool is None:
             # Legacy volume, put them into default pool
             pool = self.driver.configuration.safe_get(
-                'volume_backend_name') or vol_utils.extract_host(
-                    vol['host'], 'pool', True)
+                'volume_backend_name') or vol_utils.extract_host(host, 'pool',
+                                                                 True)

+        vol_size = -vol['size'] if decrement else vol['size']
         try:
-            self.stats['pools'][pool]['allocated_capacity_gb'] += (
-                vol['size'])
+            self.stats['pools'][pool]['allocated_capacity_gb'] += vol_size
         except KeyError:
             self.stats['pools'][pool] = dict(
-                allocated_capacity_gb=vol['size'])
+                allocated_capacity_gb=max(vol_size, 0))

     def delete_group(self, context, group):
         """Deletes group and the volumes in the group."""