From eab1d4b5cc6dd424c5c7dfd9989383a8e716cae5 Mon Sep 17 00:00:00 2001
From: Dan Smith
Date: Wed, 15 Mar 2017 16:18:10 -0700
Subject: [PATCH] Count server groups to check quota

This changes server groups from a ReservableResource to a
CountableResource and replaces quota reserve/commit/rollback with
check_deltas accordingly.

A new configuration option [quota]/recheck_quota has also been added
to control whether quota should be checked a second time after
resources have been created, to prevent allowing quota to be exceeded
as a result of racing requests.

Co-Authored-By: melanie witt

Part of blueprint cells-count-resources-to-check-quota-in-api

Change-Id: If87c84001673e5e463136435327044cc06f87a17
---
 nova/api/openstack/compute/server_groups.py  | 41 +++++++-------
 nova/conf/quota.py                           | 26 ++++++++-
 nova/quota.py                                | 19 ++++++-
 .../compute/test_server_group_quotas.py      | 21 ++++++--
 nova/tests/unit/test_quota.py                | 54 +++++++------------
 .../recheck-quota-conf-043a5d6057b33282.yaml | 12 +++++
 6 files changed, 111 insertions(+), 62 deletions(-)
 create mode 100644 releasenotes/notes/recheck-quota-conf-043a5d6057b33282.yaml

diff --git a/nova/api/openstack/compute/server_groups.py b/nova/api/openstack/compute/server_groups.py
index cef35b8d7d3f..9df7a5b3a25c 100644
--- a/nova/api/openstack/compute/server_groups.py
+++ b/nova/api/openstack/compute/server_groups.py
@@ -27,6 +27,7 @@ from nova.api.openstack.compute.schemas import server_groups as schema
 from nova.api.openstack import extensions
 from nova.api.openstack import wsgi
 from nova.api import validation
+import nova.conf
 from nova import context as nova_context
 import nova.exception
 from nova.i18n import _
@@ -37,6 +38,8 @@ LOG = logging.getLogger(__name__)
 
 ALIAS = "os-server-groups"
 
+CONF = nova.conf.CONF
+
 
 def _authorize_context(req, action):
     context = req.environ['nova.context']
@@ -124,28 +127,11 @@ class ServerGroupController(wsgi.Controller):
             sg = objects.InstanceGroup.get_by_uuid(context, id)
         except nova.exception.InstanceGroupNotFound as e:
             raise webob.exc.HTTPNotFound(explanation=e.format_message())
-
-        quotas = objects.Quotas(context=context)
-        project_id, user_id = objects.quotas.ids_from_server_group(context, sg)
-        try:
-            # We have to add the quota back to the user that created
-            # the server group
-            quotas.reserve(project_id=project_id,
-                           user_id=user_id, server_groups=-1)
-        except Exception:
-            quotas = None
-            LOG.exception("Failed to update usages deallocating server group")
-
         try:
             sg.destroy()
         except nova.exception.InstanceGroupNotFound as e:
-            if quotas:
-                quotas.rollback()
             raise webob.exc.HTTPNotFound(explanation=e.format_message())
 
-        if quotas:
-            quotas.commit()
-
     @extensions.expected_errors(())
     def index(self, req):
         """Returns a list of server groups."""
@@ -169,10 +155,9 @@ class ServerGroupController(wsgi.Controller):
         """Creates a new server group."""
         context = _authorize_context(req, 'create')
 
-        quotas = objects.Quotas(context=context)
         try:
-            quotas.reserve(project_id=context.project_id,
-                           user_id=context.user_id, server_groups=1)
+            objects.Quotas.check_deltas(context, {'server_groups': 1},
+                                        context.project_id, context.user_id)
         except nova.exception.OverQuota:
             msg = _("Quota exceeded, too many server groups.")
             raise exc.HTTPForbidden(explanation=msg)
@@ -186,10 +171,22 @@ class ServerGroupController(wsgi.Controller):
             sg.policies = vals.get('policies')
             sg.create()
         except ValueError as e:
-            quotas.rollback()
             raise exc.HTTPBadRequest(explanation=e)
 
-        quotas.commit()
-
+        # NOTE(melwitt): We recheck the quota after creating the object to
+        # prevent users from allocating more resources than their allowed quota
+        # in the event of a race. This is configurable because it can be
+        # expensive if strict quota limits are not required in a deployment.
+        if CONF.quota.recheck_quota:
+            try:
+                objects.Quotas.check_deltas(context, {'server_groups': 0},
+                                            context.project_id,
+                                            context.user_id)
+            except nova.exception.OverQuota:
+                sg.destroy()
+                msg = _("Quota exceeded, too many server groups.")
+                raise exc.HTTPForbidden(explanation=msg)
+
         return {'server_group': self._format_server_group(context, sg, req)}

diff --git a/nova/conf/quota.py b/nova/conf/quota.py
index 964494ea7644..4766a8b28607 100644
--- a/nova/conf/quota.py
+++ b/nova/conf/quota.py
@@ -287,7 +287,6 @@ to help keep quota usage up-to-date and reduce the impact of out of sync usage
 issues. Note that quotas are not updated on a periodic task, they will
 update on a new reservation if max_age has passed since the last reservation.
 """),
-
     # TODO(pumaranikar): Add a new config to select between the db_driver and
     # the no_op driver using stevedore.
     cfg.StrOpt('driver',
@@ -306,6 +305,31 @@ Possible values:
 
 * nova.quota.DbQuotaDriver (default) or any string representing fully
   qualified class name.
+"""),
+    cfg.BoolOpt('recheck_quota',
+                default=True,
+                help="""
+Recheck quota after resource creation to prevent allowing quota to be
+exceeded.
+
+This defaults to True (recheck quota after resource creation) but can be set
+to False to avoid additional load if allowing quota to be exceeded because of
+racing requests is considered acceptable. For example, when set to False, if a
+user makes highly parallel REST API requests to create servers, it will be
+possible for them to create more servers than their allowed quota during the
+race. If their quota is 10 servers, they might be able to create 50 during the
+burst. After the burst, they will not be able to create any more servers but
+they will be able to keep their 50 servers until they delete them.
+
+The initial quota check is done before resources are created, so if multiple
+parallel requests arrive at the same time, all could pass the quota check and
+create resources, potentially exceeding quota. When recheck_quota is True,
+quota will be checked a second time after resources have been created and if
+the resource is over quota, it will be deleted and OverQuota will be raised,
+usually resulting in a 403 response to the REST API user. This makes it
+impossible for a user to exceed their quota. However, a REST API user may
+still receive a 403 response from a collision close to their quota limit,
+even if the user had enough quota available when the request was made.
 """),
 ]
 
diff --git a/nova/quota.py b/nova/quota.py
index deba89ec7fbe..843ba36aaccd 100644
--- a/nova/quota.py
+++ b/nova/quota.py
@@ -1857,6 +1857,22 @@ def _server_group_count_members_by_user(context, group, user_id):
     return {'user': {'server_group_members': count}}
 
 
+def _server_group_count(context, project_id, user_id=None):
+    """Get the counts of server groups in the database.
+
+    :param context: The request context for database access
+    :param project_id: The project_id to count across
+    :param user_id: The user_id to count across
+    :returns: A dict containing the project-scoped counts and user-scoped
+              counts if user_id is specified. For example:
+
+                {'project': {'server_groups': <count across project>},
+                 'user': {'server_groups': <count across user>}}
+    """
+    return objects.InstanceGroupList.get_counts(context, project_id,
+                                                user_id=user_id)
+
+
 def _security_group_rule_count_by_group(context, security_group_id):
     count = db.security_group_rule_count_by_group(context, security_group_id)
     # NOTE(melwitt): Neither 'project' nor 'user' fit perfectly here as
@@ -1888,8 +1904,7 @@ resources = [
                           _security_group_rule_count_by_group,
                           'security_group_rules'),
     CountableResource('key_pairs', _keypair_get_count_by_user, 'key_pairs'),
-    ReservableResource('server_groups', '_sync_server_groups',
-                       'server_groups'),
+    CountableResource('server_groups', _server_group_count, 'server_groups'),
     CountableResource('server_group_members',
                       _server_group_count_members_by_user,
                       'server_group_members'),

diff --git a/nova/tests/unit/api/openstack/compute/test_server_group_quotas.py b/nova/tests/unit/api/openstack/compute/test_server_group_quotas.py
index 516bfec56fc1..c212484e597c 100644
--- a/nova/tests/unit/api/openstack/compute/test_server_group_quotas.py
+++ b/nova/tests/unit/api/openstack/compute/test_server_group_quotas.py
@@ -13,13 +13,14 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import mock
 from oslo_config import cfg
 from oslo_utils import uuidutils
 import webob
 
 from nova.api.openstack.compute import server_groups as sg_v21
 from nova import context
-from nova import quota
+from nova import objects
 from nova import test
 from nova.tests.unit.api.openstack import fakes
 from nova.tests import uuidsentinel as uuids
@@ -84,8 +85,10 @@ class ServerGroupQuotasTestV21(test.TestCase):
 
     def _assert_server_groups_in_use(self, project_id, user_id, in_use):
         ctxt = context.get_admin_context()
-        result = quota.QUOTAS.get_user_quotas(ctxt, project_id, user_id)
-        self.assertEqual(result['server_groups']['in_use'], in_use)
+        counts = objects.InstanceGroupList.get_counts(ctxt, project_id,
+                                                      user_id=user_id)
+        self.assertEqual(in_use, counts['project']['server_groups'])
+        self.assertEqual(in_use, counts['user']['server_groups'])
 
     def test_create_server_group_normal(self):
         self._setup_quotas()
@@ -112,6 +115,18 @@ class ServerGroupQuotasTestV21(test.TestCase):
                           self.controller.create,
                           self.req, body={'server_group': sgroup})
 
+    @mock.patch('nova.objects.Quotas.check_deltas')
+    def test_create_server_group_recheck_disabled(self, mock_check):
+        self.flags(recheck_quota=False, group='quota')
+        self._setup_quotas()
+        sgroup = server_group_template()
+        policies = ['anti-affinity']
+        sgroup['policies'] = policies
+        self.controller.create(self.req, body={'server_group': sgroup})
+        ctxt = self.req.environ['nova.context']
+        mock_check.assert_called_once_with(ctxt, {'server_groups': 1},
+                                           ctxt.project_id, ctxt.user_id)
+
     def test_delete_server_group_by_admin(self):
         self._setup_quotas()
         sgroup = server_group_template()

diff --git a/nova/tests/unit/test_quota.py b/nova/tests/unit/test_quota.py
index 12a19a6207d3..e01c1f6df113 100644
--- a/nova/tests/unit/test_quota.py
+++ b/nova/tests/unit/test_quota.py
@@ -2329,8 +2329,7 @@ class DbQuotaDriverTestCase(test.TestCase):
                                             'test_class'),
                                         quota.QUOTAS._resources,
                                         ['instances', 'cores', 'ram',
-                                         'floating_ips', 'security_groups',
-                                         'server_groups'],
+                                         'floating_ips', 'security_groups'],
                                         True,
                                         project_id='test_project')
 
@@ -2341,7 +2340,6 @@ class DbQuotaDriverTestCase(test.TestCase):
                 ram=50 * 1024,
                 floating_ips=10,
                 security_groups=10,
-                server_groups=10,
                 ))
 
     def test_get_quotas_no_sync(self):
@@ -2353,7 +2351,8 @@ class DbQuotaDriverTestCase(test.TestCase):
                                         'injected_file_content_bytes',
                                         'injected_file_path_bytes',
                                         'security_group_rules',
-                                        'server_group_members'], False,
+                                        'server_group_members',
+                                        'server_groups'], False,
                                         project_id='test_project')
 
         self.assertEqual(self.calls, ['get_project_quotas'])
@@ -2364,6 +2363,7 @@ class DbQuotaDriverTestCase(test.TestCase):
                 injected_file_path_bytes=255,
                 security_group_rules=20,
                 server_group_members=10,
+                server_groups=10,
                 ))
 
     def test_limit_check_under(self):
@@ -2698,7 +2698,7 @@ class QuotaSqlAlchemyBase(test.TestCase):
         self.addCleanup(restore_sync_functions)
 
         for res_name in ('instances', 'cores', 'ram', 'fixed_ips',
-                         'security_groups', 'server_groups', 'floating_ips'):
+                         'security_groups', 'floating_ips'):
             method_name = '_sync_%s' % res_name
             sqa_api.QUOTA_SYNC_FUNCTIONS[method_name] = make_sync(res_name)
             res = quota.ReservableResource(res_name, '_sync_%s' % res_name)
@@ -3085,8 +3085,7 @@ class QuotaReserveSqlAlchemyTestCase(QuotaSqlAlchemyBase):
 class QuotaEngineUsageRefreshTestCase(QuotaSqlAlchemyBase):
     def _init_usages(self, *in_use, **kwargs):
         for i, option in enumerate(('instances', 'cores', 'ram', 'fixed_ips',
-                                    'server_groups', 'security_groups',
-                                    'floating_ips')):
+                                    'security_groups', 'floating_ips')):
             self.init_usage('test_project', 'fake_user',
                             option, in_use[i], **kwargs)
         return FakeContext('test_project', 'test_class')
@@ -3107,15 +3106,8 @@ class QuotaEngineUsageRefreshTestCase(QuotaSqlAlchemyBase):
         # 1 cores user
         # 2 ram user
         # 3 fixed_ips project
-        # 4 server_groups user
-        # 5 security_groups user
-        # 6 floating_ips project
-        self.usages_list.append(dict(resource='server_groups',
-                                     project_id='test_project',
-                                     user_id='fake_user',
-                                     in_use=2,
-                                     reserved=2,
-                                     until_refresh=None))
+        # 4 security_groups user
+        # 5 floating_ips project
         self.usages_list.append(dict(resource='security_groups',
                                      project_id='test_project',
                                      user_id='fake_user',
@@ -3136,7 +3128,6 @@ class QuotaEngineUsageRefreshTestCase(QuotaSqlAlchemyBase):
         self.usages_list[3]['reserved'] = 0
         self.usages_list[4]['reserved'] = 0
         self.usages_list[5]['reserved'] = 0
-        self.usages_list[6]['reserved'] = 0
 
         def fake_quota_get_all_by_project_and_user(context, project_id,
                                                    user_id):
@@ -3182,7 +3173,7 @@ class QuotaEngineUsageRefreshTestCase(QuotaSqlAlchemyBase):
         ctxt = context.get_admin_context()
         quota.QUOTAS.usage_refresh(ctxt, 'test_project', 'fake_user')
 
-        self.assertEqual(self.sync_called, set(['instances', 'server_groups',
+        self.assertEqual(self.sync_called, set(['instances',
                                                 'security_groups']))
 
         # Compare the expected usages with the actual usages.
@@ -3190,8 +3181,8 @@ class QuotaEngineUsageRefreshTestCase(QuotaSqlAlchemyBase):
         self.usages_list[3]['in_use'] = 3
         self.usages_list[3]['until_refresh'] = 5
         # Expect floating_ips not to change since it is project scoped.
-        self.usages_list[6]['in_use'] = 3
-        self.usages_list[6]['until_refresh'] = 5
+        self.usages_list[5]['in_use'] = 3
+        self.usages_list[5]['until_refresh'] = 5
         self.compare_usage(self.usages, self.usages_list)
 
         # No usages were created.
@@ -3200,11 +3191,12 @@ class QuotaEngineUsageRefreshTestCase(QuotaSqlAlchemyBase):
     def test_usage_refresh_user_two_keys(self):
         context = self._init_usages(3, 3, 3, 3, 3, 3, 3,
                                     until_refresh = 5)
-        keys = ['server_groups', 'ram']
+        keys = ['security_groups', 'ram']
         # Let the context determine the project_id and user_id
         quota.QUOTAS.usage_refresh(context, None, None, keys)
 
-        self.assertEqual(self.sync_called, set(['instances', 'server_groups']))
+        self.assertEqual(self.sync_called, set(['instances',
+                                                'security_groups']))
 
         # Compare the expected usages with the actual usages.
@@ -3213,9 +3205,6 @@ class QuotaEngineUsageRefreshTestCase(QuotaSqlAlchemyBase):
-        # Expect security_groups not to change since it is not in keys list.
-        self.usages_list[5]['in_use'] = 3
-        self.usages_list[5]['until_refresh'] = 5
-        # Expect fixed_ips not to change since it is project scoped.
-        self.usages_list[6]['in_use'] = 3
-        self.usages_list[6]['until_refresh'] = 5
+        # Expect floating_ips not to change since it is project scoped.
+        self.usages_list[5]['in_use'] = 3
+        self.usages_list[5]['until_refresh'] = 5
         self.compare_usage(self.usages, self.usages_list)
 
         # No usages were created.
@@ -3259,12 +3251,9 @@ class QuotaEngineUsageRefreshTestCase(QuotaSqlAlchemyBase):
         # Expect ram not to change since it is user scoped.
         self.usages_list[2]['in_use'] = 3
         self.usages_list[2]['until_refresh'] = 5
-        # Expect server_groups not to change since it is user scoped.
+        # Expect security_groups not to change since it is user scoped.
         self.usages_list[4]['in_use'] = 3
         self.usages_list[4]['until_refresh'] = 5
-        # Expect security_groups not to change since it is user scoped.
-        self.usages_list[5]['in_use'] = 3
-        self.usages_list[5]['until_refresh'] = 5
         self.compare_usage(self.usages, self.usages_list)
 
         self.assertEqual(self.usages_created, {})
@@ -3291,12 +3280,9 @@ class QuotaEngineUsageRefreshTestCase(QuotaSqlAlchemyBase):
         # Expect fixed_ips not to change since it is not in the keys list.
         self.usages_list[3]['in_use'] = 3
         self.usages_list[3]['until_refresh'] = 5
-        # Expect server_groups not to change since it is user scoped.
+        # Expect security_groups not to change since it is user scoped.
         self.usages_list[4]['in_use'] = 3
         self.usages_list[4]['until_refresh'] = 5
-        # Expect security_groups not to change since it is user scoped.
-        self.usages_list[5]['in_use'] = 3
-        self.usages_list[5]['until_refresh'] = 5
         self.compare_usage(self.usages, self.usages_list)
 
         self.assertEqual(self.usages_created, {})
@@ -3312,7 +3298,7 @@ class QuotaEngineUsageRefreshTestCase(QuotaSqlAlchemyBase):
 
         # Compare the expected usages with the created usages.
         # Expect floating_ips to be created and initialized to 0
-        self.usages_list[6]['in_use'] = 0
-        self.compare_usage(self.usages_created, self.usages_list[6:])
+        self.usages_list[5]['in_use'] = 0
+        self.compare_usage(self.usages_created, self.usages_list[5:])
 
         self.assertEqual(len(self.usages_created), 1)

diff --git a/releasenotes/notes/recheck-quota-conf-043a5d6057b33282.yaml b/releasenotes/notes/recheck-quota-conf-043a5d6057b33282.yaml
new file mode 100644
index 000000000000..f8eb3dcb78b6
--- /dev/null
+++ b/releasenotes/notes/recheck-quota-conf-043a5d6057b33282.yaml
@@ -0,0 +1,12 @@
+features:
+  - |
+    A new configuration option ``[quota]/recheck_quota`` has been added to
+    recheck quota after resource creation to prevent allowing quota to be
+    exceeded as a result of racing requests. It defaults to True, which makes
+    it impossible for a user to exceed their quota. However, it will be
+    possible for a REST API user to be rejected with an over quota 403 error
+    response in the event of a collision close to reaching their quota limit,
+    even if the user had enough quota available when they made the request.
+    Operators may want to set the option to False to avoid additional load on
+    the system if allowing quota to be exceeded because of racing requests is
+    considered acceptable.
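
The create() change above reduces to a check -> create -> recheck pattern. The
sketch below condenses that flow outside the WSGI controller as a reading aid.
It is illustrative only: the helper name is hypothetical, the webob error
translation is omitted, and the `recheck` argument stands in for
CONF.quota.recheck_quota; the check_deltas/InstanceGroup calls are the same
ones the patch makes.

    from nova import exception
    from nova import objects


    def _create_server_group_with_recheck(context, vals, recheck=True):
        # Initial check: fails cheaply before anything is created, but two
        # racing requests can both pass it at the same time.
        objects.Quotas.check_deltas(context, {'server_groups': 1},
                                    context.project_id, context.user_id)

        sg = objects.InstanceGroup(context)
        sg.project_id = context.project_id
        sg.user_id = context.user_id
        sg.name = vals.get('name')
        sg.policies = vals.get('policies')
        sg.create()

        # Recheck with a delta of 0: the group just created is now counted
        # in the database, so if a racing request pushed usage over the
        # limit, OverQuota is raised here and we undo our own creation.
        if recheck:  # mirrors CONF.quota.recheck_quota
            try:
                objects.Quotas.check_deltas(context, {'server_groups': 0},
                                            context.project_id,
                                            context.user_id)
            except exception.OverQuota:
                sg.destroy()
                raise

        return sg

The delta of 0 on the recheck is the key design point: the created group is
already included in the count, so the second check only verifies that current
usage fits within the limit, without reserving additional headroom.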