From ccaf49a00cea1990027eb699bff8f10c664b2c9a Mon Sep 17 00:00:00 2001
From: Alistair Coles <alistairncoles@gmail.com>
Date: Thu, 31 Mar 2022 16:36:32 +0100
Subject: [PATCH] Add backend rate limiting middleware

This is a fairly blunt tool: ratelimiting is per device and
applied independently in each worker, but this at least provides
some limit to disk IO on backend servers.

GET, HEAD, PUT, POST, DELETE, UPDATE and REPLICATE methods may be
rate-limited.

Only requests with a path starting '<device>/<partition>', where
<partition> can be cast to an integer, will be rate-limited. Other
requests, including, for example, recon requests with paths such as
'recon/version', are unconditionally forwarded to the next app in the
pipeline.

OPTIONS and SSYNC methods are not rate-limited. Note that
SSYNC sub-requests are passed directly to the object server app
and will not pass though this middleware.

Change-Id: I78b59a081698a6bff0d74cbac7525e28f7b5d7c1
---
 doc/source/middleware.rst                     |   7 +
 etc/account-server.conf-sample                |  14 +-
 etc/container-server.conf-sample              |  14 +-
 etc/object-server.conf-sample                 |  14 +-
 setup.cfg                                     |   1 +
 swift/common/middleware/backend_ratelimit.py  |  86 +++++++++
 swift/common/swob.py                          |   3 +
 swift/common/utils.py                         |  10 +-
 test/unit/common/middleware/helpers.py        |   2 +-
 .../middleware/test_backend_ratelimit.py      | 170 ++++++++++++++++++
 test/unit/common/test_utils.py                |  20 ++-
 11 files changed, 331 insertions(+), 10 deletions(-)
 create mode 100644 swift/common/middleware/backend_ratelimit.py
 create mode 100644 test/unit/common/middleware/test_backend_ratelimit.py
diff --git a/doc/source/middleware.rst b/doc/source/middleware.rst
index ef282bfa35..ee2e3dc674 100644
--- a/doc/source/middleware.rst
+++ b/doc/source/middleware.rst
@@ -102,6 +102,13 @@ AWS S3 Api
     :members:
     :show-inheritance:
 
+Backend Ratelimit
+=================
+
+.. automodule:: swift.common.middleware.backend_ratelimit
+    :members:
+    :show-inheritance:
+
 .. _bulk:
 
 Bulk Operations (Delete and Archive Auto Extraction)
diff --git a/etc/account-server.conf-sample b/etc/account-server.conf-sample
index 89c1ea3506..485fb45e2e 100644
--- a/etc/account-server.conf-sample
+++ b/etc/account-server.conf-sample
@@ -80,7 +80,7 @@ bind_port = 6202
 # ionice_priority =
 
 [pipeline:main]
-pipeline = healthcheck recon account-server
+pipeline = healthcheck recon backend_ratelimit account-server
 
 [app:account-server]
 use = egg:swift#account
@@ -128,6 +128,18 @@ use = egg:swift#healthcheck
 use = egg:swift#recon
 # recon_cache_path = /var/cache/swift
 
+[filter:backend_ratelimit]
+use = egg:swift#backend_ratelimit
+# Set the maximum rate of requests per second per device per worker. Beyond
+# this rate the server will return 529 responses and emit a 'backend.ratelimit'
+# statsd metric without logging. The default value of zero causes no
+# rate-limiting to be applied.
+# requests_per_device_per_second = 0.0
+#
+# Set the number of seconds of unused rate-limiting allowance that can
+# accumulate and be used to allow a subsequent burst of requests.
+# requests_per_device_rate_buffer = 1.0
+
 [account-replicator]
 # You can override the default log routing for this app here (don't use set!):
 # log_name = account-replicator
diff --git a/etc/container-server.conf-sample b/etc/container-server.conf-sample
index e1f8482c6e..5f949abc49 100644
--- a/etc/container-server.conf-sample
+++ b/etc/container-server.conf-sample
@@ -86,7 +86,7 @@ bind_port = 6201
 # ionice_priority =
 
 [pipeline:main]
-pipeline = healthcheck recon container-server
+pipeline = healthcheck recon backend_ratelimit container-server
 
 [app:container-server]
 use = egg:swift#container
@@ -138,6 +138,18 @@ use = egg:swift#healthcheck
 use = egg:swift#recon
 #recon_cache_path = /var/cache/swift
 
+[filter:backend_ratelimit]
+use = egg:swift#backend_ratelimit
+# Set the maximum rate of requests per second per device per worker. Beyond
+# this rate the server will return 529 responses and emit a 'backend.ratelimit'
+# statsd metric without logging. The default value of zero causes no
+# rate-limiting to be applied.
+# requests_per_device_per_second = 0.0
+#
+# Set the number of seconds of unused rate-limiting allowance that can
+# accumulate and be used to allow a subsequent burst of requests.
+# requests_per_device_rate_buffer = 1.0
+
 [container-replicator]
 # You can override the default log routing for this app here (don't use set!):
 # log_name = container-replicator
diff --git a/etc/object-server.conf-sample b/etc/object-server.conf-sample
index e7466bba83..c06f4365e8 100644
--- a/etc/object-server.conf-sample
+++ b/etc/object-server.conf-sample
@@ -111,7 +111,7 @@ bind_port = 6200
 # ionice_priority =
 
 [pipeline:main]
-pipeline = healthcheck recon object-server
+pipeline = healthcheck recon backend_ratelimit object-server
 
 [app:object-server]
 use = egg:swift#object
@@ -231,6 +231,18 @@ use = egg:swift#recon
 #recon_cache_path = /var/cache/swift
 #recon_lock_path = /var/lock
 
+[filter:backend_ratelimit]
+use = egg:swift#backend_ratelimit
+# Set the maximum rate of requests per second per device per worker. Beyond
+# this rate the server will return 529 responses and emit a 'backend.ratelimit'
+# statsd metric without logging. The default value of zero causes no
+# rate-limiting to be applied.
+# requests_per_device_per_second = 0.0
+#
+# Set the number of seconds of unused rate-limiting allowance that can
+# accumulate and be used to allow a subsequent burst of requests.
+# requests_per_device_rate_buffer = 1.0
+
 [object-replicator]
 # You can override the default log routing for this app here (don't use set!):
 # log_name = object-replicator
diff --git a/setup.cfg b/setup.cfg
index 318d502ef7..8999808877 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -98,6 +98,7 @@ paste.filter_factory =
     memcache = swift.common.middleware.memcache:filter_factory
     read_only = swift.common.middleware.read_only:filter_factory
     ratelimit = swift.common.middleware.ratelimit:filter_factory
+    backend_ratelimit = swift.common.middleware.backend_ratelimit:filter_factory
     cname_lookup = swift.common.middleware.cname_lookup:filter_factory
     catch_errors = swift.common.middleware.catch_errors:filter_factory
     domain_remap = swift.common.middleware.domain_remap:filter_factory
diff --git a/swift/common/middleware/backend_ratelimit.py b/swift/common/middleware/backend_ratelimit.py
new file mode 100644
index 0000000000..980e9edc4b
--- /dev/null
+++ b/swift/common/middleware/backend_ratelimit.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2022 NVIDIA
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from collections import defaultdict
+
+from swift.common.request_helpers import split_and_validate_path
+from swift.common.swob import Request, HTTPTooManyBackendRequests
+from swift.common.utils import get_logger, non_negative_float, \
+    EventletRateLimiter
+
+RATE_LIMITED_METHODS = ('GET', 'HEAD', 'PUT', 'POST', 'DELETE', 'UPDATE',
+                        'REPLICATE')
+
+
+class BackendRateLimitMiddleware(object):
+    """
+    Backend rate-limiting middleware.
+
+    Rate-limits requests to backend storage node devices. Each device is
+    independently rate-limited. All requests with a 'GET', 'HEAD', 'PUT',
+    'POST', 'DELETE', 'UPDATE' or 'REPLICATE' method are included in a device's
+    rate limit.
+
+    If a request would cause the rate-limit to be exceeded then a response with
+    a 529 status code is returned.
+    """
+    def __init__(self, app, conf, logger=None):
+        self.app = app
+        self.logger = logger or get_logger(conf, log_route='backend_ratelimit')
+        self.requests_per_device_per_second = non_negative_float(
+            conf.get('requests_per_device_per_second', 0.0))
+        self.requests_per_device_rate_buffer = non_negative_float(
+            conf.get('requests_per_device_rate_buffer', 1.0))
+
+        # map device -> RateLimiter
+        self.rate_limiters = defaultdict(
+            lambda: EventletRateLimiter(
+                max_rate=self.requests_per_device_per_second,
+                rate_buffer=self.requests_per_device_rate_buffer,
+                running_time=time.time(),
+                burst_after_idle=True))
+
+    def __call__(self, env, start_response):
+        """
+        WSGI entry point.
+
+        :param env: WSGI environment dictionary
+        :param start_response: WSGI callable
+        """
+        req = Request(env)
+        handler = self.app
+        if req.method in RATE_LIMITED_METHODS:
+            try:
+                device, partition, _ = split_and_validate_path(req, 1, 3, True)
+                int(partition)  # check it's a valid partition
+                rate_limiter = self.rate_limiters[device]
+                if not rate_limiter.is_allowed():
+                    self.logger.increment('backend.ratelimit')
+                    handler = HTTPTooManyBackendRequests()
+            except Exception:  # noqa
+                # request may not have device/partition e.g. a healthcheck req
+                pass
+        return handler(env, start_response)
+
+
+def filter_factory(global_conf, **local_conf):
+    conf = global_conf.copy()
+    conf.update(local_conf)
+
+    def backend_ratelimit_filter(app):
+        return BackendRateLimitMiddleware(app, conf)
+
+    return backend_ratelimit_filter
diff --git a/swift/common/swob.py b/swift/common/swob.py
index 6eaa33a744..7a14d1cf10 100644
--- a/swift/common/swob.py
+++ b/swift/common/swob.py
@@ -114,6 +114,8 @@ RESPONSE_REASONS = {
           'backend server.'),
     507: ('Insufficient Storage', 'There was not enough space to save the '
           'resource. Drive: %(drive)s'),
+    529: ('Too Many Backend Requests', 'The server is incapable of performing '
+          'the requested operation due to too many requests. Slow down.')
 }
 
 MAX_RANGE_OVERLAPS = 2
@@ -1619,3 +1621,4 @@ HTTPNotImplemented = status_map[501]
 HTTPBadGateway = status_map[502]
 HTTPServiceUnavailable = status_map[503]
 HTTPInsufficientStorage = status_map[507]
+HTTPTooManyBackendRequests = status_map[529]
diff --git a/swift/common/utils.py b/swift/common/utils.py
index 2b7a75b7a1..fbbeaab11f 100644
--- a/swift/common/utils.py
+++ b/swift/common/utils.py
@@ -367,9 +367,13 @@ def non_negative_float(value):
     :raises ValueError: if the value cannot be cast to a float or is negative.
     :return: a float
     """
-    value = float(value)
-    if value < 0:
-        raise ValueError
+    try:
+        value = float(value)
+        if value < 0:
+            raise ValueError
+    except (TypeError, ValueError):
+        raise ValueError('Value must be a non-negative float number, not "%s".'
+                         % value)
     return value
 
 
diff --git a/test/unit/common/middleware/helpers.py b/test/unit/common/middleware/helpers.py
index 1fe617ba47..e184c004f1 100644
--- a/test/unit/common/middleware/helpers.py
+++ b/test/unit/common/middleware/helpers.py
@@ -76,7 +76,7 @@ class FakeSwift(object):
     """
     ALLOWED_METHODS = [
         'PUT', 'POST', 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'REPLICATE',
-        'UPDATE']
+        'SSYNC', 'UPDATE']
 
     def __init__(self):
         self._calls = []
diff --git a/test/unit/common/middleware/test_backend_ratelimit.py b/test/unit/common/middleware/test_backend_ratelimit.py
new file mode 100644
index 0000000000..15be2f50a6
--- /dev/null
+++ b/test/unit/common/middleware/test_backend_ratelimit.py
@@ -0,0 +1,170 @@
+# Copyright (c) 2022 NVIDIA
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Used by get_swift_info and register_swift_info to store information about
+# the swift cluster.
+import time
+import unittest
+from collections import defaultdict
+
+import mock
+
+from swift.common.middleware import backend_ratelimit
+from swift.common.middleware.backend_ratelimit import \
+    BackendRateLimitMiddleware
+from swift.common.swob import Request, HTTPOk
+from test.debug_logger import debug_logger
+from test.unit.common.middleware.helpers import FakeSwift
+
+
+class FakeApp(object):
+    def __init__(self):
+        self.calls = []
+
+    def __call__(self, env, start_response):
+        start_response('200 OK', {})
+        return ['']
+
+
+class TestBackendRatelimitMiddleware(unittest.TestCase):
+    def setUp(self):
+        super(TestBackendRatelimitMiddleware, self).setUp()
+        self.swift = FakeSwift()
+
+    def test_init(self):
+        conf = {}
+        factory = backend_ratelimit.filter_factory(conf)
+        rl = factory(self.swift)
+        self.assertEqual(0.0, rl.requests_per_device_per_second)
+        self.assertEqual(1.0, rl.requests_per_device_rate_buffer)
+
+        conf = {'requests_per_device_per_second': 1.3,
+                'requests_per_device_rate_buffer': 2.4}
+        factory = backend_ratelimit.filter_factory(conf)
+        rl = factory(self.swift)
+        self.assertEqual(1.3, rl.requests_per_device_per_second)
+        self.assertEqual(2.4, rl.requests_per_device_rate_buffer)
+
+        conf = {'requests_per_device_per_second': -1}
+        factory = backend_ratelimit.filter_factory(conf)
+        with self.assertRaises(ValueError) as cm:
+            factory(self.swift)
+        self.assertEqual(
+            'Value must be a non-negative float number, not "-1.0".',
+            str(cm.exception))
+
+        conf = {'requests_per_device_rate_buffer': -1}
+        factory = backend_ratelimit.filter_factory(conf)
+        with self.assertRaises(ValueError):
+            factory(self.swift)
+        self.assertEqual(
+            'Value must be a non-negative float number, not "-1.0".',
+            str(cm.exception))
+
+    def _do_test_ratelimit(self, method, req_per_sec, rate_buffer):
+        # send 20 requests, time increments by 0.01 between each request
+        start = time.time()
+        fake_time = [start]
+
+        def mock_time():
+            return fake_time[0]
+
+        app = FakeSwift()
+        logger = debug_logger()
+        # apply a ratelimit
+        conf = {'requests_per_device_per_second': req_per_sec,
+                'requests_per_device_rate_buffer': rate_buffer}
+        rl = BackendRateLimitMiddleware(app, conf, logger)
+        success = defaultdict(int)
+        ratelimited = 0
+
+        with mock.patch('swift.common.utils.time.time', mock_time):
+            for i in range(20):
+                for dev in ['sda1', 'sda2', 'sda3']:
+                    req = Request.blank('/%s/99/a/c/o' % dev,
+                                        environ={'REQUEST_METHOD': method})
+                    app.register(method, req.path, HTTPOk, {})
+                    resp = req.get_response(rl)
+                    if resp.status_int == 200:
+                        success[dev] += 1
+                    else:
+                        self.assertEqual(529, resp.status_int)
+                        self.assertTrue(resp.status.startswith(
+                            '529 Too Many Backend Requests'))
+                        ratelimited += 1
+                fake_time[0] += 0.01
+        self.assertEqual(
+            ratelimited,
+            logger.get_increment_counts().get('backend.ratelimit', 0))
+        return success
+
+    def test_ratelimited(self):
+        def do_test_ratelimit(method):
+            # no rate-limiting
+            success_per_dev = self._do_test_ratelimit(method, 0, 0)
+            self.assertEqual([20] * 3, list(success_per_dev.values()))
+
+            # rate-limited
+            success_per_dev = self._do_test_ratelimit(method, 1, 0)
+            self.assertEqual([1] * 3, list(success_per_dev.values()))
+
+            success_per_dev = self._do_test_ratelimit(method, 10, 0)
+            self.assertEqual([2] * 3, list(success_per_dev.values()))
+
+            success_per_dev = self._do_test_ratelimit(method, 101, 0)
+            self.assertEqual([20] * 3, list(success_per_dev.values()))
+
+            # startup burst of 1 seconds allowance plus current allowance...
+            success_per_dev = self._do_test_ratelimit(method, 1, 1)
+            self.assertEqual([2] * 3, list(success_per_dev.values()))
+            success_per_dev = self._do_test_ratelimit(method, 10, 1)
+            self.assertEqual([12] * 3, list(success_per_dev.values()))
+
+        do_test_ratelimit('GET')
+        do_test_ratelimit('HEAD')
+        do_test_ratelimit('PUT')
+        do_test_ratelimit('POST')
+        do_test_ratelimit('DELETE')
+        do_test_ratelimit('UPDATE')
+        do_test_ratelimit('REPLICATE')
+
+    def test_not_ratelimited(self):
+        def do_test_no_ratelimit(method):
+            # verify no rate-limiting
+            success_per_dev = self._do_test_ratelimit(method, 1, 0)
+            self.assertEqual([20] * 3, list(success_per_dev.values()))
+
+        do_test_no_ratelimit('OPTIONS')
+        do_test_no_ratelimit('SSYNC')
+
+    def test_unhandled_request(self):
+        app = FakeSwift()
+        logger = debug_logger()
+        conf = {'requests_per_device_per_second': 1,
+                'requests_per_device_rate_buffer': 1}
+
+        def do_test(path):
+            rl = BackendRateLimitMiddleware(app, conf, logger)
+            req = Request.blank(path)
+            app.register('GET', req.path, HTTPOk, {})
+            for i in range(10):
+                resp = req.get_response(rl)
+                self.assertEqual(200, resp.status_int)
+            self.assertEqual(
+                0, logger.get_increment_counts().get('backend.ratelimit', 0))
+
+        do_test('/recon/version')
+        do_test('/healthcheck')
+        do_test('/v1/a/c/o')
diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py
index 40a34fe8d8..69f90cfa06 100644
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -3105,12 +3105,26 @@ cluster_dfw1 = http://dfw1.host/v1/
         self.assertEqual(1, utils.non_negative_float(True))
         self.assertEqual(0, utils.non_negative_float(False))
 
-        with self.assertRaises(ValueError):
+        with self.assertRaises(ValueError) as cm:
             utils.non_negative_float(-1.1)
-        with self.assertRaises(ValueError):
+        self.assertEqual(
+            'Value must be a non-negative float number, not "-1.1".',
+            str(cm.exception))
+        with self.assertRaises(ValueError) as cm:
             utils.non_negative_float('-1.1')
-        with self.assertRaises(ValueError):
+        self.assertEqual(
+            'Value must be a non-negative float number, not "-1.1".',
+            str(cm.exception))
+        with self.assertRaises(ValueError) as cm:
             utils.non_negative_float('one')
+        self.assertEqual(
+            'Value must be a non-negative float number, not "one".',
+            str(cm.exception))
+        with self.assertRaises(ValueError) as cm:
+            utils.non_negative_float(None)
+        self.assertEqual(
+            'Value must be a non-negative float number, not "None".',
+            str(cm.exception))
 
     def test_non_negative_int(self):
         self.assertEqual(0, utils.non_negative_int('0'))