Added container listing ratelimiting

Change-Id: If4e9cfe4e4c743de1f39704acf849164cf3f0bd0
2013-08-14 12:40:25 +00:00
parent 8a8499805b
commit c8795e6e85
4 changed files with 217 additions and 103 deletions
--- a/doc/source/ratelimit.rst
+++ b/doc/source/ratelimit.rst
@@ -15,38 +15,49 @@ Configuration
 All configuration is optional.  If no account or container limits are provided
 there will be no rate limiting.  Configuration available:

-======================== =========  ===========================================
-Option                   Default     Description
------------------------ ---------  -------------------------------------------
-clock_accuracy           1000       Represents how accurate the proxy servers'
-                                    system clocks are with each other. 1000
-                                    means that all the proxies' clock are
-                                    accurate to each other within 1
-                                    millisecond.  No ratelimit should be
-                                    higher than the clock accuracy.
-max_sleep_time_seconds   60         App will immediately return a 498 response
-                                    if the necessary sleep time ever exceeds
-                                    the given max_sleep_time_seconds.
-log_sleep_time_seconds   0          To allow visibility into rate limiting set
-                                    this value > 0 and all sleeps greater than
-                                    the number will be logged.
-rate_buffer_seconds      5          Number of seconds the rate counter can
-                                    drop and be allowed to catch up (at a
-                                    faster than listed rate). A larger number
-                                    will result in larger spikes in rate but
-                                    better average accuracy.
-account_ratelimit        0          If set, will limit PUT and DELETE requests
-                                    to /account_name/container_name.
-                                    Number is in requests per second.
-account_whitelist        ''         Comma separated lists of account names that
-                                    will not be rate limited.
-account_blacklist        ''         Comma separated lists of account names that
-                                    will not be allowed. Returns a 497 response.
-container_ratelimit_size ''         When set with container_limit_x = r:
-                                    for containers of size x, limit requests
-                                    per second to r.  Will limit PUT, DELETE,
-                                    and POST requests to /a/c/o.
-======================== =========  ===========================================
+================================ ======= ======================================
+Option                           Default Description
+-------------------------------- ------- --------------------------------------
+clock_accuracy                   1000    Represents how accurate the proxy
+                                         servers' system clocks are with each
+                                         other. 1000 means that all the
+                                         proxies' clocks are accurate to each
+                                         other within 1 millisecond. No
+                                         ratelimit should be higher than the
+                                         clock accuracy.
+max_sleep_time_seconds           60      App will immediately return a 498
+                                         response if the necessary sleep time
+                                         ever exceeds the given
+                                         max_sleep_time_seconds.
+log_sleep_time_seconds           0       To allow visibility into rate limiting
+                                         set this value > 0 and all sleeps
+                                         greater than the number will be
+                                         logged.
+rate_buffer_seconds              5       Number of seconds the rate counter can
+                                         drop and be allowed to catch up (at a
+                                         faster than listed rate). A larger
+                                         number will result in larger spikes in
+                                         rate but better average accuracy.
+account_ratelimit                0       If set, will limit PUT and DELETE
+                                         requests to
+                                         /account_name/container_name. Number
+                                         is in requests per second.
+account_whitelist                ''      Comma separated list of account names
+                                         that will not be rate limited.
+account_blacklist                ''      Comma separated list of account names
+                                         that will not be allowed. Returns a
+                                         497 response.
+container_ratelimit_size         ''      When set with container_ratelimit_x =
+                                         r: for containers of size x, limit
+                                         requests per second to r. Will limit
+                                         PUT, DELETE, and POST requests to
+                                         /a/c/o.
+container_listing_ratelimit_size ''      When set with
+                                         container_listing_ratelimit_x = r: for
+                                         containers of size x, limit listing
+                                         requests per second to r. Will limit
+                                         GET requests to /a/c.
+================================ ======= ======================================

 The container rate limits are linearly interpolated from the values given.  A
 sample container rate limiting could be:
--- a/etc/proxy-server.conf-sample
+++ b/etc/proxy-server.conf-sample
@@ -323,13 +323,19 @@ use = egg:swift#ratelimit
 # account_blacklist = c,d

 # with container_limit_x = r
-# for containers of size x limit requests per second to r.  The container
+# for containers of size x limit write requests per second to r.  The container
 # rate will be linearly interpolated from the values given. With the values
 # below, a container of size 5 will get a rate of 75.
 # container_ratelimit_0 = 100
 # container_ratelimit_10 = 50
 # container_ratelimit_50 = 20

+# Similarly to the above container-level write limits, the following will limit
+# container GET (listing) requests.
+# container_listing_ratelimit_0 = 100
+# container_listing_ratelimit_10 = 50
+# container_listing_ratelimit_50 = 20
+
 [filter:domain_remap]
 use = egg:swift#domain_remap
 # You can override the default log routing for this filter here:
--- a/swift/common/middleware/ratelimit.py
+++ b/swift/common/middleware/ratelimit.py
@@ -23,6 +23,51 @@ from swift.common.memcached import MemcacheConnectionError
 from swift.common.swob import Request, Response


+def interpret_conf_limits(conf, name_prefix):
+    conf_limits = []
+    for conf_key in conf.keys():
+        if conf_key.startswith(name_prefix):
+            cont_size = int(conf_key[len(name_prefix):])
+            rate = float(conf[conf_key])
+            conf_limits.append((cont_size, rate))
+
+    conf_limits.sort()
+    ratelimits = []
+    while conf_limits:
+        cur_size, cur_rate = conf_limits.pop(0)
+        if conf_limits:
+            next_size, next_rate = conf_limits[0]
+            slope = (float(next_rate) - float(cur_rate)) \
+                / (next_size - cur_size)
+
+            def new_scope(cur_size, slope, cur_rate):
+                # making new scope for variables
+                return lambda x: (x - cur_size) * slope + cur_rate
+            line_func = new_scope(cur_size, slope, cur_rate)
+        else:
+            line_func = lambda x: cur_rate
+
+        ratelimits.append((cur_size, cur_rate, line_func))
+
+    return ratelimits
+
+
+def get_maxrate(ratelimits, size):
+    """
+    Returns number of requests allowed per second for given size.
+    """
+    last_func = None
+    if size:
+        size = int(size)
+        for ratesize, rate, func in ratelimits:
+            if size < ratesize:
+                break
+            last_func = func
+        if last_func:
+            return last_func(size)
+    return None
+
+
 class MaxSleepTimeHitError(Exception):
    pass

@@ -57,45 +102,20 @@ class RateLimitMiddleware(object):
            [acc.strip() for acc in
                conf.get('account_blacklist', '').split(',') if acc.strip()]
        self.memcache_client = None
-        conf_limits = []
-        for conf_key in conf.keys():
-            if conf_key.startswith('container_ratelimit_'):
-                cont_size = int(conf_key[len('container_ratelimit_'):])
-                rate = float(conf[conf_key])
-                conf_limits.append((cont_size, rate))
+        self.container_ratelimits = interpret_conf_limits(
+            conf, 'container_ratelimit_')
+        self.container_listing_ratelimits = interpret_conf_limits(
+            conf, 'container_listing_ratelimit_')

-        conf_limits.sort()
-        self.container_ratelimits = []
-        while conf_limits:
-            cur_size, cur_rate = conf_limits.pop(0)
-            if conf_limits:
-                next_size, next_rate = conf_limits[0]
-                slope = (float(next_rate) - float(cur_rate)) \
-                    / (next_size - cur_size)
-
-                def new_scope(cur_size, slope, cur_rate):
-                    # making new scope for variables
-                    return lambda x: (x - cur_size) * slope + cur_rate
-                line_func = new_scope(cur_size, slope, cur_rate)
-            else:
-                line_func = lambda x: cur_rate
-
-            self.container_ratelimits.append((cur_size, cur_rate, line_func))
-
-    def get_container_maxrate(self, container_size):
-        """
-        Returns number of requests allowed per second for given container size.
-        """
-        last_func = None
-        if container_size:
-            container_size = int(container_size)
-            for size, rate, func in self.container_ratelimits:
-                if container_size < size:
-                    break
-                last_func = func
-            if last_func:
-                return last_func(container_size)
-        return None
+    def get_container_size(self, account_name, container_name):
+        rv = 0
+        memcache_key = get_container_memcache_key(account_name,
+                                                  container_name)
+        container_info = self.memcache_client.get(memcache_key)
+        if isinstance(container_info, dict):
+            rv = container_info.get(
+                'object_count', container_info.get('container_size', 0))
+        return rv

    def get_ratelimitable_key_tuples(self, req_method, account_name,
                                     container_name=None, obj_name=None):
@@ -118,18 +138,26 @@ class RateLimitMiddleware(object):

        if account_name and container_name and obj_name and \
                req_method in ('PUT', 'DELETE', 'POST'):
-            container_size = None
-            memcache_key = get_container_memcache_key(account_name,
-                                                      container_name)
-            container_info = self.memcache_client.get(memcache_key)
-            if isinstance(container_info, dict):
-                container_size = container_info.get(
-                    'object_count', container_info.get('container_size', 0))
-                container_rate = self.get_container_maxrate(container_size)
-                if container_rate:
-                    keys.append(("ratelimit/%s/%s" % (account_name,
-                                                      container_name),
-                                 container_rate))
+            container_size = self.get_container_size(
+                account_name, container_name)
+            container_rate = get_maxrate(
+                self.container_ratelimits, container_size)
+            if container_rate:
+                keys.append((
+                    "ratelimit/%s/%s" % (account_name, container_name),
+                    container_rate))
+
+        if account_name and container_name and not obj_name and \
+                req_method == 'GET':
+            container_size = self.get_container_size(
+                account_name, container_name)
+            container_rate = get_maxrate(
+                self.container_listing_ratelimits, container_size)
+            if container_rate:
+                keys.append((
+                    "ratelimit_listing/%s/%s" % (account_name, container_name),
+                    container_rate))
+
        return keys

    def _get_sleep_time(self, key, max_rate):
--- a/test/unit/common/middleware/test_ratelimit.py
+++ b/test/unit/common/middleware/test_ratelimit.py
@@ -161,23 +161,28 @@ class TestRateLimit(unittest.TestCase):
        for x in range(0, num):
            callable_func()
        end = time.time()
-        total_time = float(num) / rate - 1.0 / rate # 1st request isn't limited
+        total_time = float(num) / rate - 1.0 / rate  # 1st request not limited
        # Allow for one second of variation in the total time.
        time_diff = abs(total_time - (end - begin))
        if check_time:
            self.assertEquals(round(total_time, 1), round(time_ticker, 1))
        return time_diff

-    def test_get_container_maxrate(self):
+    def test_get_maxrate(self):
        conf_dict = {'container_ratelimit_10': 200,
                     'container_ratelimit_50': 100,
                     'container_ratelimit_75': 30}
        test_ratelimit = dummy_filter_factory(conf_dict)(FakeApp())
-        self.assertEquals(test_ratelimit.get_container_maxrate(0), None)
-        self.assertEquals(test_ratelimit.get_container_maxrate(5), None)
-        self.assertEquals(test_ratelimit.get_container_maxrate(10), 200)
-        self.assertEquals(test_ratelimit.get_container_maxrate(60), 72)
-        self.assertEquals(test_ratelimit.get_container_maxrate(160), 30)
+        self.assertEquals(ratelimit.get_maxrate(
+            test_ratelimit.container_ratelimits, 0), None)
+        self.assertEquals(ratelimit.get_maxrate(
+            test_ratelimit.container_ratelimits, 5), None)
+        self.assertEquals(ratelimit.get_maxrate(
+            test_ratelimit.container_ratelimits, 10), 200)
+        self.assertEquals(ratelimit.get_maxrate(
+            test_ratelimit.container_ratelimits, 60), 72)
+        self.assertEquals(ratelimit.get_maxrate(
+            test_ratelimit.container_ratelimits, 160), 30)

    def test_get_ratelimitable_key_tuples(self):
        current_rate = 13
@@ -190,15 +195,15 @@ class TestRateLimit(unittest.TestCase):
                                                logger=FakeLogger())
        the_app.memcache_client = fake_memcache
        self.assertEquals(len(the_app.get_ratelimitable_key_tuples(
-                    'DELETE', 'a', None, None)), 0)
+            'DELETE', 'a', None, None)), 0)
        self.assertEquals(len(the_app.get_ratelimitable_key_tuples(
-                    'PUT', 'a', 'c', None)), 1)
+            'PUT', 'a', 'c', None)), 1)
        self.assertEquals(len(the_app.get_ratelimitable_key_tuples(
-                    'DELETE', 'a', 'c', None)), 1)
+            'DELETE', 'a', 'c', None)), 1)
        self.assertEquals(len(the_app.get_ratelimitable_key_tuples(
-                    'GET', 'a', 'c', 'o')), 0)
+            'GET', 'a', 'c', 'o')), 0)
        self.assertEquals(len(the_app.get_ratelimitable_key_tuples(
-                    'PUT', 'a', 'c', 'o')), 1)
+            'PUT', 'a', 'c', 'o')), 1)

    def test_memcached_container_info_dict(self):
        mdict = headers_to_container_info({'x-container-object-count': '45'})
@@ -223,8 +228,8 @@ class TestRateLimit(unittest.TestCase):
        conf_dict = {'account_ratelimit': current_rate}
        self.test_ratelimit = ratelimit.filter_factory(conf_dict)(FakeApp())
        ratelimit.http_connect = mock_http_connect(204)
-        for meth, exp_time in [('DELETE', 9.8), ('GET', 0),
-                           ('POST', 0), ('PUT', 9.8)]:
+        for meth, exp_time in [
+                ('DELETE', 9.8), ('GET', 0), ('POST', 0), ('PUT', 9.8)]:
            req = Request.blank('/v/a%s/c' % meth)
            req.method = meth
            req.environ['swift.cache'] = FakeMemcache()
@@ -281,8 +286,8 @@ class TestRateLimit(unittest.TestCase):
            threads.append(rc)
        for thread in threads:
            thread.join()
-        the_498s = [t for t in threads if \
-                        ''.join(t.result).startswith('Slow down')]
+        the_498s = [
+            t for t in threads if ''.join(t.result).startswith('Slow down')]
        self.assertEquals(len(the_498s), 0)
        self.assertEquals(time_ticker, 0)

@@ -316,8 +321,8 @@ class TestRateLimit(unittest.TestCase):
            threads.append(rc)
        for thread in threads:
            thread.join()
-        the_497s = [t for t in threads if \
-                        ''.join(t.result).startswith('Your account')]
+        the_497s = [
+            t for t in threads if ''.join(t.result).startswith('Your account')]
        self.assertEquals(len(the_497s), 5)
        self.assertEquals(time_ticker, 0)

@@ -350,6 +355,70 @@ class TestRateLimit(unittest.TestCase):
        r = self.test_ratelimit(req.environ, start_response)
        self.assertEquals(r[0], '204 No Content')

+    def test_ratelimit_max_rate_double_container(self):
+        global time_ticker
+        global time_override
+        current_rate = 2
+        conf_dict = {'container_ratelimit_0': current_rate,
+                     'clock_accuracy': 100,
+                     'max_sleep_time_seconds': 1}
+        self.test_ratelimit = dummy_filter_factory(conf_dict)(FakeApp())
+        ratelimit.http_connect = mock_http_connect(204)
+        self.test_ratelimit.log_sleep_time_seconds = .00001
+        req = Request.blank('/v/a/c/o')
+        req.method = 'PUT'
+        req.environ['swift.cache'] = FakeMemcache()
+        req.environ['swift.cache'].set(
+            ratelimit.get_container_memcache_key('a', 'c'),
+            {'container_size': 1})
+
+        time_override = [0, 0, 0, 0, None]
+        # simulates 4 requests coming in at same time, then sleeping
+        r = self.test_ratelimit(req.environ, start_response)
+        mock_sleep(.1)
+        r = self.test_ratelimit(req.environ, start_response)
+        mock_sleep(.1)
+        r = self.test_ratelimit(req.environ, start_response)
+        self.assertEquals(r[0], 'Slow down')
+        mock_sleep(.1)
+        r = self.test_ratelimit(req.environ, start_response)
+        self.assertEquals(r[0], 'Slow down')
+        mock_sleep(.1)
+        r = self.test_ratelimit(req.environ, start_response)
+        self.assertEquals(r[0], '204 No Content')
+
+    def test_ratelimit_max_rate_double_container_listing(self):
+        global time_ticker
+        global time_override
+        current_rate = 2
+        conf_dict = {'container_listing_ratelimit_0': current_rate,
+                     'clock_accuracy': 100,
+                     'max_sleep_time_seconds': 1}
+        self.test_ratelimit = dummy_filter_factory(conf_dict)(FakeApp())
+        ratelimit.http_connect = mock_http_connect(204)
+        self.test_ratelimit.log_sleep_time_seconds = .00001
+        req = Request.blank('/v/a/c')
+        req.method = 'GET'
+        req.environ['swift.cache'] = FakeMemcache()
+        req.environ['swift.cache'].set(
+            ratelimit.get_container_memcache_key('a', 'c'),
+            {'container_size': 1})
+
+        time_override = [0, 0, 0, 0, None]
+        # simulates 4 requests coming in at same time, then sleeping
+        r = self.test_ratelimit(req.environ, start_response)
+        mock_sleep(.1)
+        r = self.test_ratelimit(req.environ, start_response)
+        mock_sleep(.1)
+        r = self.test_ratelimit(req.environ, start_response)
+        self.assertEquals(r[0], 'Slow down')
+        mock_sleep(.1)
+        r = self.test_ratelimit(req.environ, start_response)
+        self.assertEquals(r[0], 'Slow down')
+        mock_sleep(.1)
+        r = self.test_ratelimit(req.environ, start_response)
+        self.assertEquals(r[0], '204 No Content')
+
    def test_ratelimit_max_rate_multiple_acc(self):
        num_calls = 4
        current_rate = 2
@@ -420,7 +489,7 @@ class TestRateLimit(unittest.TestCase):
        begin = time.time()
        self._run(make_app_call, num_calls, current_rate, check_time=False)
        time_took = time.time() - begin
-        self.assertEquals(round(time_took, 1), 0) # no memcache, no limiting
+        self.assertEquals(round(time_took, 1), 0)  # no memcache, no limiting

    def test_restarting_memcache(self):
        current_rate = 2
@@ -437,7 +506,7 @@ class TestRateLimit(unittest.TestCase):
        begin = time.time()
        self._run(make_app_call, num_calls, current_rate, check_time=False)
        time_took = time.time() - begin
-        self.assertEquals(round(time_took, 1), 0) # no memcache, no limiting
+        self.assertEquals(round(time_took, 1), 0)  # no memcache, no limiting

 if __name__ == '__main__':
    unittest.main()