Merge "Finer grained ratelimit for update"

Zuul, 2022-01-12 22:42:25 +00:00 (committed by Gerrit Code Review)
commit 32da73f5c9
3 changed files with 398 additions and 63 deletions

File: etc/object-server.conf-sample

@@ -473,6 +473,16 @@ use = egg:swift#recon
 # Send at most this many object updates per second
 # objects_per_second = 50
 #
+# Send at most this many object updates per bucket per second. The value must
+# be a float greater than or equal to 0. Set to 0 for unlimited.
+# max_objects_per_container_per_second = 0
+#
+# The per_container ratelimit implementation uses a hashring to constrain
+# memory requirements. Orders of magnitude more buckets will use (nominally)
+# more memory, but will ratelimit smaller groups of containers. The value must
+# be an integer greater than 0.
+# per_container_ratelimit_buckets = 1000
+#
 # slowdown will sleep that amount between objects. Deprecated; use
 # objects_per_second instead.
 # slowdown = 0.01
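The two new options work together: the per-container rate becomes a minimum interval between updates for any one bucket, and the bucket count only decides how many containers share a slot (and how much memory the tracking list needs). A rough, self-contained sketch of that arithmetic follows; the helper name per_bucket_min_interval and the values are illustrative, not part of the change or of Swift's API.

def per_bucket_min_interval(max_objects_per_container_per_second):
    # 0 means unlimited; the updater models that with a negative delta
    if max_objects_per_container_per_second <= 0:
        return None
    return 1.0 / max_objects_per_container_per_second

# e.g. a rate of 2 allows one update per bucket every 0.5 seconds
assert per_bucket_min_interval(2.0) == 0.5
assert per_bucket_min_interval(0) is None

# memory cost is one "next allowed time" float per bucket
per_container_ratelimit_buckets = 1000
next_update = [0.0] * per_container_ratelimit_buckets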

File: swift/obj/updater.py

@@ -19,6 +19,7 @@ import os
 import signal
 import sys
 import time
+import uuid
 from random import random, shuffle
 from eventlet import spawn, Timeout
@@ -29,7 +30,8 @@ from swift.common.exceptions import ConnectionTimeout
 from swift.common.ring import Ring
 from swift.common.utils import get_logger, renamer, write_pickle, \
     dump_recon_cache, config_true_value, RateLimitedIterator, split_path, \
-    eventlet_monkey_patch, get_redirect_data, ContextPool
+    eventlet_monkey_patch, get_redirect_data, ContextPool, hash_path, \
+    non_negative_float, config_positive_int_value
 from swift.common.daemon import Daemon
 from swift.common.header_key_dict import HeaderKeyDict
 from swift.common.storage_policy import split_policy_string, PolicyError
@@ -39,18 +41,68 @@ from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR, \
     HTTP_MOVED_PERMANENTLY

+class BucketizedUpdateSkippingLimiter(object):
+    """
+    Wrap an iterator to filter elements that show up too often.
+
+    :param update_iterable: an async_pending update iterable
+    :param num_buckets: number of buckets to divide container hashes into, the
+                        more buckets total the less containers to a bucket
+                        (once a busy container slows down a bucket the whole
+                        bucket starts skipping)
+    :param max_elements_per_group_per_second: tunable, when skipping kicks in
+    :param skip_f: function to call with update_ctx when skipping it
+    """
+
+    def __init__(self, update_iterable, num_buckets,
+                 max_elements_per_group_per_second,
+                 skip_f=lambda update_ctx: None):
+        self.iterator = iter(update_iterable)
+        # if we want a smaller "blast radius" we could make this number bigger
+        self.num_buckets = max(num_buckets, 1)
+        # an array might be more efficient; but this is pretty cheap
+        self.next_update = [0.0 for _ in range(self.num_buckets)]
+        try:
+            self.bucket_update_delta = 1.0 / max_elements_per_group_per_second
+        except ZeroDivisionError:
+            self.bucket_update_delta = -1
+        self.skip_f = skip_f
+        self.salt = str(uuid.uuid4())
+
+    def __iter__(self):
+        return self
+
+    def _bucket_key(self, update):
+        acct, cont = split_update_path(update)
+        return int(hash_path(acct, cont, self.salt), 16) % self.num_buckets
+
+    def next(self):
+        for update_ctx in self.iterator:
+            bucket_key = self._bucket_key(update_ctx['update'])
+            now = time.time()
+            if self.next_update[bucket_key] > now:
+                self.skip_f(update_ctx)
+                continue
+            self.next_update[bucket_key] = now + self.bucket_update_delta
+            return update_ctx
+        raise StopIteration()
+
+    __next__ = next
+
+
 class SweepStats(object):
     """
     Stats bucket for an update sweep
     """
     def __init__(self, errors=0, failures=0, quarantines=0, successes=0,
-                 unlinks=0, redirects=0):
+                 unlinks=0, redirects=0, skips=0):
         self.errors = errors
         self.failures = failures
         self.quarantines = quarantines
         self.successes = successes
         self.unlinks = unlinks
         self.redirects = redirects
+        self.skips = skips

     def copy(self):
         return type(self)(self.errors, self.failures, self.quarantines,
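The BucketizedUpdateSkippingLimiter added above combines three pieces: a salted hash maps each update's target container to one of a fixed number of buckets, each bucket remembers the earliest time it may emit its next update, and anything arriving early is handed to skip_f instead of being yielded. The standalone sketch below mimics that behaviour for illustration only; the function name demo_bucketized_skipping is made up, hashlib.md5 stands in for Swift's hash_path, and plain dicts stand in for async_pending update contexts.

import hashlib
import time
import uuid

def demo_bucketized_skipping(updates, num_buckets=4, max_per_bucket_per_sec=1.0):
    # one "next allowed time" per bucket; the salt keeps bucket assignment
    # unpredictable across runs, as the real limiter does via hash_path
    salt = uuid.uuid4().hex
    next_allowed = [0.0] * num_buckets
    min_interval = 1.0 / max_per_bucket_per_sec
    for update in updates:
        key = '/'.join([salt, update['account'], update['container']])
        bucket = int(hashlib.md5(key.encode()).hexdigest(), 16) % num_buckets
        now = time.time()
        if next_allowed[bucket] > now:
            print('skipping', update)  # stand-in for the skip_f callback
            continue
        next_allowed[bucket] = now + min_interval
        yield update

updates = [{'account': 'AUTH_test', 'container': 'c1', 'obj': 'o%d' % i}
           for i in range(3)]
# all three target the same container, so only the first one gets through
# in a single pass; in the real updater the rest stay on disk for a later sweep
print(list(demo_bucketized_skipping(updates)))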
@@ -62,7 +114,8 @@ class SweepStats(object):
                           self.quarantines - other.quarantines,
                           self.successes - other.successes,
                           self.unlinks - other.unlinks,
-                          self.redirects - other.redirects)
+                          self.redirects - other.redirects,
+                          self.skips - other.skips)

     def reset(self):
         self.errors = 0
@@ -71,6 +124,7 @@ class SweepStats(object):
         self.successes = 0
         self.unlinks = 0
         self.redirects = 0
+        self.skips = 0

     def __str__(self):
         keys = (
@@ -80,10 +134,26 @@ class SweepStats(object):
             (self.unlinks, 'unlinks'),
             (self.errors, 'errors'),
             (self.redirects, 'redirects'),
+            (self.skips, 'skips'),
         )
         return ', '.join('%d %s' % pair for pair in keys)

+def split_update_path(update):
+    """
+    Split the account and container parts out of the async update data.
+
+    N.B. updates to shards set the container_path key while the account and
+    container keys are always the root.
+    """
+    container_path = update.get('container_path')
+    if container_path:
+        acct, cont = split_path('/' + container_path, minsegs=2)
+    else:
+        acct, cont = update['account'], update['container']
+    return acct, cont
+
+
 class ObjectUpdater(Daemon):
     """Update object information in container listings."""
@@ -110,6 +180,10 @@ class ObjectUpdater(Daemon):
         self.max_objects_per_second = \
             float(conf.get('objects_per_second',
                            objects_per_second))
+        self.max_objects_per_container_per_second = non_negative_float(
+            conf.get('max_objects_per_container_per_second', 0))
+        self.per_container_ratelimit_buckets = config_positive_int_value(
+            conf.get('per_container_ratelimit_buckets', 1000))
         self.node_timeout = float(conf.get('node_timeout', 10))
         self.conn_timeout = float(conf.get('conn_timeout', 0.5))
         self.report_interval = float(conf.get('report_interval', 300))
@@ -205,13 +279,40 @@ class ObjectUpdater(Daemon):
             dump_recon_cache({'object_updater_sweep': elapsed},
                              self.rcache, self.logger)

+    def _load_update(self, device, update_path):
+        try:
+            return pickle.load(open(update_path, 'rb'))
+        except Exception as e:
+            if getattr(e, 'errno', None) == errno.ENOENT:
+                return
+            self.logger.exception(
+                'ERROR Pickle problem, quarantining %s', update_path)
+            self.stats.quarantines += 1
+            self.logger.increment('quarantines')
+            target_path = os.path.join(device, 'quarantined', 'objects',
+                                       os.path.basename(update_path))
+            renamer(update_path, target_path, fsync=False)
+            try:
+                # If this was the last async_pending in the directory,
+                # then this will succeed. Otherwise, it'll fail, and
+                # that's okay.
+                os.rmdir(os.path.dirname(update_path))
+            except OSError:
+                pass
+            return
+
     def _iter_async_pendings(self, device):
         """
-        Locate and yield all the async pendings on the device. Multiple updates
-        for the same object will come out in reverse-chronological order
-        (i.e. newest first) so that callers can skip stale async_pendings.
-        Tries to clean up empty directories as it goes.
+        Locate and yield an update context for all the async pending files on
+        the device. Each update context contains details of the async pending
+        file location, its timestamp and the un-pickled update data.
+
+        Async pending files that fail to load will be quarantined.
+
+        Only the most recent update for the same object is yielded; older
+        (stale) async pending files are unlinked as they are located.
+
+        The iterator tries to clean up empty directories as it goes.
         """
         # loop through async pending dirs for all policies
         for asyncdir in self._listdir(device):
@ -238,12 +339,13 @@ class ObjectUpdater(Daemon):
if not os.path.isdir(prefix_path): if not os.path.isdir(prefix_path):
continue continue
last_obj_hash = None last_obj_hash = None
for update in sorted(self._listdir(prefix_path), reverse=True): for update_file in sorted(self._listdir(prefix_path),
update_path = os.path.join(prefix_path, update) reverse=True):
update_path = os.path.join(prefix_path, update_file)
if not os.path.isfile(update_path): if not os.path.isfile(update_path):
continue continue
try: try:
obj_hash, timestamp = update.split('-') obj_hash, timestamp = update_file.split('-')
except ValueError: except ValueError:
self.stats.errors += 1 self.stats.errors += 1
self.logger.increment('errors') self.logger.increment('errors')
@ -280,9 +382,14 @@ class ObjectUpdater(Daemon):
raise raise
else: else:
last_obj_hash = obj_hash last_obj_hash = obj_hash
yield {'device': device, 'policy': policy, update = self._load_update(device, update_path)
'path': update_path, if update is not None:
'obj_hash': obj_hash, 'timestamp': timestamp} yield {'device': device,
'policy': policy,
'update_path': update_path,
'obj_hash': obj_hash,
'timestamp': timestamp,
'update': update}
def object_sweep(self, device): def object_sweep(self, device):
""" """
@@ -297,13 +404,21 @@ class ObjectUpdater(Daemon):
         self.logger.info("Object update sweep starting on %s (pid: %d)",
                          device, my_pid)

+        def skip_counting_f(update_ctx):
+            # in the future we could defer update_ctx
+            self.stats.skips += 1
+            self.logger.increment("skips")
+
         ap_iter = RateLimitedIterator(
             self._iter_async_pendings(device),
             elements_per_second=self.max_objects_per_second)
+        ap_iter = BucketizedUpdateSkippingLimiter(
+            ap_iter, self.per_container_ratelimit_buckets,
+            self.max_objects_per_container_per_second,
+            skip_f=skip_counting_f)
         with ContextPool(self.concurrency) as pool:
-            for update in ap_iter:
-                pool.spawn(self.process_object_update,
-                           update['path'], update['device'], update['policy'])
+            for update_ctx in ap_iter:
+                pool.spawn(self.process_object_update, **update_ctx)
                 now = time.time()
                 if now - last_status_update >= self.report_interval:
                     this_sweep = self.stats.since(start_stats)
@@ -326,6 +441,7 @@ class ObjectUpdater(Daemon):
                         '%(quarantines)d quarantines, '
                         '%(unlinks)d unlinks, %(errors)d errors, '
                         '%(redirects)d redirects '
+                        '%(skips)d skips '
                         '(pid: %(pid)d)'),
                        {'device': device,
                         'elapsed': time.time() - start_time,
@@ -335,36 +451,20 @@ class ObjectUpdater(Daemon):
             'quarantines': sweep_totals.quarantines,
             'unlinks': sweep_totals.unlinks,
             'errors': sweep_totals.errors,
-            'redirects': sweep_totals.redirects})
+            'redirects': sweep_totals.redirects,
+            'skips': sweep_totals.skips})

-    def process_object_update(self, update_path, device, policy):
+    def process_object_update(self, update_path, device, policy, update,
+                              **kwargs):
         """
         Process the object information to be updated and update.

         :param update_path: path to pickled object update file
         :param device: path to device
         :param policy: storage policy of object update
+        :param update: the un-pickled update data
+        :param kwargs: un-used keys from update_ctx
         """
-        try:
-            update = pickle.load(open(update_path, 'rb'))
-        except Exception as e:
-            if getattr(e, 'errno', None) == errno.ENOENT:
-                return
-            self.logger.exception(
-                'ERROR Pickle problem, quarantining %s', update_path)
-            self.stats.quarantines += 1
-            self.logger.increment('quarantines')
-            target_path = os.path.join(device, 'quarantined', 'objects',
-                                       os.path.basename(update_path))
-            renamer(update_path, target_path, fsync=False)
-            try:
-                # If this was the last async_pending in the directory,
-                # then this will succeed. Otherwise, it'll fail, and
-                # that's okay.
-                os.rmdir(os.path.dirname(update_path))
-            except OSError:
-                pass
-            return

         def do_update():
             successes = update.get('successes', [])
@@ -374,11 +474,7 @@ class ObjectUpdater(Daemon):
                                    str(int(policy)))
             headers_out.setdefault('X-Backend-Accept-Redirect', 'true')
             headers_out.setdefault('X-Backend-Accept-Quoted-Location', 'true')
-            container_path = update.get('container_path')
-            if container_path:
-                acct, cont = split_path('/' + container_path, minsegs=2)
-            else:
-                acct, cont = update['account'], update['container']
+            acct, cont = split_update_path(update)
             part, nodes = self.get_container_ring().get_nodes(acct, cont)
             obj = '/%s/%s/%s' % (acct, cont, update['obj'])
             events = [spawn(self.object_update,

File: test/unit/obj/test_updater.py

@@ -12,13 +12,14 @@
 # implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import eventlet
 import six.moves.cPickle as pickle
 import mock
 import os
 import unittest
 import random
 import itertools
+from collections import Counter
 from contextlib import closing
 from gzip import GzipFile
 from tempfile import mkdtemp
@@ -39,8 +40,7 @@ from swift.common.ring import RingData
 from swift.common import utils
 from swift.common.header_key_dict import HeaderKeyDict
 from swift.common.swob import bytes_to_wsgi
-from swift.common.utils import (
-    hash_path, normalize_timestamp, mkdirs, write_pickle)
+from swift.common.utils import hash_path, normalize_timestamp, mkdirs
 from swift.common.storage_policy import StoragePolicy, POLICIES
@@ -135,6 +135,8 @@ class TestObjectUpdater(unittest.TestCase):
         self.assertEqual(daemon.concurrency, 8)
         self.assertEqual(daemon.updater_workers, 1)
         self.assertEqual(daemon.max_objects_per_second, 50.0)
+        self.assertEqual(daemon.max_objects_per_container_per_second, 0.0)
+        self.assertEqual(daemon.per_container_ratelimit_buckets, 1000)

         # non-defaults
         conf = {
@@ -145,6 +147,8 @@ class TestObjectUpdater(unittest.TestCase):
             'concurrency': '2',
             'updater_workers': '3',
             'objects_per_second': '10.5',
+            'max_objects_per_container_per_second': '1.2',
+            'per_container_ratelimit_buckets': '100',
         }
         daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
         self.assertEqual(daemon.devices, '/some/where/else')
@@ -154,6 +158,8 @@ class TestObjectUpdater(unittest.TestCase):
         self.assertEqual(daemon.concurrency, 2)
         self.assertEqual(daemon.updater_workers, 3)
         self.assertEqual(daemon.max_objects_per_second, 10.5)
+        self.assertEqual(daemon.max_objects_per_container_per_second, 1.2)
+        self.assertEqual(daemon.per_container_ratelimit_buckets, 100)

         # check deprecated option
         daemon = object_updater.ObjectUpdater({'slowdown': '0.04'},
@@ -169,6 +175,12 @@ class TestObjectUpdater(unittest.TestCase):
         check_bad({'concurrency': '1.0'})
         check_bad({'slowdown': 'baz'})
         check_bad({'objects_per_second': 'quux'})
+        check_bad({'max_objects_per_container_per_second': '-0.1'})
+        check_bad({'max_objects_per_container_per_second': 'auto'})
+        check_bad({'per_container_ratelimit_buckets': '1.2'})
+        check_bad({'per_container_ratelimit_buckets': '0'})
+        check_bad({'per_container_ratelimit_buckets': '-1'})
+        check_bad({'per_container_ratelimit_buckets': 'auto'})

     @mock.patch('os.listdir')
     def test_listdir_with_exception(self, mock_listdir):
@@ -201,11 +213,12 @@ class TestObjectUpdater(unittest.TestCase):
         self.assertEqual(len(log_lines), 0)
         self.assertEqual(path, ['foo', 'bar'])

-    def test_object_sweep(self):
-        def check_with_idx(index, warn, should_skip):
-            if int(index) > 0:
+    @mock.patch('swift.obj.updater.dump_recon_cache')
+    def test_object_sweep(self, mock_recon):
+        def check_with_idx(policy_index, warn, should_skip):
+            if int(policy_index) > 0:
                 asyncdir = os.path.join(self.sda1,
-                                        ASYNCDIR_BASE + "-" + index)
+                                        ASYNCDIR_BASE + "-" + policy_index)
             else:
                 asyncdir = os.path.join(self.sda1, ASYNCDIR_BASE)
@@ -220,7 +233,8 @@ class TestObjectUpdater(unittest.TestCase):
                 os.path.join(self.sda1,
                              ASYNCDIR_BASE + '-' + 'twentington'),
                 os.path.join(self.sda1,
-                             ASYNCDIR_BASE + '-' + str(int(index) + 100)))
+                             ASYNCDIR_BASE + '-' + str(
+                                 int(policy_index) + 100)))

             for not_dir in not_dirs:
                 with open(not_dir, 'w'):
@@ -239,13 +253,13 @@ class TestObjectUpdater(unittest.TestCase):
                     o_path = os.path.join(prefix_dir, ohash + '-' +
                                           normalize_timestamp(t))
                     if t == timestamps[0]:
-                        expected.add((o_path, int(index)))
-                    write_pickle({}, o_path)
+                        expected.add((o_path, int(policy_index)))
+                    self._write_dummy_pickle(o_path, 'account', 'container', o)

            seen = set()

            class MockObjectUpdater(object_updater.ObjectUpdater):
-                def process_object_update(self, update_path, device, policy):
+                def process_object_update(self, update_path, policy, **kwargs):
                     seen.add((update_path, int(policy)))
                     os.unlink(update_path)
@@ -290,10 +304,10 @@ class TestObjectUpdater(unittest.TestCase):
             ohash = hash_path('account', 'container', o)
             o_path = os.path.join(prefix_dir, ohash + '-' +
                                   normalize_timestamp(t))
-            write_pickle({}, o_path)
+            self._write_dummy_pickle(o_path, 'account', 'container', o)

         class MockObjectUpdater(object_updater.ObjectUpdater):
-            def process_object_update(self, update_path, device, policy):
+            def process_object_update(self, update_path, **kwargs):
                 os.unlink(update_path)
                 self.stats.successes += 1
                 self.stats.unlinks += 1
@@ -312,12 +326,13 @@ class TestObjectUpdater(unittest.TestCase):
         def mock_time_function():
             rv = now[0]
-            now[0] += 5
+            now[0] += 4
             return rv

-        # With 10s between updates, time() advancing 5s every time we look,
+        # With 10s between updates, time() advancing 4s every time we look,
         # and 5 async_pendings on disk, we should get at least two progress
-        # lines.
+        # lines. (time is incremented by 4 each time the update app iter yields
+        # and each time the elapsed time is sampled)
         with mock.patch('swift.obj.updater.time',
                         mock.MagicMock(time=mock_time_function)), \
                 mock.patch.object(object_updater, 'ContextPool', MockPool):
@@ -360,10 +375,10 @@ class TestObjectUpdater(unittest.TestCase):
             ohash = hash_path('account', 'container%d' % policy.idx, o)
             o_path = os.path.join(prefix_dir, ohash + '-' +
                                   normalize_timestamp(t))
-            write_pickle({}, o_path)
+            self._write_dummy_pickle(o_path, 'account', 'container', o)

         class MockObjectUpdater(object_updater.ObjectUpdater):
-            def process_object_update(self, update_path, device, policy):
+            def process_object_update(self, update_path, **kwargs):
                 os.unlink(update_path)
                 self.stats.successes += 1
                 self.stats.unlinks += 1
@@ -1196,7 +1211,7 @@ class TestObjectUpdater(unittest.TestCase):
     def test_obj_update_gone_missing(self):
         # if you've got multiple updaters running (say, both a background
-        # and foreground process), process_object_update may get a file
+        # and foreground process), _load_update may get a file
         # that doesn't exist
         policies = list(POLICIES)
         random.shuffle(policies)
@@ -1218,13 +1233,227 @@ class TestObjectUpdater(unittest.TestCase):
             odir,
             '%s-%s' % (ohash, next(self.ts_iter).internal))
+        self.assertEqual(os.listdir(async_dir), [ohash[-3:]])
+        self.assertFalse(os.listdir(odir))
         with mocked_http_conn():
             with mock.patch('swift.obj.updater.dump_recon_cache'):
-                daemon.process_object_update(op_path, self.sda1, policies[0])
+                daemon._load_update(self.sda1, op_path)
         self.assertEqual({}, daemon.logger.get_increment_counts())
         self.assertEqual(os.listdir(async_dir), [ohash[-3:]])
         self.assertFalse(os.listdir(odir))
+    def _write_dummy_pickle(self, path, a, c, o, cp=None):
+        update = {
+            'op': 'PUT',
+            'account': a,
+            'container': c,
+            'obj': o,
+            'headers': {'X-Container-Timestamp': normalize_timestamp(0)}
+        }
+        if cp:
+            update['container_path'] = cp
+        with open(path, 'wb') as async_pending:
+            pickle.dump(update, async_pending)
+
+    def _make_async_pending_pickle(self, a, c, o, cp=None):
+        ohash = hash_path(a, c, o)
+        odir = os.path.join(self.async_dir, ohash[-3:])
+        mkdirs(odir)
+        path = os.path.join(
+            odir,
+            '%s-%s' % (ohash, normalize_timestamp(time())))
+        self._write_dummy_pickle(path, a, c, o, cp)
+
+    def _find_async_pending_files(self):
+        found_files = []
+        for root, dirs, files in os.walk(self.async_dir):
+            found_files.extend(files)
+        return found_files
+
+    @mock.patch('swift.obj.updater.dump_recon_cache')
+    def test_per_container_rate_limit(self, mock_recon):
+        conf = {
+            'devices': self.devices_dir,
+            'mount_check': 'false',
+            'swift_dir': self.testdir,
+            'max_objects_per_container_per_second': 1,
+        }
+        daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
+        self.async_dir = os.path.join(self.sda1, get_async_dir(POLICIES[0]))
+        os.mkdir(self.async_dir)
+        num_c1_files = 10
+        for i in range(num_c1_files):
+            obj_name = 'o%02d' % i
+            self._make_async_pending_pickle('a', 'c1', obj_name)
+        c1_part, _ = daemon.get_container_ring().get_nodes('a', 'c1')
+        # make one more in a different container, with a container_path
+        self._make_async_pending_pickle('a', 'c2', obj_name,
+                                        cp='.shards_a/c2_shard')
+        c2_part, _ = daemon.get_container_ring().get_nodes('.shards_a',
+                                                           'c2_shard')
+        expected_total = num_c1_files + 1
+        self.assertEqual(expected_total,
+                         len(self._find_async_pending_files()))
+        expected_success = 2
+        fake_status_codes = [200] * 3 * expected_success
+        with mocked_http_conn(*fake_status_codes) as fake_conn:
+            daemon.run_once()
+        self.assertEqual(expected_success, daemon.stats.successes)
+        expected_skipped = expected_total - expected_success
+        self.assertEqual(expected_skipped, daemon.stats.skips)
+        self.assertEqual(expected_skipped,
+                         len(self._find_async_pending_files()))
+        self.assertEqual(
+            Counter(
+                '/'.join(req['path'].split('/')[:5])
+                for req in fake_conn.requests),
+            {'/sda1/%s/a/c1' % c1_part: 3,
+             '/sda1/%s/.shards_a/c2_shard' % c2_part: 3})
+
+    @mock.patch('swift.obj.updater.dump_recon_cache')
+    def test_per_container_rate_limit_unlimited(self, mock_recon):
+        conf = {
+            'devices': self.devices_dir,
+            'mount_check': 'false',
+            'swift_dir': self.testdir,
+            'max_objects_per_container_per_second': 0,
+        }
+        daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
+        self.async_dir = os.path.join(self.sda1, get_async_dir(POLICIES[0]))
+        os.mkdir(self.async_dir)
+        num_c1_files = 10
+        for i in range(num_c1_files):
+            obj_name = 'o%02d' % i
+            self._make_async_pending_pickle('a', 'c1', obj_name)
+        c1_part, _ = daemon.get_container_ring().get_nodes('a', 'c1')
+        # make one more in a different container, with a container_path
+        self._make_async_pending_pickle('a', 'c2', obj_name,
+                                        cp='.shards_a/c2_shard')
+        c2_part, _ = daemon.get_container_ring().get_nodes('.shards_a',
+                                                           'c2_shard')
+        expected_total = num_c1_files + 1
+        self.assertEqual(expected_total,
+                         len(self._find_async_pending_files()))
+        fake_status_codes = [200] * 3 * expected_total
+        with mocked_http_conn(*fake_status_codes):
+            daemon.run_once()
+        self.assertEqual(expected_total, daemon.stats.successes)
+        self.assertEqual(0, daemon.stats.skips)
+        self.assertEqual([], self._find_async_pending_files())
+
+    @mock.patch('swift.obj.updater.dump_recon_cache')
+    def test_per_container_rate_limit_slow_responses(self, mock_recon):
+        conf = {
+            'devices': self.devices_dir,
+            'mount_check': 'false',
+            'swift_dir': self.testdir,
+            'max_objects_per_container_per_second': 10,
+        }
+        daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
+        self.async_dir = os.path.join(self.sda1, get_async_dir(POLICIES[0]))
+        os.mkdir(self.async_dir)
+        # all updates for same container
+        num_c1_files = 4
+        for i in range(num_c1_files):
+            obj_name = 'o%02d' % i
+            self._make_async_pending_pickle('a', 'c1', obj_name)
+        expected_total = num_c1_files
+        self.assertEqual(expected_total,
+                         len(self._find_async_pending_files()))
+        latencies = [.11, 0, .11, 0]
+        expected_success = 2
+        fake_status_codes = [200] * 3 * expected_success
+
+        def fake_spawn(pool, *args, **kwargs):
+            # make each update delay the iter being called again
+            eventlet.sleep(latencies.pop(0))
+            return args[0](*args[1:], **kwargs)
+
+        with mocked_http_conn(*fake_status_codes):
+            with mock.patch('swift.obj.updater.ContextPool.spawn', fake_spawn):
+                daemon.run_once()
+        self.assertEqual(expected_success, daemon.stats.successes)
+        expected_skipped = expected_total - expected_success
+        self.assertEqual(expected_skipped, daemon.stats.skips)
+        self.assertEqual(expected_skipped,
+                         len(self._find_async_pending_files()))
+
+
+class TestObjectUpdaterFunctions(unittest.TestCase):
+    def test_split_update_path(self):
+        update = {
+            'op': 'PUT',
+            'account': 'a',
+            'container': 'c',
+            'obj': 'o',
+            'headers': {
+                'X-Container-Timestamp': normalize_timestamp(0),
+            }
+        }
+        actual = object_updater.split_update_path(update)
+        self.assertEqual(('a', 'c'), actual)
+
+        update['container_path'] = None
+        actual = object_updater.split_update_path(update)
+        self.assertEqual(('a', 'c'), actual)
+
+        update['container_path'] = '.shards_a/c_shard_n'
+        actual = object_updater.split_update_path(update)
+        self.assertEqual(('.shards_a', 'c_shard_n'), actual)
+
+
+class TestBucketizedUpdateSkippingLimiter(unittest.TestCase):
+    def test_init(self):
+        it = object_updater.BucketizedUpdateSkippingLimiter([3, 1], 1000, 10)
+        self.assertEqual(1000, it.num_buckets)
+        self.assertEqual(0.1, it.bucket_update_delta)
+        self.assertEqual([3, 1], [x for x in it.iterator])
+
+        # rate of 0 implies unlimited
+        it = object_updater.BucketizedUpdateSkippingLimiter(iter([3, 1]), 9, 0)
+        self.assertEqual(9, it.num_buckets)
+        self.assertEqual(-1, it.bucket_update_delta)
+        self.assertEqual([3, 1], [x for x in it.iterator])
+
+        # num_buckets is collared at 1
+        it = object_updater.BucketizedUpdateSkippingLimiter(iter([3, 1]), 0, 1)
+        self.assertEqual(1, it.num_buckets)
+        self.assertEqual(1, it.bucket_update_delta)
+        self.assertEqual([3, 1], [x for x in it.iterator])
+
+    def test_iteration_unlimited(self):
+        # verify iteration at unlimited rate
+        update_ctxs = [
+            {'update': {'account': '%d' % i, 'container': '%s' % i}}
+            for i in range(20)]
+        it = object_updater.BucketizedUpdateSkippingLimiter(
+            iter(update_ctxs), 9, 0)
+        self.assertEqual(update_ctxs, [x for x in it])
+
+    def test_iteration_ratelimited(self):
+        # verify iteration at limited rate - single bucket
+        update_ctxs = [
+            {'update': {'account': '%d' % i, 'container': '%s' % i}}
+            for i in range(2)]
+        it = object_updater.BucketizedUpdateSkippingLimiter(
+            iter(update_ctxs), 1, 0.1)
+        self.assertEqual(update_ctxs[:1], [x for x in it])
+
+    def test_iteration_ratelimited_with_callback(self):
+        # verify iteration at limited rate - single bucket
+        skipped = []
+
+        def on_skip(update_ctx):
+            skipped.append(update_ctx)
+
+        update_ctxs = [
+            {'update': {'account': '%d' % i, 'container': '%s' % i}}
+            for i in range(2)]
+        it = object_updater.BucketizedUpdateSkippingLimiter(
+            iter(update_ctxs), 1, 0.1, skip_f=on_skip)
+        self.assertEqual(update_ctxs[:1], [x for x in it])
+        self.assertEqual(update_ctxs[1:], skipped)
+
+
 if __name__ == '__main__':
     unittest.main()