account-reaper warns if not making progress

DocImpact
If account reaper has not managed to clean out an account after a long
period, it prints a message to the log (you can search your system looking
for such messages). Introduce reap_warn_after config variable to determine
when to emit the message (defaults to 30 days).

Also fix bug 1181995 (edge case where object name is an empty string)

Change-Id: Ic0dfee04742d06b6a51b59f302d7a272d7c1de92
This commit is contained in:
Donagh McCabe 2013-05-20 16:52:54 +01:00
parent 6b4cba8371
commit 34e2ab3f31
4 changed files with 34 additions and 1 deletions

View File

@ -40,6 +40,12 @@ troublesome spot. The account reaper will keep trying to delete an account
until it eventually becomes empty, at which point the database reclaim process until it eventually becomes empty, at which point the database reclaim process
within the db_replicator will eventually remove the database files. within the db_replicator will eventually remove the database files.
Sometimes a persistent error state can prevent some object or container
from being deleted. If this happens, you will see a message such as "Account
<name> has not been reaped since <date>" in the log. You can control when
this is logged with the reap_warn_after value in the [account-reaper] section
of the account-server.conf file. By default this is 30 days.
------- -------
History History
------- -------

View File

@ -117,3 +117,11 @@ use = egg:swift#recon
# immediately; you can set this to delay its work however. The value is in # immediately; you can set this to delay its work however. The value is in
# seconds; 2592000 = 30 days for example. # seconds; 2592000 = 30 days for example.
# delay_reaping = 0 # delay_reaping = 0
# If the account fails to be reaped due to a persistent error, the
# account reaper will log a message such as:
# Account <name> has not been reaped since <date>
# You can search logs for this message if space is not being reclaimed
# after you delete account(s).
# Default is 2592000 seconds (30 days). This is in addition to any time
# requested by delay_reaping.
# reap_warn_after = 2592000

View File

@ -17,7 +17,7 @@ import os
import random import random
from logging import DEBUG from logging import DEBUG
from math import sqrt from math import sqrt
from time import time from time import time, ctime
from eventlet import GreenPool, sleep, Timeout from eventlet import GreenPool, sleep, Timeout
@ -72,6 +72,8 @@ class AccountReaper(Daemon):
swift.common.db.DB_PREALLOCATION = \ swift.common.db.DB_PREALLOCATION = \
config_true_value(conf.get('db_preallocation', 'f')) config_true_value(conf.get('db_preallocation', 'f'))
self.delay_reaping = int(conf.get('delay_reaping') or 0) self.delay_reaping = int(conf.get('delay_reaping') or 0)
reap_warn_after = float(conf.get('reap_warn_after') or 86400 * 30)
self.reap_not_done_after = reap_warn_after + self.delay_reaping
def get_account_ring(self): def get_account_ring(self):
""" The account :class:`swift.common.ring.Ring` for the cluster. """ """ The account :class:`swift.common.ring.Ring` for the cluster. """
@ -240,6 +242,8 @@ class AccountReaper(Daemon):
self.logger.exception( self.logger.exception(
_('Exception with containers for account %s'), account) _('Exception with containers for account %s'), account)
marker = containers[-1][0] marker = containers[-1][0]
if marker == '':
break
log = 'Completed pass on account %s' % account log = 'Completed pass on account %s' % account
except (Exception, Timeout): except (Exception, Timeout):
self.logger.exception( self.logger.exception(
@ -268,6 +272,10 @@ class AccountReaper(Daemon):
log += _(', elapsed: %.02fs') % (time() - begin) log += _(', elapsed: %.02fs') % (time() - begin)
self.logger.info(log) self.logger.info(log)
self.logger.timing_since('timing', self.start_time) self.logger.timing_since('timing', self.start_time)
if self.stats_containers_remaining and \
begin - float(info['delete_timestamp']) >= self.reap_not_done_after:
self.logger.warn(_('Account %s has not been reaped since %s') %
(account, ctime(float(info['delete_timestamp']))))
return True return True
def reap_container(self, account, account_partition, account_nodes, def reap_container(self, account, account_partition, account_nodes,
@ -346,6 +354,8 @@ class AccountReaper(Daemon):
{'container': container, {'container': container,
'account': account}) 'account': account})
marker = objects[-1]['name'] marker = objects[-1]['name']
if marker == '':
break
successes = 0 successes = 0
failures = 0 failures = 0
for node in nodes: for node in nodes:

View File

@ -46,6 +46,15 @@ class TestReaper(unittest.TestCase):
self.assertRaises(ValueError, reaper.AccountReaper, self.assertRaises(ValueError, reaper.AccountReaper,
{'delay_reaping': 'abc'}) {'delay_reaping': 'abc'})
def test_reap_warn_after_conf_set(self):
    """Check that reap_not_done_after combines both configured values.

    With delay_reaping set to '2' and reap_warn_after set to '3', the
    reaper is expected to expose reap_not_done_after == 5 (their sum).
    """
    conf = {'delay_reaping': '2', 'reap_warn_after': '3'}
    r = reaper.AccountReaper(conf)
    # assertEqual rather than the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(r.reap_not_done_after, 5)
def test_reap_warn_after_conf_bad_value(self):
    """A non-numeric reap_warn_after must make construction raise ValueError."""
    bad_conf = {'reap_warn_after': 'abc'}
    self.assertRaises(ValueError, reaper.AccountReaper, bad_conf)
def test_reap_delay(self): def test_reap_delay(self):
time_value = [100] time_value = [100]