From 8b6126984c7af7eba182b7489f8a4fecdb6f1267 Mon Sep 17 00:00:00 2001 From: Florian Hines Date: Wed, 31 Aug 2011 02:14:35 -0500 Subject: [PATCH 1/4] object-replicator now optionally updates recon stats directly. also updated swift-recon-cron with a cleaner python version. --- bin/swift-recon-cron | 102 +++++++++++++++++++++------------------- swift/common/utils.py | 70 ++++++++++++++++++++++++++- swift/obj/replicator.py | 25 +++++++++- 3 files changed, 146 insertions(+), 51 deletions(-) diff --git a/bin/swift-recon-cron b/bin/swift-recon-cron index 3e4d3928c7..5a14de02df 100755 --- a/bin/swift-recon-cron +++ b/bin/swift-recon-cron @@ -1,56 +1,60 @@ -#!/bin/bash +#!/usr/bin/env python +""" +swift-recon-cron.py +""" -#ghetto temporary cronjob to pull some of the stats for swift-recon -#usage: swift-recon-cron /var/log/swift/storage.log -# run it as frequently as you like, will skip runs during periods -# of high async pendings when the find takes a while. -#todo: everything. - -SYSLOG_FACILITY="local2" -ASYNC_PATH="/srv/node/sd[a-z]/async_pending/" -RECON_CACHE_PATH="/var/cache/swift" - -LOCKFILE="/var/lock/swift-recon-object.lock" -if [ -e $LOCKFILE ]; then - echo "NOTICE - $0 lock present - cron jobs overlapping ?" - echo "$0 lock file present" | /usr/bin/logger -p $SYSLOG_FACILITY.err - exit 1 -else - touch $LOCKFILE -fi +import os +import sys +import optparse +from tempfile import NamedTemporaryFile +import simplejson +from ConfigParser import ConfigParser +from swift.common.utils import get_logger, dump_recon_cache -if [ -z "$1" ]; then - LOGFILE="/var/log/swift/storage.log" -else - LOGFILE=$1 -fi +def async_count(device_dir, logger): + async_count = 0 + for i in os.listdir(device_dir): + asyncdir = os.path.join(device_dir, i, "async_pending") + if os.path.isdir(asyncdir): + for entry in os.listdir(asyncdir): + if os.path.isdir(os.path.join(asyncdir, entry)): + async_hdir = os.path.join(asyncdir, entry) + async_count += len(os.listdir(async_hdir)) + return async_count -if [ ! -r "$LOGFILE" ]; then - echo "$0: error $LOGFILE not readable" | /usr/bin/logger -p $SYSLOG_FACILITY.err - rm $LOCKFILE - exit 1 -fi -if [ ! -d "$RECON_CACHE_PATH" ]; then - mkdir $RECON_CACHE_PATH -fi +def main(): + c = ConfigParser() + try: + conf_path = sys.argv[1] + except Exception: + print "Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1] + print "ex: swift-recon-cron /etc/swift/object-server.conf" + sys.exit(1) + if not c.read(conf_path): + print "Unable to read config file %s" % conf_path + sys.exit(1) + conf = dict(c.items('filter:recon')) + device_dir = conf.get('devices', '/srv/node') + recon_cache_path = conf.get('recon_cache_path', '/var/cache/swift') + cache_file = os.path.join(recon_cache_path, "object.recon") + conf['log_name'] = conf.get('log_name', 'recon-cron') + logger = get_logger(conf, log_route='recon-cron') + try: + os.mkdir("/var/lock/swift-recon-object-cron") + except OSError as e: + logger.critical("%s" % e) + sys.exit(1) + asyncs = async_count(device_dir, logger) + try: + dump_recon_cache('object_replication_time', total, cache_file) + except ValueError: + logger.exception(_('Exception decoding recon cache')) + except Exception: + logger.exception(_('Exception dumping recon cache')) + os.rmdir("/var/lock/swift-recon-object-cron") -TMPF=`/bin/mktemp` -asyncs=$(find $ASYNC_PATH -type f 2> /dev/null| wc -l) -#asyncs=$(find /srv/[1-4]/node/sd[a-z]1/async_pending/ -type f 2> /dev/null| wc -l) #saio -objrep=$(grep "Object replication complete." $LOGFILE | tail -n 1 | awk '{print $9}' | sed -e 's/(//g') -objincoming=$(netstat -aln | egrep "tcp.*:6000.*:.*ESTABLISHED" -c) -#objtw=$(netstat -aln | egrep "tcp.*:6000.*:.*TIME_WAIT" -c) - -echo "{\"async_pending\":$asyncs, \"object_replication_time\":$objrep, \"object_established_conns\":$objincoming}" > $TMPF - -mv $TMPF $RECON_CACHE_PATH/object.recon -if [ $? -ne 0 ]; then - echo "$0: $TMPF rename failed" | /usr/bin/logger -p $SYSLOG_FACILITY.err - rm -f $TMPF $LOCKFILE - exit 1 -fi -rm -f $TMPF $LOCKFILE -exit 0 +if __name__ == '__main__': + main() diff --git a/swift/common/utils.py b/swift/common/utils.py index 1a9c74668b..affdbf5938 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -33,7 +33,8 @@ import struct from ConfigParser import ConfigParser, NoSectionError, NoOptionError, \ RawConfigParser from optparse import OptionParser -from tempfile import mkstemp +from tempfile import mkstemp, NamedTemporaryFile +import simplejson import cPickle as pickle import glob from urlparse import urlparse as stdlib_urlparse, ParseResult @@ -634,6 +635,49 @@ def lock_path(directory, timeout=10): os.close(fd) +@contextmanager +def lock_file(filename, timeout=10, append=False, unlink=True): + """ + Context manager that acquires a lock on a file. This will block until + the lock can be acquired, or the timeout time has expired (whichever occurs + first). + + :param filename: file to be locked + :param timeout: timeout (in seconds) + :param append: True if file should be opened in append mode + :param unlink: True if the file should be unlinked at the end + """ + flags = os.O_CREAT | os.O_RDWR + if append: + flags |= os.O_APPEND + fd = os.open(filename, flags) + try: + with LockTimeout(timeout, filename): + attempt_lock = time.time() + while True: + try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + break + except IOError, err: + if err.errno != errno.EAGAIN: + raise + time.sleep(0.01) + if time.time() - attempt_lock > timeout: + raise + mode = 'r+' + if append: + mode = 'a+' + file_obj = os.fdopen(fd, mode) + yield file_obj + finally: + try: + file_obj.close() + except UnboundLocalError: + pass # may have not actually opened the file + if unlink: + os.unlink(filename) + + def lock_parent_directory(filename, timeout=10): """ Context manager that acquires a lock on the parent directory of the given @@ -1030,3 +1074,27 @@ def human_readable(value): if index == -1: return '%d' % value return '%d%si' % (round(value), suffixes[index]) + + +def dump_recon_cache(cache_key, cache_value, cache_file, lock_timeout=2): + """Update recon cache values + + :param cache_key: key to update + :param cache_value: value you want to set key too + :param cache_file: cache file to update + :param lock_timeout: timeout (in seconds) + """ + try: + with lock_file(cache_file, lock_timeout, unlink=False) as cf: + try: + cache_entry = simplejson.loads(cf.readline()) + cache_entry[cache_key] = cache_value + with NamedTemporaryFile(delete=False) as tf: + tf.write(simplejson.dumps(cache_entry) + '\n') + os.rename(tf.name, cache_file) + except ValueError: + #logging.exception(_('Exception decoding recon cache')) + raise + except Exception: + #logging.exception(_('Exception dumping recon cache')) + raise diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index f4823776d6..dc1b975f03 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -24,6 +24,7 @@ import itertools import cPickle as pickle import errno import uuid +from tempfile import NamedTemporaryFile import eventlet from eventlet import GreenPool, tpool, Timeout, sleep, hubs @@ -32,7 +33,7 @@ from eventlet.support.greenlets import GreenletExit from swift.common.ring import Ring from swift.common.utils import whataremyips, unlink_older_than, lock_path, \ - compute_eta, get_logger, write_pickle, renamer + compute_eta, get_logger, write_pickle, renamer, dump_recon_cache from swift.common.bufferedhttp import http_connect from swift.common.daemon import Daemon @@ -243,6 +244,12 @@ class ObjectReplicator(Daemon): self.rsync_io_timeout = conf.get('rsync_io_timeout', '30') self.http_timeout = int(conf.get('http_timeout', 60)) self.lockup_timeout = int(conf.get('lockup_timeout', 1800)) + self.recon_enable = conf.get( + 'recon_enable', 'no').lower() in ('yes', 'true', 'on', '1') + self.recon_cache_path = conf.get( + 'recon_cache_path', '/var/cache/swift') + self.recon_object = os.path.join(self.recon_cache_path, "object.recon") + def _rsync(self, args): """ @@ -578,6 +585,14 @@ class ObjectReplicator(Daemon): total = (time.time() - start) / 60 self.logger.info( _("Object replication complete. (%.02f minutes)"), total) + if self.recon_enable: + try: + dump_recon_cache('object_replication_time', total, \ + self.recon_object) + except ValueError: + self.logger.exception(_('Exception decoding recon cache')) + except Exception: + self.logger.exception(_('Exception dumping recon cache')) def run_forever(self, *args, **kwargs): self.logger.info(_("Starting object replicator in daemon mode.")) @@ -590,6 +605,14 @@ class ObjectReplicator(Daemon): total = (time.time() - start) / 60 self.logger.info( _("Object replication complete. (%.02f minutes)"), total) + if self.recon_enable: + try: + dump_recon_cache('object_replication_time', total, \ + self.recon_object) + except ValueError: + self.logger.exception(_('Exception decoding recon cache')) + except Exception: + self.logger.exception(_('Exception dumping recon cache')) self.logger.debug(_('Replication sleeping for %s seconds.'), self.run_pause) sleep(self.run_pause) From 3f71bddbf71ae258e1f4c9df51805a210481fc0e Mon Sep 17 00:00:00 2001 From: Florian Hines Date: Wed, 31 Aug 2011 11:29:59 -0500 Subject: [PATCH 2/4] fixup file recon file locking --- swift/common/utils.py | 37 ++++++++++++++++++++----------------- swift/obj/replicator.py | 5 ----- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/swift/common/utils.py b/swift/common/utils.py index affdbf5938..44eaf4a507 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -653,7 +653,6 @@ def lock_file(filename, timeout=10, append=False, unlink=True): fd = os.open(filename, flags) try: with LockTimeout(timeout, filename): - attempt_lock = time.time() while True: try: fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) @@ -661,9 +660,7 @@ def lock_file(filename, timeout=10, append=False, unlink=True): except IOError, err: if err.errno != errno.EAGAIN: raise - time.sleep(0.01) - if time.time() - attempt_lock > timeout: - raise + sleep(0.01) mode = 'r+' if append: mode = 'a+' @@ -1084,17 +1081,23 @@ def dump_recon_cache(cache_key, cache_value, cache_file, lock_timeout=2): :param cache_file: cache file to update :param lock_timeout: timeout (in seconds) """ - try: - with lock_file(cache_file, lock_timeout, unlink=False) as cf: + with lock_file(cache_file, lock_timeout, unlink=False) as cf: + cache_entry = {} + try: + existing_entry = cf.readline() + if existing_entry: + cache_entry = simplejson.loads(existing_entry) + except ValueError: + #file doesn't have a valid entry, we'll recreate it + pass + cache_entry[cache_key] = cache_value + try: + with NamedTemporaryFile(delete=False) as tf: + tf.write(simplejson.dumps(cache_entry) + '\n') + os.rename(tf.name, cache_file) + finally: try: - cache_entry = simplejson.loads(cf.readline()) - cache_entry[cache_key] = cache_value - with NamedTemporaryFile(delete=False) as tf: - tf.write(simplejson.dumps(cache_entry) + '\n') - os.rename(tf.name, cache_file) - except ValueError: - #logging.exception(_('Exception decoding recon cache')) - raise - except Exception: - #logging.exception(_('Exception dumping recon cache')) - raise + os.unlink(tf.name) + except OSError, err: + if err.errno != errno.ENOENT: + raise diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index dc1b975f03..6c77cda9ba 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -24,7 +24,6 @@ import itertools import cPickle as pickle import errno import uuid -from tempfile import NamedTemporaryFile import eventlet from eventlet import GreenPool, tpool, Timeout, sleep, hubs @@ -589,8 +588,6 @@ class ObjectReplicator(Daemon): try: dump_recon_cache('object_replication_time', total, \ self.recon_object) - except ValueError: - self.logger.exception(_('Exception decoding recon cache')) except Exception: self.logger.exception(_('Exception dumping recon cache')) @@ -609,8 +606,6 @@ class ObjectReplicator(Daemon): try: dump_recon_cache('object_replication_time', total, \ self.recon_object) - except ValueError: - self.logger.exception(_('Exception decoding recon cache')) except Exception: self.logger.exception(_('Exception dumping recon cache')) self.logger.debug(_('Replication sleeping for %s seconds.'), From 9b276ad74b702183adf775312d03f986e93ff811 Mon Sep 17 00:00:00 2001 From: Florian Hines Date: Wed, 31 Aug 2011 15:19:16 -0500 Subject: [PATCH 3/4] pep8 --- swift/obj/replicator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index 6c77cda9ba..89d157aa09 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -249,7 +249,6 @@ class ObjectReplicator(Daemon): 'recon_cache_path', '/var/cache/swift') self.recon_object = os.path.join(self.recon_cache_path, "object.recon") - def _rsync(self, args): """ Execute the rsync binary to replicate a partition. From e9b5cb83acc0dd58160afd701ff59680d4025b34 Mon Sep 17 00:00:00 2001 From: Florian Hines Date: Thu, 1 Sep 2011 13:46:13 -0500 Subject: [PATCH 4/4] simplejson import and exception/logging fixes --- bin/swift-recon-cron | 18 +++++++++++------- swift/common/middleware/recon.py | 5 ++++- swift/common/utils.py | 9 ++++++--- swift/obj/replicator.py | 5 +++-- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/bin/swift-recon-cron b/bin/swift-recon-cron index 5a14de02df..a66a696f06 100755 --- a/bin/swift-recon-cron +++ b/bin/swift-recon-cron @@ -7,7 +7,10 @@ import os import sys import optparse from tempfile import NamedTemporaryFile -import simplejson +try: + import simplejson as json +except ImportError: + import json from ConfigParser import ConfigParser from swift.common.utils import get_logger, dump_recon_cache @@ -44,17 +47,18 @@ def main(): try: os.mkdir("/var/lock/swift-recon-object-cron") except OSError as e: - logger.critical("%s" % e) + logger.critical(_(str(e))) + print str(e) sys.exit(1) asyncs = async_count(device_dir, logger) try: - dump_recon_cache('object_replication_time', total, cache_file) - except ValueError: - logger.exception(_('Exception decoding recon cache')) + dump_recon_cache('async_pending', asyncs, cache_file) except Exception: logger.exception(_('Exception dumping recon cache')) - os.rmdir("/var/lock/swift-recon-object-cron") - + try: + os.rmdir("/var/lock/swift-recon-object-cron") + except Exception: + logger.exception(_('Exception remove cronjob lock')) if __name__ == '__main__': main() diff --git a/swift/common/middleware/recon.py b/swift/common/middleware/recon.py index f2b9e777d4..438e1e8c36 100644 --- a/swift/common/middleware/recon.py +++ b/swift/common/middleware/recon.py @@ -17,7 +17,10 @@ from webob import Request, Response from swift.common.utils import split_path, cache_from_env, get_logger from swift.common.constraints import check_mount from hashlib import md5 -import simplejson as json +try: + import simplejson as json +except ImportError: + import json import os diff --git a/swift/common/utils.py b/swift/common/utils.py index 44eaf4a507..5ba49914d2 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -34,7 +34,10 @@ from ConfigParser import ConfigParser, NoSectionError, NoOptionError, \ RawConfigParser from optparse import OptionParser from tempfile import mkstemp, NamedTemporaryFile -import simplejson +try: + import simplejson as json +except ImportError: + import json import cPickle as pickle import glob from urlparse import urlparse as stdlib_urlparse, ParseResult @@ -1086,14 +1089,14 @@ def dump_recon_cache(cache_key, cache_value, cache_file, lock_timeout=2): try: existing_entry = cf.readline() if existing_entry: - cache_entry = simplejson.loads(existing_entry) + cache_entry = json.loads(existing_entry) except ValueError: #file doesn't have a valid entry, we'll recreate it pass cache_entry[cache_key] = cache_value try: with NamedTemporaryFile(delete=False) as tf: - tf.write(simplejson.dumps(cache_entry) + '\n') + tf.write(json.dumps(cache_entry) + '\n') os.rename(tf.name, cache_file) finally: try: diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index 89d157aa09..934df82ec5 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -32,7 +32,8 @@ from eventlet.support.greenlets import GreenletExit from swift.common.ring import Ring from swift.common.utils import whataremyips, unlink_older_than, lock_path, \ - compute_eta, get_logger, write_pickle, renamer, dump_recon_cache + compute_eta, get_logger, write_pickle, renamer, dump_recon_cache, \ + TRUE_VALUES from swift.common.bufferedhttp import http_connect from swift.common.daemon import Daemon @@ -244,7 +245,7 @@ class ObjectReplicator(Daemon): self.http_timeout = int(conf.get('http_timeout', 60)) self.lockup_timeout = int(conf.get('lockup_timeout', 1800)) self.recon_enable = conf.get( - 'recon_enable', 'no').lower() in ('yes', 'true', 'on', '1') + 'recon_enable', 'no').lower() in TRUE_VALUES self.recon_cache_path = conf.get( 'recon_cache_path', '/var/cache/swift') self.recon_object = os.path.join(self.recon_cache_path, "object.recon")