diff --git a/bin/swift-account-audit b/bin/swift-account-audit index 994492bb49..9d5d80a677 100755 --- a/bin/swift-account-audit +++ b/bin/swift-account-audit @@ -14,373 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function -import os -import sys -from hashlib import md5 -import getopt -from itertools import chain +from swift.cli import account_audit -import json -from eventlet.greenpool import GreenPool -from eventlet.event import Event -from six.moves.urllib.parse import quote - -from swift.common.ring import Ring -from swift.common.utils import split_path -from swift.common.bufferedhttp import http_connect - - -usage = """ -Usage! - -%(cmd)s [options] [url 1] [url 2] ... - -c [concurrency] Set the concurrency, default 50 - -r [ring dir] Ring locations, default /etc/swift - -e [filename] File for writing a list of inconsistent urls - -d Also download files and verify md5 - -You can also feed a list of urls to the script through stdin. - -Examples! - - %(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076 - %(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container/object - %(cmd)s -e errors.txt AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container - %(cmd)s < errors.txt - %(cmd)s -c 25 -d < errors.txt -""" % {'cmd': sys.argv[0]} - - -class Auditor(object): - def __init__(self, swift_dir='/etc/swift', concurrency=50, deep=False, - error_file=None): - self.pool = GreenPool(concurrency) - self.object_ring = Ring(swift_dir, ring_name='object') - self.container_ring = Ring(swift_dir, ring_name='container') - self.account_ring = Ring(swift_dir, ring_name='account') - self.deep = deep - self.error_file = error_file - # zero out stats - self.accounts_checked = self.account_exceptions = \ - self.account_not_found = self.account_container_mismatch = \ - self.account_object_mismatch = self.objects_checked = \ - self.object_exceptions = self.object_not_found = \ - self.object_checksum_mismatch = self.containers_checked = \ - self.container_exceptions = self.container_count_mismatch = \ - self.container_not_found = self.container_obj_mismatch = 0 - self.list_cache = {} - self.in_progress = {} - - def audit_object(self, account, container, name): - path = '/%s/%s/%s' % (account, container, name) - part, nodes = self.object_ring.get_nodes( - account, container.encode('utf-8'), name.encode('utf-8')) - container_listing = self.audit_container(account, container) - consistent = True - if name not in container_listing: - print(" Object %s missing in container listing!" % path) - consistent = False - hash = None - else: - hash = container_listing[name]['hash'] - etags = [] - for node in nodes: - try: - if self.deep: - conn = http_connect(node['ip'], node['port'], - node['device'], part, 'GET', path, {}) - resp = conn.getresponse() - calc_hash = md5() - chunk = True - while chunk: - chunk = resp.read(8192) - calc_hash.update(chunk) - calc_hash = calc_hash.hexdigest() - if resp.status // 100 != 2: - self.object_not_found += 1 - consistent = False - print(' Bad status %s GETting object "%s" on %s/%s' - % (resp.status, path, - node['ip'], node['device'])) - continue - if resp.getheader('ETag').strip('"') != calc_hash: - self.object_checksum_mismatch += 1 - consistent = False - print(' MD5 does not match etag for "%s" on %s/%s' - % (path, node['ip'], node['device'])) - else: - conn = http_connect(node['ip'], node['port'], - node['device'], part, 'HEAD', - path.encode('utf-8'), {}) - resp = conn.getresponse() - if resp.status // 100 != 2: - self.object_not_found += 1 - consistent = False - print(' Bad status %s HEADing object "%s" on %s/%s' - % (resp.status, path, - node['ip'], node['device'])) - continue - - override_etag = resp.getheader( - 'X-Object-Sysmeta-Container-Update-Override-Etag') - if override_etag: - etags.append((override_etag, node)) - else: - etags.append((resp.getheader('ETag'), node)) - except Exception: - self.object_exceptions += 1 - consistent = False - print(' Exception fetching object "%s" on %s/%s' - % (path, node['ip'], node['device'])) - continue - if not etags: - consistent = False - print(" Failed fo fetch object %s at all!" % path) - elif hash: - for etag, node in etags: - if etag.strip('"') != hash: - consistent = False - self.object_checksum_mismatch += 1 - print(' ETag mismatch for "%s" on %s/%s' - % (path, node['ip'], node['device'])) - if not consistent and self.error_file: - with open(self.error_file, 'a') as err_file: - print(path, file=err_file) - self.objects_checked += 1 - - def audit_container(self, account, name, recurse=False): - if (account, name) in self.in_progress: - self.in_progress[(account, name)].wait() - if (account, name) in self.list_cache: - return self.list_cache[(account, name)] - self.in_progress[(account, name)] = Event() - print('Auditing container "%s"' % name) - path = '/%s/%s' % (account, name) - account_listing = self.audit_account(account) - consistent = True - if name not in account_listing: - consistent = False - print(" Container %s not in account listing!" % path) - part, nodes = \ - self.container_ring.get_nodes(account, name.encode('utf-8')) - rec_d = {} - responses = {} - for node in nodes: - marker = '' - results = True - while results: - try: - conn = http_connect(node['ip'], node['port'], - node['device'], part, 'GET', - path.encode('utf-8'), {}, - 'format=json&marker=%s' % - quote(marker.encode('utf-8'))) - resp = conn.getresponse() - if resp.status // 100 != 2: - self.container_not_found += 1 - consistent = False - print(' Bad status GETting container "%s" on %s/%s' % - (path, node['ip'], node['device'])) - break - if node['id'] not in responses: - responses[node['id']] = { - h.lower(): v for h, v in resp.getheaders()} - results = json.loads(resp.read()) - except Exception: - self.container_exceptions += 1 - consistent = False - print(' Exception GETting container "%s" on %s/%s' % - (path, node['ip'], node['device'])) - break - if results: - marker = results[-1]['name'] - for obj in results: - obj_name = obj['name'] - if obj_name not in rec_d: - rec_d[obj_name] = obj - if (obj['last_modified'] != - rec_d[obj_name]['last_modified']): - self.container_obj_mismatch += 1 - consistent = False - print(" Different versions of %s/%s " - "in container dbs." % (name, obj['name'])) - if (obj['last_modified'] > - rec_d[obj_name]['last_modified']): - rec_d[obj_name] = obj - obj_counts = [int(header['x-container-object-count']) - for header in responses.values()] - if not obj_counts: - consistent = False - print(" Failed to fetch container %s at all!" % path) - else: - if len(set(obj_counts)) != 1: - self.container_count_mismatch += 1 - consistent = False - print( - " Container databases don't agree on number of objects.") - print( - " Max: %s, Min: %s" % (max(obj_counts), min(obj_counts))) - self.containers_checked += 1 - self.list_cache[(account, name)] = rec_d - self.in_progress[(account, name)].send(True) - del self.in_progress[(account, name)] - if recurse: - for obj in rec_d.keys(): - self.pool.spawn_n(self.audit_object, account, name, obj) - if not consistent and self.error_file: - with open(self.error_file, 'a') as error_file: - print(path, file=error_file) - return rec_d - - def audit_account(self, account, recurse=False): - if account in self.in_progress: - self.in_progress[account].wait() - if account in self.list_cache: - return self.list_cache[account] - self.in_progress[account] = Event() - print('Auditing account "%s"' % account) - consistent = True - path = '/%s' % account - part, nodes = self.account_ring.get_nodes(account) - responses = {} - for node in nodes: - marker = '' - results = True - while results: - node_id = node['id'] - try: - conn = http_connect(node['ip'], node['port'], - node['device'], part, 'GET', path, {}, - 'format=json&marker=%s' % - quote(marker.encode('utf-8'))) - resp = conn.getresponse() - if resp.status // 100 != 2: - self.account_not_found += 1 - consistent = False - print(" Bad status GETting account '%s' " - " from %s:%s" % - (account, node['ip'], node['device'])) - break - results = json.loads(resp.read()) - except Exception: - self.account_exceptions += 1 - consistent = False - print(" Exception GETting account '%s' on %s:%s" % - (account, node['ip'], node['device'])) - break - if node_id not in responses: - responses[node_id] = [ - {h.lower(): v for h, v in resp.getheaders()}, []] - responses[node_id][1].extend(results) - if results: - marker = results[-1]['name'] - headers = [r[0] for r in responses.values()] - cont_counts = [int(header['x-account-container-count']) - for header in headers] - if len(set(cont_counts)) != 1: - self.account_container_mismatch += 1 - consistent = False - print(" Account databases for '%s' don't agree on" - " number of containers." % account) - if cont_counts: - print(" Max: %s, Min: %s" % (max(cont_counts), - min(cont_counts))) - obj_counts = [int(header['x-account-object-count']) - for header in headers] - if len(set(obj_counts)) != 1: - self.account_object_mismatch += 1 - consistent = False - print(" Account databases for '%s' don't agree on" - " number of objects." % account) - if obj_counts: - print(" Max: %s, Min: %s" % (max(obj_counts), - min(obj_counts))) - containers = set() - for resp in responses.values(): - containers.update(container['name'] for container in resp[1]) - self.list_cache[account] = containers - self.in_progress[account].send(True) - del self.in_progress[account] - self.accounts_checked += 1 - if recurse: - for container in containers: - self.pool.spawn_n(self.audit_container, account, - container, True) - if not consistent and self.error_file: - with open(self.error_file, 'a') as error_file: - print(path, error_file) - return containers - - def audit(self, account, container=None, obj=None): - if obj and container: - self.pool.spawn_n(self.audit_object, account, container, obj) - elif container: - self.pool.spawn_n(self.audit_container, account, container, True) - else: - self.pool.spawn_n(self.audit_account, account, True) - - def wait(self): - self.pool.waitall() - - def print_stats(self): - - def _print_stat(name, stat): - # Right align stat name in a field of 18 characters - print("{0:>18}: {1}".format(name, stat)) - - print() - _print_stat("Accounts checked", self.accounts_checked) - if self.account_not_found: - _print_stat("Missing Replicas", self.account_not_found) - if self.account_exceptions: - _print_stat("Exceptions", self.account_exceptions) - if self.account_container_mismatch: - _print_stat("Container mismatch", self.account_container_mismatch) - if self.account_object_mismatch: - _print_stat("Object mismatch", self.account_object_mismatch) - print() - _print_stat("Containers checked", self.containers_checked) - if self.container_not_found: - _print_stat("Missing Replicas", self.container_not_found) - if self.container_exceptions: - _print_stat("Exceptions", self.container_exceptions) - if self.container_count_mismatch: - _print_stat("Count mismatch", self.container_count_mismatch) - if self.container_obj_mismatch: - _print_stat("Object mismatch", self.container_obj_mismatch) - print() - _print_stat("Objects checked", self.objects_checked) - if self.object_not_found: - _print_stat("Missing Replicas", self.object_not_found) - if self.object_exceptions: - _print_stat("Exceptions", self.object_exceptions) - if self.object_checksum_mismatch: - _print_stat("MD5 Mismatch", self.object_checksum_mismatch) - - -if __name__ == '__main__': - try: - optlist, args = getopt.getopt(sys.argv[1:], 'c:r:e:d') - except getopt.GetoptError as err: - print(str(err)) - print(usage) - sys.exit(2) - if not args and os.isatty(sys.stdin.fileno()): - print(usage) - sys.exit() - opts = dict(optlist) - options = { - 'concurrency': int(opts.get('-c', 50)), - 'error_file': opts.get('-e', None), - 'swift_dir': opts.get('-r', '/etc/swift'), - 'deep': '-d' in opts, - } - auditor = Auditor(**options) - if not os.isatty(sys.stdin.fileno()): - args = chain(args, sys.stdin) - for path in args: - path = '/' + path.rstrip('\r\n').lstrip('/') - auditor.audit(*split_path(path, 1, 3, True)) - auditor.wait() - auditor.print_stats() +if __name__ == "__main__": + account_audit.main() diff --git a/bin/swift-config b/bin/swift-config index 1a2db221ce..a8515ac486 100755 --- a/bin/swift-config +++ b/bin/swift-config @@ -12,80 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function -import optparse -import os -import sys - -from swift.common.manager import Server -from swift.common.utils import readconf -from swift.common.wsgi import appconfig - -parser = optparse.OptionParser('%prog [options] SERVER') -parser.add_option('-c', '--config-num', metavar="N", type="int", - dest="number", default=0, - help="parse config for the Nth server only") -parser.add_option('-s', '--section', help="only display matching sections") -parser.add_option('-w', '--wsgi', action='store_true', - help="use wsgi/paste parser instead of readconf") - - -def _context_name(context): - return ':'.join((context.object_type.name, context.name)) - - -def inspect_app_config(app_config): - conf = {} - context = app_config.context - section_name = _context_name(context) - conf[section_name] = context.config() - if context.object_type.name == 'pipeline': - filters = context.filter_contexts - pipeline = [] - for filter_context in filters: - conf[_context_name(filter_context)] = filter_context.config() - pipeline.append(filter_context.entry_point_name) - app_context = context.app_context - conf[_context_name(app_context)] = app_context.config() - pipeline.append(app_context.entry_point_name) - conf[section_name]['pipeline'] = ' '.join(pipeline) - return conf - - -def main(): - options, args = parser.parse_args() - options = dict(vars(options)) - - if not args: - return 'ERROR: specify type of server or conf_path' - conf_files = [] - for arg in args: - if os.path.exists(arg): - conf_files.append(arg) - else: - conf_files += Server(arg).conf_files(**options) - for conf_file in conf_files: - print('# %s' % conf_file) - if options['wsgi']: - app_config = appconfig(conf_file) - conf = inspect_app_config(app_config) - else: - conf = readconf(conf_file) - flat_vars = {} - for k, v in conf.items(): - if options['section'] and k != options['section']: - continue - if not isinstance(v, dict): - flat_vars[k] = v - continue - print('[%s]' % k) - for opt, value in v.items(): - print('%s = %s' % (opt, value)) - print() - for k, v in flat_vars.items(): - print('# %s = %s' % (k, v)) - print() - +from swift.cli import config if __name__ == "__main__": - sys.exit(main()) + config.main() diff --git a/bin/swift-drive-audit b/bin/swift-drive-audit index 638d673074..1316bdd393 100755 --- a/bin/swift-drive-audit +++ b/bin/swift-drive-audit @@ -14,251 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime -import glob -import locale -import os -import os.path -import re -import subprocess -import sys +from swift.cli import drive_audit - -import six -from six.moves.configparser import ConfigParser - -from swift.common.utils import backward, get_logger, dump_recon_cache, \ - config_true_value - - -def get_devices(device_dir, logger): - devices = [] - majmin_devices = {} - - # List /dev/block - # Using os.scandir on recent versions of python, else os.listdir - if 'scandir' in dir(os): - with os.scandir("/dev/block") as it: - for ent in it: - if ent.is_symlink(): - dev_name = os.path.basename(os.readlink(ent.path)) - majmin = os.path.basename(ent.path).split(':') - majmin_devices[dev_name] = {'major': majmin[0], - 'minor': majmin[1]} - else: - for ent in os.listdir("/dev/block"): - ent_path = os.path.join("/dev/block", ent) - if os.path.is_symlink(ent_path): - dev_name = os.path.basename(os.readlink(ent_path)) - majmin = os.path.basename(ent_path).split(':') - majmin_devices[dev_name] = {'major': majmin[0], - 'minor': majmin[1]} - - for line in open('/proc/mounts').readlines(): - data = line.strip().split() - block_device = data[0] - mount_point = data[1] - if mount_point.startswith(device_dir): - device = {} - device['mount_point'] = mount_point - device['block_device'] = block_device - dev_name = os.path.basename(block_device) - if dev_name in majmin_devices: - # If symlink is in /dev/block - device['major'] = majmin_devices[dev_name]['major'] - device['minor'] = majmin_devices[dev_name]['minor'] - else: - # Else we try to stat block_device - try: - device_num = os.stat(block_device).st_rdev - except OSError: - # If we can't stat the device, - # then something weird is going on - logger.error( - 'Could not determine major:minor numbers for %s ' - '(mounted at %s)! Skipping...', - block_device, mount_point) - continue - device['major'] = str(os.major(device_num)) - device['minor'] = str(os.minor(device_num)) - devices.append(device) - for line in open('/proc/partitions').readlines()[2:]: - major, minor, blocks, kernel_device = line.strip().split() - device = [d for d in devices - if d['major'] == major and d['minor'] == minor] - if device: - device[0]['kernel_device'] = kernel_device - return devices - - -def get_errors(error_re, log_file_pattern, minutes, logger, - log_file_encoding): - # Assuming log rotation is being used, we need to examine - # recently rotated files in case the rotation occurred - # just before the script is being run - the data we are - # looking for may have rotated. - # - # The globbing used before would not work with all out-of-box - # distro setup for logrotate and syslog therefore moving this - # to the config where one can set it with the desired - # globbing pattern. - log_files = [f for f in glob.glob(log_file_pattern)] - try: - log_files.sort(key=lambda f: os.stat(f).st_mtime, reverse=True) - except (IOError, OSError) as exc: - logger.error(exc) - print(exc) - sys.exit(1) - - now_time = datetime.datetime.now() - end_time = now_time - datetime.timedelta(minutes=minutes) - # kern.log does not contain the year so we need to keep - # track of the year and month in case the year recently - # ticked over - year = now_time.year - prev_ent_month = now_time.strftime('%b') - errors = {} - - reached_old_logs = False - for path in log_files: - try: - f = open(path, 'rb') - except IOError: - logger.error("Error: Unable to open " + path) - print("Unable to open " + path) - sys.exit(1) - for line in backward(f): - if not six.PY2: - line = line.decode(log_file_encoding, 'surrogateescape') - if '[ 0.000000]' in line \ - or 'KERNEL supported cpus:' in line \ - or 'BIOS-provided physical RAM map:' in line: - # Ignore anything before the last boot - reached_old_logs = True - break - # Solves the problem with year change - kern.log does not - # keep track of the year. - log_time_ent = line.split()[:3] - if log_time_ent[0] == 'Dec' and prev_ent_month == 'Jan': - year -= 1 - prev_ent_month = log_time_ent[0] - log_time_string = '%d %s' % (year, ' '.join(log_time_ent)) - try: - log_time = datetime.datetime.strptime( - log_time_string, '%Y %b %d %H:%M:%S') - except ValueError: - # Some versions use ISO timestamps instead - try: - log_time = datetime.datetime.strptime( - line[0:19], '%Y-%m-%dT%H:%M:%S') - except ValueError: - continue - if log_time > end_time: - for err in error_re: - for device in err.findall(line): - errors[device] = errors.get(device, 0) + 1 - else: - reached_old_logs = True - break - if reached_old_logs: - break - return errors - - -def comment_fstab(mount_point): - with open('/etc/fstab', 'r') as fstab: - with open('/etc/fstab.new', 'w') as new_fstab: - for line in fstab: - parts = line.split() - if len(parts) > 2 \ - and parts[1] == mount_point \ - and not line.startswith('#'): - new_fstab.write('#' + line) - else: - new_fstab.write(line) - os.rename('/etc/fstab.new', '/etc/fstab') - - -if __name__ == '__main__': - c = ConfigParser() - try: - conf_path = sys.argv[1] - except Exception: - print("Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1]) - sys.exit(1) - if not c.read(conf_path): - print("Unable to read config file %s" % conf_path) - sys.exit(1) - conf = dict(c.items('drive-audit')) - device_dir = conf.get('device_dir', '/srv/node') - minutes = int(conf.get('minutes', 60)) - error_limit = int(conf.get('error_limit', 1)) - recon_cache_path = conf.get('recon_cache_path', "/var/cache/swift") - log_file_pattern = conf.get('log_file_pattern', - '/var/log/kern.*[!.][!g][!z]') - log_file_encoding = conf.get('log_file_encoding', 'auto') - if log_file_encoding == 'auto': - log_file_encoding = locale.getpreferredencoding() - log_to_console = config_true_value(conf.get('log_to_console', False)) - error_re = [] - for conf_key in conf: - if conf_key.startswith('regex_pattern_'): - error_pattern = conf[conf_key] - try: - r = re.compile(error_pattern) - except re.error: - sys.exit('Error: unable to compile regex pattern "%s"' % - error_pattern) - error_re.append(r) - if not error_re: - error_re = [ - re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'), - re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'), - ] - conf['log_name'] = conf.get('log_name', 'drive-audit') - logger = get_logger(conf, log_to_console=log_to_console, - log_route='drive-audit') - devices = get_devices(device_dir, logger) - logger.debug("Devices found: %s" % str(devices)) - if not devices: - logger.error("Error: No devices found!") - recon_errors = {} - total_errors = 0 - for device in devices: - recon_errors[device['mount_point']] = 0 - errors = get_errors(error_re, log_file_pattern, minutes, logger, - log_file_encoding) - logger.debug("Errors found: %s" % str(errors)) - unmounts = 0 - for kernel_device, count in errors.items(): - if count >= error_limit: - device = \ - [d for d in devices if d['kernel_device'] == kernel_device] - if device: - mount_point = device[0]['mount_point'] - if mount_point.startswith(device_dir): - if config_true_value(conf.get('unmount_failed_device', - True)): - logger.info("Unmounting %s with %d errors" % - (mount_point, count)) - subprocess.call(['umount', '-fl', mount_point]) - logger.info("Commenting out %s from /etc/fstab" % - (mount_point)) - comment_fstab(mount_point) - unmounts += 1 - else: - logger.info("Detected %s with %d errors " - "(Device not unmounted)" % - (mount_point, count)) - recon_errors[mount_point] = count - total_errors += count - recon_file = recon_cache_path + "/drive.recon" - dump_recon_cache(recon_errors, recon_file, logger) - dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger, - set_owner=conf.get("user", "swift")) - - if unmounts == 0: - logger.info("No drives were unmounted") - elif os.path.isdir("/run/systemd/system"): - logger.debug("fstab updated, calling systemctl daemon-reload") - subprocess.call(["/usr/bin/systemctl", "daemon-reload"]) +if __name__ == "__main__": + drive_audit.main() diff --git a/bin/swift-get-nodes b/bin/swift-get-nodes index 5bc21c9456..a9f5f277ba 100755 --- a/bin/swift-get-nodes +++ b/bin/swift-get-nodes @@ -14,63 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys -from optparse import OptionParser -from os.path import basename +from swift.cli import get_nodes -from swift.common.ring import Ring -from swift.common.storage_policy import reload_storage_policies -from swift.common.utils import set_swift_dir -from swift.cli.info import (parse_get_node_args, print_item_locations, - InfoSystemExit) - - -if __name__ == '__main__': - - usage = ''' - Shows the nodes responsible for the item specified. - Usage: %prog [-a] [ []] - Or: %prog [-a] -p partition - Or: %prog [-a] -P policy_name [ []] - Or: %prog [-a] -P policy_name -p partition - Note: account, container, object can also be a single arg separated by / - Example: - $ %prog -a /etc/swift/account.ring.gz MyAccount - Partition 5743883 - Hash 96ae332a60b58910784e4417a03e1ad0 - 10.1.1.7:8000 sdd1 - 10.1.9.2:8000 sdb1 - 10.1.5.5:8000 sdf1 - 10.1.5.9:8000 sdt1 # [Handoff] - ''' - parser = OptionParser(usage) - parser.add_option('-a', '--all', action='store_true', - help='Show all handoff nodes') - parser.add_option('-p', '--partition', metavar='PARTITION', - help='Show nodes for a given partition') - parser.add_option('-P', '--policy-name', dest='policy_name', - help='Specify which policy to use') - parser.add_option('-d', '--swift-dir', default='/etc/swift', - dest='swift_dir', help='Path to swift directory') - parser.add_option('-Q', '--quoted', action='store_true', - help='Assume swift paths are quoted') - options, args = parser.parse_args() - - if set_swift_dir(options.swift_dir): - reload_storage_policies() - - try: - ring_path, args = parse_get_node_args(options, args) - except InfoSystemExit as e: - parser.print_help() - sys.exit('ERROR: %s' % e) - - ring = ring_name = None - if ring_path: - ring_name = basename(ring_path)[:-len('.ring.gz')] - ring = Ring(ring_path) - - try: - print_item_locations(ring, ring_name, *args, **vars(options)) - except InfoSystemExit: - sys.exit(1) +if __name__ == "__main__": + get_nodes.main() diff --git a/bin/swift-oldies b/bin/swift-oldies index 5614371e9b..c9231d77bb 100755 --- a/bin/swift-oldies +++ b/bin/swift-oldies @@ -12,79 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function -import optparse -import subprocess -import sys - +from swift.cli import oldies if __name__ == '__main__': - parser = optparse.OptionParser(usage='''%prog [options] - -Lists old Swift processes. - '''.strip()) - parser.add_option('-a', '--age', dest='hours', type='int', default=720, - help='look for processes at least HOURS old; ' - 'default: 720 (30 days)') - parser.add_option('-p', '--pids', action='store_true', - help='only print the pids found; for example, to pipe ' - 'to xargs kill') - (options, args) = parser.parse_args() - - listing = [] - for line in subprocess.Popen( - ['ps', '-eo', 'etime,pid,args', '--no-headers'], - stdout=subprocess.PIPE).communicate()[0].split(b'\n'): - if not line: - continue - hours = 0 - try: - etime, pid, args = line.decode('ascii').split(None, 2) - except ValueError: - # This covers both decoding and not-enough-values-to-unpack errors - sys.exit('Could not process ps line %r' % line) - if not args.startswith(( - '/usr/bin/python /usr/bin/swift-', - '/usr/bin/python /usr/local/bin/swift-', - '/bin/python /usr/bin/swift-', - '/usr/bin/python3 /usr/bin/swift-', - '/usr/bin/python3 /usr/local/bin/swift-', - '/bin/python3 /usr/bin/swift-')): - continue - args = args.split('-', 1)[1] - etime = etime.split('-') - if len(etime) == 2: - hours = int(etime[0]) * 24 - etime = etime[1] - elif len(etime) == 1: - etime = etime[0] - else: - sys.exit('Could not process etime value from %r' % line) - etime = etime.split(':') - if len(etime) == 3: - hours += int(etime[0]) - elif len(etime) != 2: - sys.exit('Could not process etime value from %r' % line) - if hours >= options.hours: - listing.append((str(hours), pid, args)) - - if not listing: - sys.exit() - - if options.pids: - for hours, pid, args in listing: - print(pid) - else: - hours_len = len('Hours') - pid_len = len('PID') - args_len = len('Command') - for hours, pid, args in listing: - hours_len = max(hours_len, len(hours)) - pid_len = max(pid_len, len(pid)) - args_len = max(args_len, len(args)) - args_len = min(args_len, 78 - hours_len - pid_len) - - print('%*s %*s %s' % (hours_len, 'Hours', pid_len, 'PID', 'Command')) - for hours, pid, args in listing: - print('%*s %*s %s' % (hours_len, hours, pid_len, - pid, args[:args_len])) + oldies.main() diff --git a/bin/swift-orphans b/bin/swift-orphans index 8095c9e2f6..492ba7ecd2 100755 --- a/bin/swift-orphans +++ b/bin/swift-orphans @@ -12,121 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function -import optparse -import os -import re -import signal -import subprocess -import sys - -from swift.common.manager import RUN_DIR +from swift.cli import orphans if __name__ == '__main__': - parser = optparse.OptionParser(usage='''%prog [options] - -Lists and optionally kills orphaned Swift processes. This is done by scanning -/var/run/swift for .pid files and listing any processes that look like Swift -processes but aren't associated with the pids in those .pid files. Any Swift -processes running with the 'once' parameter are ignored, as those are usually -for full-speed audit scans and such. - -Example (sends SIGTERM to all orphaned Swift processes older than two hours): -%prog -a 2 -k TERM - '''.strip()) - parser.add_option('-a', '--age', dest='hours', type='int', default=24, - help="look for processes at least HOURS old; " - "default: 24") - parser.add_option('-k', '--kill', dest='signal', - help='send SIGNAL to matched processes; default: just ' - 'list process information') - parser.add_option('-w', '--wide', dest='wide', default=False, - action='store_true', - help="don't clip the listing at 80 characters") - parser.add_option('-r', '--run-dir', type="str", - dest="run_dir", default=RUN_DIR, - help="alternative directory to store running pid files " - "default: %s" % RUN_DIR) - (options, args) = parser.parse_args() - - pids = [] - - for root, directories, files in os.walk(options.run_dir): - for name in files: - if name.endswith(('.pid', '.pid.d')): - pids.append(open(os.path.join(root, name)).read().strip()) - pids.extend(subprocess.Popen( - ['ps', '--ppid', pids[-1], '-o', 'pid', '--no-headers'], - stdout=subprocess.PIPE).communicate()[0].decode().split()) - - listing = [] - swift_cmd_re = re.compile( - '^/usr/bin/python[23]? /usr(?:/local)?/bin/swift-') - for line in subprocess.Popen( - ['ps', '-eo', 'etime,pid,args', '--no-headers'], - stdout=subprocess.PIPE).communicate()[0].split(b'\n'): - if not line: - continue - hours = 0 - try: - etime, pid, args = line.decode('ascii').split(None, 2) - except ValueError: - sys.exit('Could not process ps line %r' % line) - if pid in pids: - continue - if any([ - not swift_cmd_re.match(args), - 'swift-orphans' in args, - 'once' in args.split(), - ]): - continue - args = args.split('swift-', 1)[1] - etime = etime.split('-') - if len(etime) == 2: - hours = int(etime[0]) * 24 - etime = etime[1] - elif len(etime) == 1: - etime = etime[0] - else: - sys.exit('Could not process etime value from %r' % line) - etime = etime.split(':') - if len(etime) == 3: - hours += int(etime[0]) - elif len(etime) != 2: - sys.exit('Could not process etime value from %r' % line) - if hours >= options.hours: - listing.append((str(hours), pid, args)) - - if not listing: - sys.exit() - - hours_len = len('Hours') - pid_len = len('PID') - args_len = len('Command') - for hours, pid, args in listing: - hours_len = max(hours_len, len(hours)) - pid_len = max(pid_len, len(pid)) - args_len = max(args_len, len(args)) - args_len = min(args_len, 78 - hours_len - pid_len) - - print('%*s %*s %s' % - (hours_len, 'Hours', pid_len, 'PID', 'Command')) - for hours, pid, args in listing: - print('%*s %*s %s' % - (hours_len, hours, pid_len, pid, args[:args_len])) - - if options.signal: - try: - signum = int(options.signal) - except ValueError: - signum = getattr(signal, options.signal.upper(), - getattr(signal, 'SIG' + options.signal.upper(), - None)) - if not signum: - sys.exit('Could not translate %r to a signal number.' % - options.signal) - print('Sending processes %s (%d) signal...' % (options.signal, signum), - end='') - for hours, pid, args in listing: - os.kill(int(pid), signum) - print('Done.') + orphans.main() diff --git a/bin/swift-reconciler-enqueue b/bin/swift-reconciler-enqueue index 2a9dcc3a55..34a187cd55 100755 --- a/bin/swift-reconciler-enqueue +++ b/bin/swift-reconciler-enqueue @@ -11,65 +11,7 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function -import sys -from optparse import OptionParser - -import eventlet.debug -eventlet.debug.hub_exceptions(True) - -from swift.common.ring import Ring -from swift.common.utils import split_path -from swift.common.storage_policy import POLICIES - -from swift.container.reconciler import add_to_reconciler_queue -""" -This tool is primarily for debugging and development but can be used an example -of how an operator could enqueue objects manually if a problem is discovered - -might be particularly useful if you need to hack a fix into the reconciler -and re-run it. -""" - -USAGE = """ -%prog [options] - -This script enqueues an object to be evaluated by the reconciler. - -Arguments: -policy_index: the policy the object is currently stored in. - /a/c/o: the full path of the object - utf-8 - timestamp: the timestamp of the datafile/tombstone. - -""".strip() - -parser = OptionParser(USAGE) -parser.add_option('-X', '--op', default='PUT', choices=('PUT', 'DELETE'), - help='the method of the misplaced operation') -parser.add_option('-f', '--force', action='store_true', - help='force an object to be re-enqueued') - - -def main(): - options, args = parser.parse_args() - try: - policy_index, path, timestamp = args - except ValueError: - sys.exit(parser.print_help()) - container_ring = Ring('/etc/swift/container.ring.gz') - policy = POLICIES.get_by_index(policy_index) - if not policy: - return 'ERROR: invalid storage policy index: %s' % policy - try: - account, container, obj = split_path(path, 3, 3, True) - except ValueError as e: - return 'ERROR: %s' % e - container_name = add_to_reconciler_queue( - container_ring, account, container, obj, - policy.idx, timestamp, options.op, force=options.force) - if not container_name: - return 'ERROR: unable to enqueue!' - print(container_name) - +from swift.cli import reconciler_enqueue if __name__ == "__main__": - sys.exit(main()) + reconciler_enqueue.main() diff --git a/setup.cfg b/setup.cfg index 70e2036802..9b1f367e14 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,16 +40,6 @@ skip_reno = True packages = swift -[files] -scripts = - bin/swift-account-audit - bin/swift-config - bin/swift-reconciler-enqueue - bin/swift-drive-audit - bin/swift-get-nodes - bin/swift-oldies - bin/swift-orphans - [extras] kms_keymaster = oslo.config>=4.0.0,!=4.3.0,!=4.4.0 # Apache-2.0 @@ -63,11 +53,13 @@ keystone = [entry_points] console_scripts = + swift-account-audit = swift.cli.account_audit:main swift-account-auditor = swift.account.auditor:main swift-account-info = swift.cli.info:account_main swift-account-reaper = swift.account.reaper:main swift-account-replicator = swift.account.replicator:main swift-account-server = swift.account.server:main + swift-config = swift.cli.config:main swift-container-auditor = swift.container.auditor:main swift-container-deleter = swift.cli.container_deleter:main swift-container-info = swift.cli.info:container_main @@ -79,7 +71,9 @@ console_scripts = swift-container-updater = swift.container.updater:main swift-dispersion-populate = swift.cli.dispersion_populate:main swift-dispersion-report = swift.cli.dispersion_report:main + swift-drive-audit = swift.cli.drive_audit:main swift-form-signature = swift.cli.form_signature:main + swift-get-nodes = swift.cli.get_nodes:main swift-init = swift.common.manager:main swift-manage-shard-ranges = swift.cli.manage_shard_ranges:main swift-object-auditor = swift.obj.auditor:main @@ -90,9 +84,12 @@ console_scripts = swift-object-replicator = swift.obj.replicator:main swift-object-server = swift.obj.server:main swift-object-updater = swift.obj.updater:main + swift-oldies = swift.cli.oldies:main + swift-orphans = swift.cli.orphans:main swift-proxy-server = swift.proxy.server:main swift-recon = swift.cli.recon:main swift-recon-cron = swift.cli.recon_cron:main + swift-reconciler-enqueue = swift.cli.reconciler_enqueue:main swift-reload = swift.cli.reload:main swift-ring-builder = swift.cli.ringbuilder:error_handling_main swift-ring-builder-analyzer = swift.cli.ring_builder_analyzer:main diff --git a/swift/cli/account_audit.py b/swift/cli/account_audit.py new file mode 100755 index 0000000000..09fd7490f4 --- /dev/null +++ b/swift/cli/account_audit.py @@ -0,0 +1,390 @@ +#!/usr/bin/env python +# Copyright (c) 2010-2012 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import os +import sys +from hashlib import md5 +import getopt +from itertools import chain + +import json +from eventlet.greenpool import GreenPool +from eventlet.event import Event +from six.moves.urllib.parse import quote + +from swift.common.ring import Ring +from swift.common.utils import split_path +from swift.common.bufferedhttp import http_connect + + +usage = """ +Usage! + +%(cmd)s [options] [url 1] [url 2] ... + -c [concurrency] Set the concurrency, default 50 + -r [ring dir] Ring locations, default /etc/swift + -e [filename] File for writing a list of inconsistent urls + -d Also download files and verify md5 + +You can also feed a list of urls to the script through stdin. + +Examples! + + %(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076 + %(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container/object + %(cmd)s -e errors.txt AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container + %(cmd)s < errors.txt + %(cmd)s -c 25 -d < errors.txt +""" % {'cmd': sys.argv[0]} + + +class Auditor(object): + def __init__(self, swift_dir='/etc/swift', concurrency=50, deep=False, + error_file=None): + self.pool = GreenPool(concurrency) + self.object_ring = Ring(swift_dir, ring_name='object') + self.container_ring = Ring(swift_dir, ring_name='container') + self.account_ring = Ring(swift_dir, ring_name='account') + self.deep = deep + self.error_file = error_file + # zero out stats + self.accounts_checked = self.account_exceptions = \ + self.account_not_found = self.account_container_mismatch = \ + self.account_object_mismatch = self.objects_checked = \ + self.object_exceptions = self.object_not_found = \ + self.object_checksum_mismatch = self.containers_checked = \ + self.container_exceptions = self.container_count_mismatch = \ + self.container_not_found = self.container_obj_mismatch = 0 + self.list_cache = {} + self.in_progress = {} + + def audit_object(self, account, container, name): + path = '/%s/%s/%s' % (account, container, name) + part, nodes = self.object_ring.get_nodes( + account, container.encode('utf-8'), name.encode('utf-8')) + container_listing = self.audit_container(account, container) + consistent = True + if name not in container_listing: + print(" Object %s missing in container listing!" % path) + consistent = False + hash = None + else: + hash = container_listing[name]['hash'] + etags = [] + for node in nodes: + try: + if self.deep: + conn = http_connect(node['ip'], node['port'], + node['device'], part, 'GET', path, {}) + resp = conn.getresponse() + calc_hash = md5() + chunk = True + while chunk: + chunk = resp.read(8192) + calc_hash.update(chunk) + calc_hash = calc_hash.hexdigest() + if resp.status // 100 != 2: + self.object_not_found += 1 + consistent = False + print(' Bad status %s GETting object "%s" on %s/%s' + % (resp.status, path, + node['ip'], node['device'])) + continue + if resp.getheader('ETag').strip('"') != calc_hash: + self.object_checksum_mismatch += 1 + consistent = False + print(' MD5 does not match etag for "%s" on %s/%s' + % (path, node['ip'], node['device'])) + else: + conn = http_connect(node['ip'], node['port'], + node['device'], part, 'HEAD', + path.encode('utf-8'), {}) + resp = conn.getresponse() + if resp.status // 100 != 2: + self.object_not_found += 1 + consistent = False + print(' Bad status %s HEADing object "%s" on %s/%s' + % (resp.status, path, + node['ip'], node['device'])) + continue + + override_etag = resp.getheader( + 'X-Object-Sysmeta-Container-Update-Override-Etag') + if override_etag: + etags.append((override_etag, node)) + else: + etags.append((resp.getheader('ETag'), node)) + except Exception: + self.object_exceptions += 1 + consistent = False + print(' Exception fetching object "%s" on %s/%s' + % (path, node['ip'], node['device'])) + continue + if not etags: + consistent = False + print(" Failed fo fetch object %s at all!" % path) + elif hash: + for etag, node in etags: + if etag.strip('"') != hash: + consistent = False + self.object_checksum_mismatch += 1 + print(' ETag mismatch for "%s" on %s/%s' + % (path, node['ip'], node['device'])) + if not consistent and self.error_file: + with open(self.error_file, 'a') as err_file: + print(path, file=err_file) + self.objects_checked += 1 + + def audit_container(self, account, name, recurse=False): + if (account, name) in self.in_progress: + self.in_progress[(account, name)].wait() + if (account, name) in self.list_cache: + return self.list_cache[(account, name)] + self.in_progress[(account, name)] = Event() + print('Auditing container "%s"' % name) + path = '/%s/%s' % (account, name) + account_listing = self.audit_account(account) + consistent = True + if name not in account_listing: + consistent = False + print(" Container %s not in account listing!" % path) + part, nodes = \ + self.container_ring.get_nodes(account, name.encode('utf-8')) + rec_d = {} + responses = {} + for node in nodes: + marker = '' + results = True + while results: + try: + conn = http_connect(node['ip'], node['port'], + node['device'], part, 'GET', + path.encode('utf-8'), {}, + 'format=json&marker=%s' % + quote(marker.encode('utf-8'))) + resp = conn.getresponse() + if resp.status // 100 != 2: + self.container_not_found += 1 + consistent = False + print(' Bad status GETting container "%s" on %s/%s' % + (path, node['ip'], node['device'])) + break + if node['id'] not in responses: + responses[node['id']] = { + h.lower(): v for h, v in resp.getheaders()} + results = json.loads(resp.read()) + except Exception: + self.container_exceptions += 1 + consistent = False + print(' Exception GETting container "%s" on %s/%s' % + (path, node['ip'], node['device'])) + break + if results: + marker = results[-1]['name'] + for obj in results: + obj_name = obj['name'] + if obj_name not in rec_d: + rec_d[obj_name] = obj + if (obj['last_modified'] != + rec_d[obj_name]['last_modified']): + self.container_obj_mismatch += 1 + consistent = False + print(" Different versions of %s/%s " + "in container dbs." % (name, obj['name'])) + if (obj['last_modified'] > + rec_d[obj_name]['last_modified']): + rec_d[obj_name] = obj + obj_counts = [int(header['x-container-object-count']) + for header in responses.values()] + if not obj_counts: + consistent = False + print(" Failed to fetch container %s at all!" % path) + else: + if len(set(obj_counts)) != 1: + self.container_count_mismatch += 1 + consistent = False + print( + " Container databases don't agree on number of objects.") + print( + " Max: %s, Min: %s" % (max(obj_counts), min(obj_counts))) + self.containers_checked += 1 + self.list_cache[(account, name)] = rec_d + self.in_progress[(account, name)].send(True) + del self.in_progress[(account, name)] + if recurse: + for obj in rec_d.keys(): + self.pool.spawn_n(self.audit_object, account, name, obj) + if not consistent and self.error_file: + with open(self.error_file, 'a') as error_file: + print(path, file=error_file) + return rec_d + + def audit_account(self, account, recurse=False): + if account in self.in_progress: + self.in_progress[account].wait() + if account in self.list_cache: + return self.list_cache[account] + self.in_progress[account] = Event() + print('Auditing account "%s"' % account) + consistent = True + path = '/%s' % account + part, nodes = self.account_ring.get_nodes(account) + responses = {} + for node in nodes: + marker = '' + results = True + while results: + node_id = node['id'] + try: + conn = http_connect(node['ip'], node['port'], + node['device'], part, 'GET', path, {}, + 'format=json&marker=%s' % + quote(marker.encode('utf-8'))) + resp = conn.getresponse() + if resp.status // 100 != 2: + self.account_not_found += 1 + consistent = False + print(" Bad status GETting account '%s' " + " from %s:%s" % + (account, node['ip'], node['device'])) + break + results = json.loads(resp.read()) + except Exception: + self.account_exceptions += 1 + consistent = False + print(" Exception GETting account '%s' on %s:%s" % + (account, node['ip'], node['device'])) + break + if node_id not in responses: + responses[node_id] = [ + {h.lower(): v for h, v in resp.getheaders()}, []] + responses[node_id][1].extend(results) + if results: + marker = results[-1]['name'] + headers = [r[0] for r in responses.values()] + cont_counts = [int(header['x-account-container-count']) + for header in headers] + if len(set(cont_counts)) != 1: + self.account_container_mismatch += 1 + consistent = False + print(" Account databases for '%s' don't agree on" + " number of containers." % account) + if cont_counts: + print(" Max: %s, Min: %s" % (max(cont_counts), + min(cont_counts))) + obj_counts = [int(header['x-account-object-count']) + for header in headers] + if len(set(obj_counts)) != 1: + self.account_object_mismatch += 1 + consistent = False + print(" Account databases for '%s' don't agree on" + " number of objects." % account) + if obj_counts: + print(" Max: %s, Min: %s" % (max(obj_counts), + min(obj_counts))) + containers = set() + for resp in responses.values(): + containers.update(container['name'] for container in resp[1]) + self.list_cache[account] = containers + self.in_progress[account].send(True) + del self.in_progress[account] + self.accounts_checked += 1 + if recurse: + for container in containers: + self.pool.spawn_n(self.audit_container, account, + container, True) + if not consistent and self.error_file: + with open(self.error_file, 'a') as error_file: + print(path, error_file) + return containers + + def audit(self, account, container=None, obj=None): + if obj and container: + self.pool.spawn_n(self.audit_object, account, container, obj) + elif container: + self.pool.spawn_n(self.audit_container, account, container, True) + else: + self.pool.spawn_n(self.audit_account, account, True) + + def wait(self): + self.pool.waitall() + + def print_stats(self): + + def _print_stat(name, stat): + # Right align stat name in a field of 18 characters + print("{0:>18}: {1}".format(name, stat)) + + print() + _print_stat("Accounts checked", self.accounts_checked) + if self.account_not_found: + _print_stat("Missing Replicas", self.account_not_found) + if self.account_exceptions: + _print_stat("Exceptions", self.account_exceptions) + if self.account_container_mismatch: + _print_stat("Container mismatch", self.account_container_mismatch) + if self.account_object_mismatch: + _print_stat("Object mismatch", self.account_object_mismatch) + print() + _print_stat("Containers checked", self.containers_checked) + if self.container_not_found: + _print_stat("Missing Replicas", self.container_not_found) + if self.container_exceptions: + _print_stat("Exceptions", self.container_exceptions) + if self.container_count_mismatch: + _print_stat("Count mismatch", self.container_count_mismatch) + if self.container_obj_mismatch: + _print_stat("Object mismatch", self.container_obj_mismatch) + print() + _print_stat("Objects checked", self.objects_checked) + if self.object_not_found: + _print_stat("Missing Replicas", self.object_not_found) + if self.object_exceptions: + _print_stat("Exceptions", self.object_exceptions) + if self.object_checksum_mismatch: + _print_stat("MD5 Mismatch", self.object_checksum_mismatch) + + +def main(): + try: + optlist, args = getopt.getopt(sys.argv[1:], 'c:r:e:d') + except getopt.GetoptError as err: + print(str(err)) + print(usage) + sys.exit(2) + if not args and os.isatty(sys.stdin.fileno()): + print(usage) + sys.exit() + opts = dict(optlist) + options = { + 'concurrency': int(opts.get('-c', 50)), + 'error_file': opts.get('-e', None), + 'swift_dir': opts.get('-r', '/etc/swift'), + 'deep': '-d' in opts, + } + auditor = Auditor(**options) + if not os.isatty(sys.stdin.fileno()): + args = chain(args, sys.stdin) + for path in args: + path = '/' + path.rstrip('\r\n').lstrip('/') + auditor.audit(*split_path(path, 1, 3, True)) + auditor.wait() + auditor.print_stats() + + +if __name__ == '__main__': + main() diff --git a/swift/cli/config.py b/swift/cli/config.py new file mode 100755 index 0000000000..1a2db221ce --- /dev/null +++ b/swift/cli/config.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import optparse +import os +import sys + +from swift.common.manager import Server +from swift.common.utils import readconf +from swift.common.wsgi import appconfig + +parser = optparse.OptionParser('%prog [options] SERVER') +parser.add_option('-c', '--config-num', metavar="N", type="int", + dest="number", default=0, + help="parse config for the Nth server only") +parser.add_option('-s', '--section', help="only display matching sections") +parser.add_option('-w', '--wsgi', action='store_true', + help="use wsgi/paste parser instead of readconf") + + +def _context_name(context): + return ':'.join((context.object_type.name, context.name)) + + +def inspect_app_config(app_config): + conf = {} + context = app_config.context + section_name = _context_name(context) + conf[section_name] = context.config() + if context.object_type.name == 'pipeline': + filters = context.filter_contexts + pipeline = [] + for filter_context in filters: + conf[_context_name(filter_context)] = filter_context.config() + pipeline.append(filter_context.entry_point_name) + app_context = context.app_context + conf[_context_name(app_context)] = app_context.config() + pipeline.append(app_context.entry_point_name) + conf[section_name]['pipeline'] = ' '.join(pipeline) + return conf + + +def main(): + options, args = parser.parse_args() + options = dict(vars(options)) + + if not args: + return 'ERROR: specify type of server or conf_path' + conf_files = [] + for arg in args: + if os.path.exists(arg): + conf_files.append(arg) + else: + conf_files += Server(arg).conf_files(**options) + for conf_file in conf_files: + print('# %s' % conf_file) + if options['wsgi']: + app_config = appconfig(conf_file) + conf = inspect_app_config(app_config) + else: + conf = readconf(conf_file) + flat_vars = {} + for k, v in conf.items(): + if options['section'] and k != options['section']: + continue + if not isinstance(v, dict): + flat_vars[k] = v + continue + print('[%s]' % k) + for opt, value in v.items(): + print('%s = %s' % (opt, value)) + print() + for k, v in flat_vars.items(): + print('# %s = %s' % (k, v)) + print() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/swift/cli/drive_audit.py b/swift/cli/drive_audit.py new file mode 100755 index 0000000000..5c74e52c3a --- /dev/null +++ b/swift/cli/drive_audit.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python +# Copyright (c) 2010-2012 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import glob +import locale +import os +import os.path +import re +import subprocess +import sys + + +import six +from six.moves.configparser import ConfigParser + +from swift.common.utils import backward, get_logger, dump_recon_cache, \ + config_true_value + + +def get_devices(device_dir, logger): + devices = [] + majmin_devices = {} + + # List /dev/block + # Using os.scandir on recent versions of python, else os.listdir + if 'scandir' in dir(os): + with os.scandir("/dev/block") as it: + for ent in it: + if ent.is_symlink(): + dev_name = os.path.basename(os.readlink(ent.path)) + majmin = os.path.basename(ent.path).split(':') + majmin_devices[dev_name] = {'major': majmin[0], + 'minor': majmin[1]} + else: + for ent in os.listdir("/dev/block"): + ent_path = os.path.join("/dev/block", ent) + if os.path.is_symlink(ent_path): + dev_name = os.path.basename(os.readlink(ent_path)) + majmin = os.path.basename(ent_path).split(':') + majmin_devices[dev_name] = {'major': majmin[0], + 'minor': majmin[1]} + + for line in open('/proc/mounts').readlines(): + data = line.strip().split() + block_device = data[0] + mount_point = data[1] + if mount_point.startswith(device_dir): + device = {} + device['mount_point'] = mount_point + device['block_device'] = block_device + dev_name = os.path.basename(block_device) + if dev_name in majmin_devices: + # If symlink is in /dev/block + device['major'] = majmin_devices[dev_name]['major'] + device['minor'] = majmin_devices[dev_name]['minor'] + else: + # Else we try to stat block_device + try: + device_num = os.stat(block_device).st_rdev + except OSError: + # If we can't stat the device, + # then something weird is going on + logger.error( + 'Could not determine major:minor numbers for %s ' + '(mounted at %s)! Skipping...', + block_device, mount_point) + continue + device['major'] = str(os.major(device_num)) + device['minor'] = str(os.minor(device_num)) + devices.append(device) + for line in open('/proc/partitions').readlines()[2:]: + major, minor, blocks, kernel_device = line.strip().split() + device = [d for d in devices + if d['major'] == major and d['minor'] == minor] + if device: + device[0]['kernel_device'] = kernel_device + return devices + + +def get_errors(error_re, log_file_pattern, minutes, logger, + log_file_encoding): + # Assuming log rotation is being used, we need to examine + # recently rotated files in case the rotation occurred + # just before the script is being run - the data we are + # looking for may have rotated. + # + # The globbing used before would not work with all out-of-box + # distro setup for logrotate and syslog therefore moving this + # to the config where one can set it with the desired + # globbing pattern. + log_files = [f for f in glob.glob(log_file_pattern)] + try: + log_files.sort(key=lambda f: os.stat(f).st_mtime, reverse=True) + except (IOError, OSError) as exc: + logger.error(exc) + print(exc) + sys.exit(1) + + now_time = datetime.datetime.now() + end_time = now_time - datetime.timedelta(minutes=minutes) + # kern.log does not contain the year so we need to keep + # track of the year and month in case the year recently + # ticked over + year = now_time.year + prev_ent_month = now_time.strftime('%b') + errors = {} + + reached_old_logs = False + for path in log_files: + try: + f = open(path, 'rb') + except IOError: + logger.error("Error: Unable to open " + path) + print("Unable to open " + path) + sys.exit(1) + for line in backward(f): + if not six.PY2: + line = line.decode(log_file_encoding, 'surrogateescape') + if '[ 0.000000]' in line \ + or 'KERNEL supported cpus:' in line \ + or 'BIOS-provided physical RAM map:' in line: + # Ignore anything before the last boot + reached_old_logs = True + break + # Solves the problem with year change - kern.log does not + # keep track of the year. + log_time_ent = line.split()[:3] + if log_time_ent[0] == 'Dec' and prev_ent_month == 'Jan': + year -= 1 + prev_ent_month = log_time_ent[0] + log_time_string = '%d %s' % (year, ' '.join(log_time_ent)) + try: + log_time = datetime.datetime.strptime( + log_time_string, '%Y %b %d %H:%M:%S') + except ValueError: + # Some versions use ISO timestamps instead + try: + log_time = datetime.datetime.strptime( + line[0:19], '%Y-%m-%dT%H:%M:%S') + except ValueError: + continue + if log_time > end_time: + for err in error_re: + for device in err.findall(line): + errors[device] = errors.get(device, 0) + 1 + else: + reached_old_logs = True + break + if reached_old_logs: + break + return errors + + +def comment_fstab(mount_point): + with open('/etc/fstab', 'r') as fstab: + with open('/etc/fstab.new', 'w') as new_fstab: + for line in fstab: + parts = line.split() + if len(parts) > 2 \ + and parts[1] == mount_point \ + and not line.startswith('#'): + new_fstab.write('#' + line) + else: + new_fstab.write(line) + os.rename('/etc/fstab.new', '/etc/fstab') + + +def main(): + c = ConfigParser() + try: + conf_path = sys.argv[1] + except Exception: + print("Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1]) + sys.exit(1) + if not c.read(conf_path): + print("Unable to read config file %s" % conf_path) + sys.exit(1) + conf = dict(c.items('drive-audit')) + device_dir = conf.get('device_dir', '/srv/node') + minutes = int(conf.get('minutes', 60)) + error_limit = int(conf.get('error_limit', 1)) + recon_cache_path = conf.get('recon_cache_path', "/var/cache/swift") + log_file_pattern = conf.get('log_file_pattern', + '/var/log/kern.*[!.][!g][!z]') + log_file_encoding = conf.get('log_file_encoding', 'auto') + if log_file_encoding == 'auto': + log_file_encoding = locale.getpreferredencoding() + log_to_console = config_true_value(conf.get('log_to_console', False)) + error_re = [] + for conf_key in conf: + if conf_key.startswith('regex_pattern_'): + error_pattern = conf[conf_key] + try: + r = re.compile(error_pattern) + except re.error: + sys.exit('Error: unable to compile regex pattern "%s"' % + error_pattern) + error_re.append(r) + if not error_re: + error_re = [ + re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'), + re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'), + ] + conf['log_name'] = conf.get('log_name', 'drive-audit') + logger = get_logger(conf, log_to_console=log_to_console, + log_route='drive-audit') + devices = get_devices(device_dir, logger) + logger.debug("Devices found: %s" % str(devices)) + if not devices: + logger.error("Error: No devices found!") + recon_errors = {} + total_errors = 0 + for device in devices: + recon_errors[device['mount_point']] = 0 + errors = get_errors(error_re, log_file_pattern, minutes, logger, + log_file_encoding) + logger.debug("Errors found: %s" % str(errors)) + unmounts = 0 + for kernel_device, count in errors.items(): + if count >= error_limit: + device = \ + [d for d in devices if d['kernel_device'] == kernel_device] + if device: + mount_point = device[0]['mount_point'] + if mount_point.startswith(device_dir): + if config_true_value(conf.get('unmount_failed_device', + True)): + logger.info("Unmounting %s with %d errors" % + (mount_point, count)) + subprocess.call(['umount', '-fl', mount_point]) + logger.info("Commenting out %s from /etc/fstab" % + (mount_point)) + comment_fstab(mount_point) + unmounts += 1 + else: + logger.info("Detected %s with %d errors " + "(Device not unmounted)" % + (mount_point, count)) + recon_errors[mount_point] = count + total_errors += count + recon_file = recon_cache_path + "/drive.recon" + dump_recon_cache(recon_errors, recon_file, logger) + dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger, + set_owner=conf.get("user", "swift")) + + if unmounts == 0: + logger.info("No drives were unmounted") + elif os.path.isdir("/run/systemd/system"): + logger.debug("fstab updated, calling systemctl daemon-reload") + subprocess.call(["/usr/bin/systemctl", "daemon-reload"]) + + +if __name__ == '__main__': + main() diff --git a/swift/cli/get_nodes.py b/swift/cli/get_nodes.py new file mode 100755 index 0000000000..7586cd3b46 --- /dev/null +++ b/swift/cli/get_nodes.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +# Copyright (c) 2010-2012 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +from optparse import OptionParser +from os.path import basename + +from swift.common.ring import Ring +from swift.common.storage_policy import reload_storage_policies +from swift.common.utils import set_swift_dir +from swift.cli.info import (parse_get_node_args, print_item_locations, + InfoSystemExit) + + +def main(): + + usage = ''' + Shows the nodes responsible for the item specified. + Usage: %prog [-a] [ []] + Or: %prog [-a] -p partition + Or: %prog [-a] -P policy_name [ []] + Or: %prog [-a] -P policy_name -p partition + Note: account, container, object can also be a single arg separated by / + Example: + $ %prog -a /etc/swift/account.ring.gz MyAccount + Partition 5743883 + Hash 96ae332a60b58910784e4417a03e1ad0 + 10.1.1.7:8000 sdd1 + 10.1.9.2:8000 sdb1 + 10.1.5.5:8000 sdf1 + 10.1.5.9:8000 sdt1 # [Handoff] + ''' + parser = OptionParser(usage) + parser.add_option('-a', '--all', action='store_true', + help='Show all handoff nodes') + parser.add_option('-p', '--partition', metavar='PARTITION', + help='Show nodes for a given partition') + parser.add_option('-P', '--policy-name', dest='policy_name', + help='Specify which policy to use') + parser.add_option('-d', '--swift-dir', default='/etc/swift', + dest='swift_dir', help='Path to swift directory') + parser.add_option('-Q', '--quoted', action='store_true', + help='Assume swift paths are quoted') + options, args = parser.parse_args() + + if set_swift_dir(options.swift_dir): + reload_storage_policies() + + try: + ring_path, args = parse_get_node_args(options, args) + except InfoSystemExit as e: + parser.print_help() + sys.exit('ERROR: %s' % e) + + ring = ring_name = None + if ring_path: + ring_name = basename(ring_path)[:-len('.ring.gz')] + ring = Ring(ring_path) + + try: + print_item_locations(ring, ring_name, *args, **vars(options)) + except InfoSystemExit: + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/swift/cli/oldies.py b/swift/cli/oldies.py new file mode 100755 index 0000000000..7c6abe0bb2 --- /dev/null +++ b/swift/cli/oldies.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import optparse +import subprocess +import sys + + +def main(): + parser = optparse.OptionParser(usage='''%prog [options] + +Lists old Swift processes. + '''.strip()) + parser.add_option('-a', '--age', dest='hours', type='int', default=720, + help='look for processes at least HOURS old; ' + 'default: 720 (30 days)') + parser.add_option('-p', '--pids', action='store_true', + help='only print the pids found; for example, to pipe ' + 'to xargs kill') + (options, args) = parser.parse_args() + + listing = [] + for line in subprocess.Popen( + ['ps', '-eo', 'etime,pid,args', '--no-headers'], + stdout=subprocess.PIPE).communicate()[0].split(b'\n'): + if not line: + continue + hours = 0 + try: + etime, pid, args = line.decode('ascii').split(None, 2) + except ValueError: + # This covers both decoding and not-enough-values-to-unpack errors + sys.exit('Could not process ps line %r' % line) + if not args.startswith(( + '/usr/bin/python /usr/bin/swift-', + '/usr/bin/python /usr/local/bin/swift-', + '/bin/python /usr/bin/swift-', + '/usr/bin/python3 /usr/bin/swift-', + '/usr/bin/python3 /usr/local/bin/swift-', + '/bin/python3 /usr/bin/swift-')): + continue + args = args.split('-', 1)[1] + etime = etime.split('-') + if len(etime) == 2: + hours = int(etime[0]) * 24 + etime = etime[1] + elif len(etime) == 1: + etime = etime[0] + else: + sys.exit('Could not process etime value from %r' % line) + etime = etime.split(':') + if len(etime) == 3: + hours += int(etime[0]) + elif len(etime) != 2: + sys.exit('Could not process etime value from %r' % line) + if hours >= options.hours: + listing.append((str(hours), pid, args)) + + if not listing: + sys.exit() + + if options.pids: + for hours, pid, args in listing: + print(pid) + else: + hours_len = len('Hours') + pid_len = len('PID') + args_len = len('Command') + for hours, pid, args in listing: + hours_len = max(hours_len, len(hours)) + pid_len = max(pid_len, len(pid)) + args_len = max(args_len, len(args)) + args_len = min(args_len, 78 - hours_len - pid_len) + + print('%*s %*s %s' % (hours_len, 'Hours', pid_len, 'PID', 'Command')) + for hours, pid, args in listing: + print('%*s %*s %s' % (hours_len, hours, pid_len, + pid, args[:args_len])) + + +if __name__ == '__main__': + main() diff --git a/swift/cli/orphans.py b/swift/cli/orphans.py new file mode 100755 index 0000000000..f819b2c220 --- /dev/null +++ b/swift/cli/orphans.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import optparse +import os +import re +import signal +import subprocess +import sys + +from swift.common.manager import RUN_DIR + + +def main(): + parser = optparse.OptionParser(usage='''%prog [options] + +Lists and optionally kills orphaned Swift processes. This is done by scanning +/var/run/swift for .pid files and listing any processes that look like Swift +processes but aren't associated with the pids in those .pid files. Any Swift +processes running with the 'once' parameter are ignored, as those are usually +for full-speed audit scans and such. + +Example (sends SIGTERM to all orphaned Swift processes older than two hours): +%prog -a 2 -k TERM + '''.strip()) + parser.add_option('-a', '--age', dest='hours', type='int', default=24, + help="look for processes at least HOURS old; " + "default: 24") + parser.add_option('-k', '--kill', dest='signal', + help='send SIGNAL to matched processes; default: just ' + 'list process information') + parser.add_option('-w', '--wide', dest='wide', default=False, + action='store_true', + help="don't clip the listing at 80 characters") + parser.add_option('-r', '--run-dir', type="str", + dest="run_dir", default=RUN_DIR, + help="alternative directory to store running pid files " + "default: %s" % RUN_DIR) + (options, args) = parser.parse_args() + + pids = [] + + for root, directories, files in os.walk(options.run_dir): + for name in files: + if name.endswith(('.pid', '.pid.d')): + pids.append(open(os.path.join(root, name)).read().strip()) + pids.extend(subprocess.Popen( + ['ps', '--ppid', pids[-1], '-o', 'pid', '--no-headers'], + stdout=subprocess.PIPE).communicate()[0].decode().split()) + + listing = [] + swift_cmd_re = re.compile( + '^/usr/bin/python[23]? /usr(?:/local)?/bin/swift-') + for line in subprocess.Popen( + ['ps', '-eo', 'etime,pid,args', '--no-headers'], + stdout=subprocess.PIPE).communicate()[0].split(b'\n'): + if not line: + continue + hours = 0 + try: + etime, pid, args = line.decode('ascii').split(None, 2) + except ValueError: + sys.exit('Could not process ps line %r' % line) + if pid in pids: + continue + if any([ + not swift_cmd_re.match(args), + 'swift-orphans' in args, + 'once' in args.split(), + ]): + continue + args = args.split('swift-', 1)[1] + etime = etime.split('-') + if len(etime) == 2: + hours = int(etime[0]) * 24 + etime = etime[1] + elif len(etime) == 1: + etime = etime[0] + else: + sys.exit('Could not process etime value from %r' % line) + etime = etime.split(':') + if len(etime) == 3: + hours += int(etime[0]) + elif len(etime) != 2: + sys.exit('Could not process etime value from %r' % line) + if hours >= options.hours: + listing.append((str(hours), pid, args)) + + if not listing: + sys.exit() + + hours_len = len('Hours') + pid_len = len('PID') + args_len = len('Command') + for hours, pid, args in listing: + hours_len = max(hours_len, len(hours)) + pid_len = max(pid_len, len(pid)) + args_len = max(args_len, len(args)) + args_len = min(args_len, 78 - hours_len - pid_len) + + print('%*s %*s %s' % + (hours_len, 'Hours', pid_len, 'PID', 'Command')) + for hours, pid, args in listing: + print('%*s %*s %s' % + (hours_len, hours, pid_len, pid, args[:args_len])) + + if options.signal: + try: + signum = int(options.signal) + except ValueError: + signum = getattr(signal, options.signal.upper(), + getattr(signal, 'SIG' + options.signal.upper(), + None)) + if not signum: + sys.exit('Could not translate %r to a signal number.' % + options.signal) + print('Sending processes %s (%d) signal...' % (options.signal, signum), + end='') + for hours, pid, args in listing: + os.kill(int(pid), signum) + print('Done.') + + +if __name__ == '__main__': + main() diff --git a/swift/cli/reconciler_enqueue.py b/swift/cli/reconciler_enqueue.py new file mode 100644 index 0000000000..367b0b5e08 --- /dev/null +++ b/swift/cli/reconciler_enqueue.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +from optparse import OptionParser + +import eventlet.debug + +from swift.common.ring import Ring +from swift.common.utils import split_path +from swift.common.storage_policy import POLICIES + +from swift.container.reconciler import add_to_reconciler_queue +""" +This tool is primarily for debugging and development but can be used an example +of how an operator could enqueue objects manually if a problem is discovered - +might be particularly useful if you need to hack a fix into the reconciler +and re-run it. +""" + +USAGE = """ +%prog [options] + +This script enqueues an object to be evaluated by the reconciler. + +Arguments: +policy_index: the policy the object is currently stored in. + /a/c/o: the full path of the object - utf-8 + timestamp: the timestamp of the datafile/tombstone. + +""".strip() + +parser = OptionParser(USAGE) +parser.add_option('-X', '--op', default='PUT', choices=('PUT', 'DELETE'), + help='the method of the misplaced operation') +parser.add_option('-f', '--force', action='store_true', + help='force an object to be re-enqueued') + + +def main(): + eventlet.debug.hub_exceptions(True) + options, args = parser.parse_args() + try: + policy_index, path, timestamp = args + except ValueError: + sys.exit(parser.print_help()) + container_ring = Ring('/etc/swift/container.ring.gz') + policy = POLICIES.get_by_index(policy_index) + if not policy: + return 'ERROR: invalid storage policy index: %s' % policy + try: + account, container, obj = split_path(path, 3, 3, True) + except ValueError as e: + return 'ERROR: %s' % e + container_name = add_to_reconciler_queue( + container_ring, account, container, obj, + policy.idx, timestamp, options.op, force=options.force) + if not container_name: + return 'ERROR: unable to enqueue!' + print(container_name) + + +if __name__ == "__main__": + sys.exit(main())