#! /usr/bin/env python # Copyright (c) 2010-2012 OpenStack, LLC. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. import cPickle as pickle from array import array from errno import EEXIST from gzip import GzipFile from itertools import islice, izip from os import mkdir from os.path import basename, dirname, exists, join as pathjoin from sys import argv, exit, modules from textwrap import wrap from time import time from swift.common import exceptions from swift.common.ring import RingBuilder MAJOR_VERSION = 1 MINOR_VERSION = 3 EXIT_SUCCESS = 0 EXIT_WARNING = 1 EXIT_ERROR = 2 def search_devs(builder, search_value): """ The can be of the form: dz-:/_ Any part is optional, but you must include at least one part. Examples: d74 Matches the device id 74 z1 Matches devices in zone 1 z1-1.2.3.4 Matches devices in zone 1 with the ip 1.2.3.4 1.2.3.4 Matches devices in any zone with the ip 1.2.3.4 z1:5678 Matches devices in zone 1 using port 5678 :5678 Matches devices that use port 5678 /sdb1 Matches devices with the device name sdb1 _shiny Matches devices with shiny in the meta data _"snet: 5.6.7.8" Matches devices with snet: 5.6.7.8 in the meta data [::1] Matches devices in any zone with the ip ::1 z1-[::1]:5678 Matches devices in zone 1 with ip ::1 and port 5678 Most specific example: d74z1-1.2.3.4:5678/sdb1_"snet: 5.6.7.8" Nerd explanation: All items require their single character prefix except the ip, in which case the - is optional unless the device id or zone is also included. """ orig_search_value = search_value match = [] if search_value.startswith('d'): i = 1 while i < len(search_value) and search_value[i].isdigit(): i += 1 match.append(('id', int(search_value[1:i]))) search_value = search_value[i:] if search_value.startswith('z'): i = 1 while i < len(search_value) and search_value[i].isdigit(): i += 1 match.append(('zone', int(search_value[1:i]))) search_value = search_value[i:] if search_value.startswith('-'): search_value = search_value[1:] if len(search_value) and search_value[0].isdigit(): i = 1 while i < len(search_value) and search_value[i] in '0123456789.': i += 1 match.append(('ip', search_value[:i])) search_value = search_value[i:] elif len(search_value) and search_value[0] == '[': i = 1 while i < len(search_value) and search_value[i] != ']': i += 1 i += 1 match.append(('ip', search_value[:i].lstrip('[').rstrip(']'))) search_value = search_value[i:] if search_value.startswith(':'): i = 1 while i < len(search_value) and search_value[i].isdigit(): i += 1 match.append(('port', int(search_value[1:i]))) search_value = search_value[i:] if search_value.startswith('/'): i = 1 while i < len(search_value) and search_value[i] != '_': i += 1 match.append(('device', search_value[1:i])) search_value = search_value[i:] if search_value.startswith('_'): match.append(('meta', search_value[1:])) search_value = '' if search_value: raise ValueError('Invalid : %s' % repr(orig_search_value)) devs = [] for dev in builder.devs: if not dev: continue matched = True for key, value in match: if key == 'meta': if value not in dev.get(key): matched = False elif dev.get(key) != value: matched = False if matched: devs.append(dev) return devs def format_device(dev): """ Format a device for display. """ if ':' in dev['ip']: return 'd%(id)sz%(zone)s-[%(ip)s]:%(port)s/%(device)s_"%(meta)s"' % dev else: return 'd%(id)sz%(zone)s-%(ip)s:%(port)s/%(device)s_"%(meta)s"' % dev class Commands: def unknown(): print 'Unknown command: %s' % argv[2] exit(EXIT_ERROR) def create(): """ swift-ring-builder create Creates with 2^ partitions and . is number of hours to restrict moving a partition more than once. """ if len(argv) < 6: print Commands.create.__doc__.strip() exit(EXIT_ERROR) builder = RingBuilder(int(argv[3]), int(argv[4]), int(argv[5])) backup_dir = pathjoin(dirname(argv[1]), 'backups') try: mkdir(backup_dir) except OSError, err: if err.errno != EEXIST: raise pickle.dump(builder.to_dict(), open(pathjoin(backup_dir, '%d.' % time() + basename(argv[1])), 'wb'), protocol=2) pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2) exit(EXIT_SUCCESS) def default(): """ swift-ring-builder Shows information about the ring and the devices within. """ print '%s, build version %d' % (argv[1], builder.version) zones = 0 balance = 0 if builder.devs: zones = len(set(d['zone'] for d in builder.devs if d is not None)) balance = builder.get_balance() print '%d partitions, %d replicas, %d zones, %d devices, %.02f ' \ 'balance' % (builder.parts, builder.replicas, zones, len([d for d in builder.devs if d]), balance) print 'The minimum number of hours before a partition can be ' \ 'reassigned is %s' % builder.min_part_hours if builder.devs: print 'Devices: id zone ip address port name ' \ 'weight partitions balance meta' weighted_parts = builder.parts * builder.replicas / \ sum(d['weight'] for d in builder.devs if d is not None) for dev in builder.devs: if dev is None: continue if not dev['weight']: if dev['parts']: balance = 999.99 else: balance = 0 else: balance = 100.0 * dev['parts'] / \ (dev['weight'] * weighted_parts) - 100.0 print ' %5d %5d %15s %5d %9s %6.02f %10s %7.02f %s' % \ (dev['id'], dev['zone'], dev['ip'], dev['port'], dev['device'], dev['weight'], dev['parts'], balance, dev['meta']) exit(EXIT_SUCCESS) def search(): """ swift-ring-builder search Shows information about matching devices. """ if len(argv) < 4: print Commands.search.__doc__.strip() print print search_devs.__doc__.strip() exit(EXIT_ERROR) devs = search_devs(builder, argv[3]) if not devs: print 'No matching devices found' exit(EXIT_ERROR) print 'Devices: id zone ip address port name ' \ 'weight partitions balance meta' weighted_parts = builder.parts * builder.replicas / \ sum(d['weight'] for d in builder.devs if d is not None) for dev in devs: if not dev['weight']: if dev['parts']: balance = 999.99 else: balance = 0 else: balance = 100.0 * dev['parts'] / \ (dev['weight'] * weighted_parts) - 100.0 print ' %5d %5d %15s %5d %9s %6.02f %10s %7.02f %s' % \ (dev['id'], dev['zone'], dev['ip'], dev['port'], dev['device'], dev['weight'], dev['parts'], balance, dev['meta']) exit(EXIT_SUCCESS) def list_parts(): """ swift-ring-builder list_parts [] .. Returns a 2 column list of all the partitions that are assigned to any of the devices matching the search values given. The first column is the assigned partition number and the second column is the number of device matches for that partition. The list is ordered from most number of matches to least. If there are a lot of devices to match against, this command could take a while to run. """ if len(argv) < 4: print Commands.list_parts.__doc__.strip() print print search_devs.__doc__.strip() exit(EXIT_ERROR) devs = [] for arg in argv[3:]: devs.extend(search_devs(builder, arg) or []) if not devs: print 'No matching devices found' exit(EXIT_ERROR) devs = [d['id'] for d in devs] matches = [array('i') for x in xrange(builder.replicas)] for part in xrange(builder.parts): count = len([d for d in builder.get_part_devices(part) if d['id'] in devs]) if count: matches[builder.replicas - count].append(part) print 'Partition Matches' for index, parts in enumerate(matches): for part in parts: print '%9d %7d' % (part, builder.replicas - index) exit(EXIT_SUCCESS) def add(): """ swift-ring-builder add z-:/_ [z-:/_ ] ... Adds devices to the ring with the given information. No partitions will be assigned to the new device until after running 'rebalance'. This is so you can make multiple device changes and rebalance them all just once. """ if len(argv) < 5 or len(argv) % 2 != 1: print Commands.add.__doc__.strip() exit(EXIT_ERROR) devs_and_weights = izip(islice(argv, 3, len(argv), 2), islice(argv, 4, len(argv), 2)) for devstr, weightstr in devs_and_weights: if not devstr.startswith('z'): print 'Invalid add value: %s' % devstr exit(EXIT_ERROR) i = 1 while i < len(devstr) and devstr[i].isdigit(): i += 1 zone = int(devstr[1:i]) rest = devstr[i:] if not rest.startswith('-'): print 'Invalid add value: %s' % devstr print "The on-disk ring builder is unchanged.\n" exit(EXIT_ERROR) i = 1 if rest[i] == '[': i += 1 while i < len(rest) and rest[i] != ']': i += 1 i += 1 ip = rest[1:i].lstrip('[').rstrip(']') rest = rest[i:] else: while i < len(rest) and rest[i] in '0123456789.': i += 1 ip = rest[1:i] rest = rest[i:] if not rest.startswith(':'): print 'Invalid add value: %s' % devstr print "The on-disk ring builder is unchanged.\n" exit(EXIT_ERROR) i = 1 while i < len(rest) and rest[i].isdigit(): i += 1 port = int(rest[1:i]) rest = rest[i:] if not rest.startswith('/'): print 'Invalid add value: %s' % devstr print "The on-disk ring builder is unchanged.\n" exit(EXIT_ERROR) i = 1 while i < len(rest) and rest[i] != '_': i += 1 device_name = rest[1:i] rest = rest[i:] meta = '' if rest.startswith('_'): meta = rest[1:] try: weight = float(weightstr) except ValueError: print 'Invalid weight value: %s' % weightstr print "The on-disk ring builder is unchanged.\n" exit(EXIT_ERROR) if weight < 0: print 'Invalid weight value (must be positive): %s' % weightstr print "The on-disk ring builder is unchanged.\n" exit(EXIT_ERROR) for dev in builder.devs: if dev is None: continue if dev['ip'] == ip and dev['port'] == port and \ dev['device'] == device_name: print 'Device %d already uses %s:%d/%s.' % \ (dev['id'], dev['ip'], dev['port'], dev['device']) print "The on-disk ring builder is unchanged.\n" exit(EXIT_ERROR) next_dev_id = 0 if builder.devs: next_dev_id = max(d['id'] for d in builder.devs if d) + 1 builder.add_dev({'id': next_dev_id, 'zone': zone, 'ip': ip, 'port': port, 'device': device_name, 'weight': weight, 'meta': meta}) if ':' in ip: print 'Device z%s-[%s]:%s/%s_"%s" with %s weight got id %s' % \ (zone, ip, port, device_name, meta, weight, next_dev_id) else: print 'Device z%s-%s:%s/%s_"%s" with %s weight got id %s' % \ (zone, ip, port, device_name, meta, weight, next_dev_id) pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2) exit(EXIT_SUCCESS) def set_weight(): """ swift-ring-builder set_weight [ 1: print 'Matched more than one device:' for dev in devs: print ' d%(id)sz%(zone)s-%(ip)s:%(port)s/%(device)s_' \ '"%(meta)s"' % dev if raw_input('Are you sure you want to update the weight for ' 'these %s devices? (y/N) ' % len(devs)) != 'y': print 'Aborting device modifications' exit(EXIT_ERROR) for dev in devs: builder.set_dev_weight(dev['id'], weight) print 'd%(id)sz%(zone)s-%(ip)s:%(port)s/%(device)s_' \ '"%(meta)s" weight set to %(weight)s' % dev pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2) exit(EXIT_SUCCESS) def set_info(): """ swift-ring-builder set_info :/_ [ :/_] ... For each search-value, resets the matched device's information. This information isn't used to assign partitions, so you can use 'write_ring' afterward to rewrite the current ring with the newer device information. Any of the parts are optional in the final :/_ parameter; just give what you want to change. For instance set_info d74 _"snet: 5.6.7.8" would just update the meta data for device id 74. """ if len(argv) < 5 or len(argv) % 2 != 1: print Commands.set_info.__doc__.strip() print print search_devs.__doc__.strip() exit(EXIT_ERROR) searches_and_changes = izip(islice(argv, 3, len(argv), 2), islice(argv, 4, len(argv), 2)) for search_value, change_value in searches_and_changes: devs = search_devs(builder, search_value) change = [] if len(change_value) and change_value[0].isdigit(): i = 1 while (i < len(change_value) and change_value[i] in '0123456789.'): i += 1 change.append(('ip', change_value[:i])) change_value = change_value[i:] elif len(change_value) and change_value[0] == '[': i = 1 while i < len(change_value) and change_value[i] != ']': i += 1 i += 1 change.append(('ip', change_value[:i].lstrip('[').rstrip(']'))) change_value = change_value[i:] if change_value.startswith(':'): i = 1 while i < len(change_value) and change_value[i].isdigit(): i += 1 change.append(('port', int(change_value[1:i]))) change_value = change_value[i:] if change_value.startswith('/'): i = 1 while i < len(change_value) and change_value[i] != '_': i += 1 change.append(('device', change_value[1:i])) change_value = change_value[i:] if change_value.startswith('_'): change.append(('meta', change_value[1:])) change_value = '' if change_value or not change: raise ValueError('Invalid set info change value: %s' % repr(argv[4])) if not devs: print("Search value \"%s\" matched 0 devices.\n" "The on-disk ring builder is unchanged.\n" % search_value) exit(EXIT_ERROR) if len(devs) > 1: print 'Matched more than one device:' for dev in devs: print ' %s' % format_device(dev) if raw_input('Are you sure you want to update the info for ' 'these %s devices? (y/N) ' % len(devs)) != 'y': print 'Aborting device modifications' exit(EXIT_ERROR) for dev in devs: orig_dev_string = format_device(dev) test_dev = dict(dev) for key, value in change: test_dev[key] = value for check_dev in builder.devs: if not check_dev or check_dev['id'] == test_dev['id']: continue if check_dev['ip'] == test_dev['ip'] and \ check_dev['port'] == test_dev['port'] and \ check_dev['device'] == test_dev['device']: print 'Device %d already uses %s:%d/%s.' % \ (check_dev['id'], check_dev['ip'], check_dev['port'], check_dev['device']) exit(EXIT_ERROR) for key, value in change: dev[key] = value print 'Device %s is now %s' % (orig_dev_string, format_device(dev)) pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2) exit(EXIT_SUCCESS) def remove(): """ swift-ring-builder remove [search-value ...] Removes the device(s) from the ring. This should normally just be used for a device that has failed. For a device you wish to decommission, it's best to set its weight to 0, wait for it to drain all its data, then use this remove command. This will not take effect until after running 'rebalance'. This is so you can make multiple device changes and rebalance them all just once. """ if len(argv) < 4: print Commands.remove.__doc__.strip() print print search_devs.__doc__.strip() exit(EXIT_ERROR) for search_value in argv[3:]: devs = search_devs(builder, search_value) if not devs: print("Search value \"%s\" matched 0 devices.\n" "The on-disk ring builder is unchanged.\n" % devstr) exit(EXIT_ERROR) if len(devs) > 1: print 'Matched more than one device:' for dev in devs: print ' d%(id)sz%(zone)s-%(ip)s:%(port)s/%(device)s_' \ '"%(meta)s"' % dev if raw_input('Are you sure you want to remove these %s ' 'devices? (y/N) ' % len(devs)) != 'y': print 'Aborting device removals' exit(EXIT_ERROR) for dev in devs: try: builder.remove_dev(dev['id']) except exceptions.RingBuilderError, e: print '-' * 79 print( "An error occurred while removing device with id %d\n" "This usually means that you attempted to remove\n" "the last device in a ring. If this is the case,\n" "consider creating a new ring instead.\n" "The on-disk ring builder is unchanged.\n" "Original exception message: %s" % (dev['id'], e.message) ) print '-' * 79 exit(EXIT_ERROR) print 'd%(id)sz%(zone)s-%(ip)s:%(port)s/%(device)s_' \ '"%(meta)s" marked for removal and will be removed' \ ' next rebalance.' % dev pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2) exit(EXIT_SUCCESS) def rebalance(): """ swift-ring-builder rebalance Attempts to rebalance the ring by reassigning partitions that haven't been recently reassigned. """ devs_changed = builder.devs_changed try: last_balance = builder.get_balance() parts, balance = builder.rebalance() except exceptions.RingBuilderError, e: print '-' * 79 print ("An error has occurred during ring validation. Common\n" "causes of failure are rings that are empty or do not\n" "have enough devices to accommodate the replica count.\n" "Original exception message:\n %s" % e.message ) print '-' * 79 exit(EXIT_ERROR) if not parts: print 'No partitions could be reassigned.' print 'Either none need to be or none can be due to ' \ 'min_part_hours [%s].' % builder.min_part_hours exit(EXIT_WARNING) if not devs_changed and abs(last_balance - balance) < 1: print 'Cowardly refusing to save rebalance as it did not change ' \ 'at least 1%.' exit(EXIT_WARNING) try: builder.validate() except exceptions.RingValidationError, e: print '-' * 79 print ("An error has occurred during ring validation. Common\n" "causes of failure are rings that are empty or do not\n" "have enough devices to accommodate the replica count.\n" "Original exception message:\n %s" % e.message ) print '-' * 79 exit(EXIT_ERROR) print 'Reassigned %d (%.02f%%) partitions. Balance is now %.02f.' % \ (parts, 100.0 * parts / builder.parts, balance) status = EXIT_SUCCESS if balance > 5: print '-' * 79 print 'NOTE: Balance of %.02f indicates you should push this ' % \ balance print ' ring, wait at least %d hours, and rebalance/repush.' \ % builder.min_part_hours print '-' * 79 status = EXIT_WARNING ts = time() pickle.dump(builder.get_ring().to_dict(), GzipFile(pathjoin(backup_dir, '%d.' % ts + basename(ring_file)), 'wb'), protocol=2) pickle.dump(builder.to_dict(), open(pathjoin(backup_dir, '%d.' % ts + basename(argv[1])), 'wb'), protocol=2) pickle.dump(builder.get_ring().to_dict(), GzipFile(ring_file, 'wb'), protocol=2) pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2) exit(status) def validate(): """ swift-ring-builder validate Just runs the validation routines on the ring. """ builder.validate() exit(EXIT_SUCCESS) def write_ring(): """ swift-ring-builder write_ring Just rewrites the distributable ring file. This is done automatically after a successful rebalance, so really this is only useful after one or more 'set_info' calls when no rebalance is needed but you want to send out the new device information. """ ring_data = builder.get_ring() if not ring_data._replica2part2dev_id: if ring_data.devs: print 'Warning: Writing a ring with no partition ' \ 'assignments but with devices; did you forget to run ' \ '"rebalance"?' else: print 'Warning: Writing an empty ring' pickle.dump(ring_data.to_dict(), GzipFile(pathjoin(backup_dir, '%d.' % time() + basename(ring_file)), 'wb'), protocol=2) pickle.dump(ring_data.to_dict(), GzipFile(ring_file, 'wb'), protocol=2) exit(EXIT_SUCCESS) def pretend_min_part_hours_passed(): builder.pretend_min_part_hours_passed() pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2) exit(EXIT_SUCCESS) def set_min_part_hours(): """ swift-ring-builder set_min_part_hours Changes the to the given . This should be set to however long a full replication/update cycle takes. We're working on a way to determine this more easily than scanning logs. """ if len(argv) < 4: print Commands.set_min_part_hours.__doc__.strip() exit(EXIT_ERROR) builder.change_min_part_hours(int(argv[3])) print 'The minimum number of hours before a partition can be ' \ 'reassigned is now set to %s' % argv[3] pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2) exit(EXIT_SUCCESS) if __name__ == '__main__': if len(argv) < 2: print "swift-ring-builder %(MAJOR_VERSION)s.%(MINOR_VERSION)s\n" % \ globals() print Commands.default.__doc__.strip() print cmds = [c for c, f in Commands.__dict__.iteritems() if f.__doc__ and c[0] != '_' and c != 'default'] cmds.sort() for cmd in cmds: print Commands.__dict__[cmd].__doc__.strip() print print search_devs.__doc__.strip() print for line in wrap(' '.join(cmds), 79, initial_indent='Quick list: ', subsequent_indent=' '): print line print ('Exit codes: 0 = operation successful\n' ' 1 = operation completed with warnings\n' \ ' 2 = error' ) exit(EXIT_SUCCESS) if exists(argv[1]): try: builder = pickle.load(open(argv[1], 'rb')) if not hasattr(builder, 'devs'): builder_dict = builder builder = RingBuilder(1, 1, 1) builder.copy_from(builder_dict) except ImportError: # Happens with really old builder pickles modules['swift.ring_builder'] = \ modules['swift.common.ring.builder'] builder = RingBuilder(1, 1, 1) builder.copy_from(pickle.load(open(argv[1], 'rb'))) for dev in builder.devs: if dev and 'meta' not in dev: dev['meta'] = '' elif len(argv) < 3 or argv[2] != 'create': print 'Ring Builder file does not exist: %s' % argv[1] exit(EXIT_ERROR) backup_dir = pathjoin(dirname(argv[1]), 'backups') try: mkdir(backup_dir) except OSError, err: if err.errno != EEXIST: raise ring_file = argv[1] if ring_file.endswith('.builder'): ring_file = ring_file[:-len('.builder')] ring_file += '.ring.gz' if len(argv) == 2: command = "default" else: command = argv[2] Commands.__dict__.get(command, Commands.unknown)()