relinker: Track part_power/next_part_power in state file
...and only update from the state file if those part powers match the work we're currently doing. Otherwise, we would skip relinking when trying to do multiple increases in quick succession, causing misplaced data that clients could not access. Note that because the part_power/next_part_power pair *must* change between relinking and cleanup, we now only need to track one boolean per part; the state file will automatically reset when switching modes. Also note that any state files in the old format will be discarded -- this is safe, but may cause increased I/O. OTOH, operators probably shouldn't be upgrading Swift mid-part-power-increase anyway. Change-Id: Id08d0893db2c6e6f9ce1e42a104409568daf985b Closes-Bug: #1910589
This commit is contained in:
parent
92b15b0436
commit
c10f8ae4ec
|
@ -50,13 +50,18 @@ def hook_pre_device(locks, states, datadir, device_path):
|
|||
locks[0] = fd
|
||||
|
||||
state_file = os.path.join(device_path, STATE_FILE.format(datadir=datadir))
|
||||
states.clear()
|
||||
states["state"].clear()
|
||||
try:
|
||||
with open(state_file, 'rt') as f:
|
||||
tmp = json.load(f)
|
||||
states.update(tmp)
|
||||
except ValueError:
|
||||
# Invalid JSON: remove the file to restart from scratch
|
||||
state_from_disk = json.load(f)
|
||||
if state_from_disk["next_part_power"] != states["next_part_power"]:
|
||||
raise ValueError
|
||||
if state_from_disk["part_power"] != states["part_power"]:
|
||||
states["prev_part_power"] = state_from_disk["part_power"]
|
||||
raise ValueError
|
||||
states["state"].update(state_from_disk["state"])
|
||||
except (ValueError, TypeError, KeyError):
|
||||
# Bad state file: remove the file to restart from scratch
|
||||
os.unlink(state_file)
|
||||
except IOError as err:
|
||||
# Ignore file not found error
|
||||
|
@ -69,33 +74,39 @@ def hook_post_device(locks, _):
|
|||
locks[0] = None
|
||||
|
||||
|
||||
def partitions_filter(states, step, part_power, next_part_power,
|
||||
def partitions_filter(states, part_power, next_part_power,
|
||||
datadir_path, partitions):
|
||||
# Remove all non partitions first (eg: auditor_status_ALL.json)
|
||||
partitions = [p for p in partitions if p.isdigit()]
|
||||
|
||||
if not (step == STEP_CLEANUP and part_power == next_part_power):
|
||||
# This is not a cleanup after cancel, partitions in the upper half are
|
||||
# new partitions, there is nothing to relink/cleanup from there
|
||||
partitions = [p for p in partitions
|
||||
if int(p) < 2 ** next_part_power / 2]
|
||||
relinking = (part_power != next_part_power)
|
||||
if relinking:
|
||||
# All partitions in the upper half are new partitions and there is
|
||||
# nothing to relink there
|
||||
partitions = [part for part in partitions
|
||||
if int(part) < 2 ** part_power]
|
||||
elif "prev_part_power" in states:
|
||||
# All partitions in the upper half are new partitions and there is
|
||||
# nothing to clean up there
|
||||
partitions = [part for part in partitions
|
||||
if int(part) < 2 ** states["prev_part_power"]]
|
||||
|
||||
# Format: { 'part': [relinked, cleaned] }
|
||||
if states:
|
||||
missing = list(set(partitions) - set(states.keys()))
|
||||
# Format: { 'part': processed }
|
||||
if states["state"]:
|
||||
missing = list(set(partitions) - set(states["state"].keys()))
|
||||
if missing:
|
||||
# All missing partitions was created after the first run of
|
||||
# relink, so after the new ring was distribued, so they already
|
||||
# are hardlinked in both partitions, but they will need to
|
||||
# cleaned.. Just update the state file.
|
||||
# All missing partitions were created after the first run of the
|
||||
# relinker with this part_power/next_part_power pair. This is
|
||||
# expected when relinking, where new partitions appear that are
|
||||
# appropriate for the target part power. In such cases, there's
|
||||
# nothing to be done. Err on the side of caution during cleanup,
|
||||
# however.
|
||||
for part in missing:
|
||||
states[part] = [True, False]
|
||||
if step == STEP_RELINK:
|
||||
partitions = [str(p) for p, (r, c) in states.items() if not r]
|
||||
elif step == STEP_CLEANUP:
|
||||
partitions = [str(p) for p, (r, c) in states.items() if not c]
|
||||
states["state"][part] = relinking
|
||||
partitions = [str(part) for part, processed in states["state"].items()
|
||||
if not processed]
|
||||
else:
|
||||
states.update({str(p): [False, False] for p in partitions})
|
||||
states["state"].update({str(part): False for part in partitions})
|
||||
|
||||
# Always scan the partitions in reverse order to minimize the amount of IO
|
||||
# (it actually only matters for relink, not for cleanup).
|
||||
|
@ -107,7 +118,7 @@ def partitions_filter(states, step, part_power, next_part_power,
|
|||
# If the relinker then scan partition 1, it will listdir that object while
|
||||
# it's unnecessary. By working in reverse order of partitions, this is
|
||||
# avoided.
|
||||
partitions = sorted(partitions, key=lambda x: int(x), reverse=True)
|
||||
partitions = sorted(partitions, key=lambda part: int(part), reverse=True)
|
||||
|
||||
return partitions
|
||||
|
||||
|
@ -124,10 +135,8 @@ def hook_post_partition(states, step,
|
|||
state_file = os.path.join(device_path,
|
||||
STATE_FILE.format(datadir=datadir_name))
|
||||
|
||||
if step == STEP_RELINK:
|
||||
states[part][0] = True
|
||||
elif step == STEP_CLEANUP:
|
||||
states[part][1] = True
|
||||
if step in (STEP_RELINK, STEP_CLEANUP):
|
||||
states["state"][part] = True
|
||||
with open(state_tmp_file, 'wt') as f:
|
||||
json.dump(states, f)
|
||||
os.fsync(f.fileno())
|
||||
|
@ -164,14 +173,17 @@ def relink(swift_dir='/etc/swift',
|
|||
datadir = diskfile.get_data_dir(policy)
|
||||
|
||||
locks = [None]
|
||||
states = {}
|
||||
states = {
|
||||
"part_power": part_power,
|
||||
"next_part_power": next_part_power,
|
||||
"state": {},
|
||||
}
|
||||
relink_devices_filter = partial(devices_filter, device)
|
||||
relink_hook_pre_device = partial(hook_pre_device, locks, states,
|
||||
datadir)
|
||||
relink_hook_post_device = partial(hook_post_device, locks)
|
||||
relink_partition_filter = partial(partitions_filter,
|
||||
states, STEP_RELINK,
|
||||
part_power, next_part_power)
|
||||
states, part_power, next_part_power)
|
||||
relink_hook_post_partition = partial(hook_post_partition,
|
||||
states, STEP_RELINK)
|
||||
relink_hashes_filter = partial(hashes_filter, next_part_power)
|
||||
|
@ -228,14 +240,17 @@ def cleanup(swift_dir='/etc/swift',
|
|||
datadir = diskfile.get_data_dir(policy)
|
||||
|
||||
locks = [None]
|
||||
states = {}
|
||||
states = {
|
||||
"part_power": part_power,
|
||||
"next_part_power": next_part_power,
|
||||
"state": {},
|
||||
}
|
||||
cleanup_devices_filter = partial(devices_filter, device)
|
||||
cleanup_hook_pre_device = partial(hook_pre_device, locks, states,
|
||||
datadir)
|
||||
cleanup_hook_post_device = partial(hook_post_device, locks)
|
||||
cleanup_partition_filter = partial(partitions_filter,
|
||||
states, STEP_CLEANUP,
|
||||
part_power, next_part_power)
|
||||
states, part_power, next_part_power)
|
||||
cleanup_hook_post_partition = partial(hook_post_partition,
|
||||
states, STEP_CLEANUP)
|
||||
cleanup_hashes_filter = partial(hashes_filter, next_part_power)
|
||||
|
|
|
@ -178,16 +178,30 @@ class TestRelinker(unittest.TestCase):
|
|||
self._save_ring()
|
||||
relinker.relink(self.testdir, self.devices, True)
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f), {str(self.part): [True, False]})
|
||||
orig_inode = os.stat(state_file).st_ino
|
||||
self.assertEqual(json.load(f), {
|
||||
"part_power": PART_POWER,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {str(self.part): True}})
|
||||
|
||||
self.rb.increase_partition_power()
|
||||
self.rb._ring = None # Force builder to reload ring
|
||||
self._save_ring()
|
||||
relinker.cleanup(self.testdir, self.devices, True)
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f),
|
||||
{str(self.part): [True, True],
|
||||
str(self.next_part): [True, True]})
|
||||
# Keep the state file open during cleanup so the inode can't be
|
||||
# released/re-used when it gets unlinked
|
||||
self.assertEqual(orig_inode, os.stat(state_file).st_ino)
|
||||
relinker.cleanup(self.testdir, self.devices, True)
|
||||
self.assertNotEqual(orig_inode, os.stat(state_file).st_ino)
|
||||
with open(state_file, 'rt') as f:
|
||||
# NB: part_power/next_part_power tuple changed, so state was reset
|
||||
# (though we track prev_part_power for an efficient clean up)
|
||||
self.assertEqual(json.load(f), {
|
||||
"prev_part_power": PART_POWER,
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {str(self.part): True,
|
||||
str(self.next_part): True}})
|
||||
|
||||
def test_devices_filter_filtering(self):
|
||||
# With no filtering, returns all devices
|
||||
|
@ -210,7 +224,8 @@ class TestRelinker(unittest.TestCase):
|
|||
lock_file = os.path.join(device_path, '.relink.%s.lock' % datadir)
|
||||
|
||||
# The first run gets the lock
|
||||
relinker.hook_pre_device(locks, {}, datadir, device_path)
|
||||
states = {"state": {}}
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertNotEqual([None], locks)
|
||||
|
||||
# A following run would block
|
||||
|
@ -232,20 +247,21 @@ class TestRelinker(unittest.TestCase):
|
|||
datadir_path = os.path.join(device_path, datadir)
|
||||
state_file = os.path.join(device_path, 'relink.%s.json' % datadir)
|
||||
|
||||
def call_partition_filter(step, parts):
|
||||
def call_partition_filter(part_power, next_part_power, parts):
|
||||
# Partition 312 will be ignored because it must have been created
|
||||
# by the relinker
|
||||
return relinker.partitions_filter(states, step,
|
||||
PART_POWER, PART_POWER + 1,
|
||||
return relinker.partitions_filter(states,
|
||||
part_power, next_part_power,
|
||||
datadir_path, parts)
|
||||
|
||||
# Start relinking
|
||||
states = {}
|
||||
states = {"part_power": PART_POWER, "next_part_power": PART_POWER + 1,
|
||||
"state": {}}
|
||||
|
||||
# Load the states: As it starts, it must be empty
|
||||
locks = [None]
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual({}, states)
|
||||
self.assertEqual({}, states["state"])
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
# Partition 312 is ignored because it must have been created with the
|
||||
|
@ -253,80 +269,134 @@ class TestRelinker(unittest.TestCase):
|
|||
# 96 and 227 are reverse ordered
|
||||
# auditor_status_ALL.json is ignored because it's not a partition
|
||||
self.assertEqual(['227', '96'],
|
||||
call_partition_filter(relinker.STEP_RELINK,
|
||||
call_partition_filter(PART_POWER, PART_POWER + 1,
|
||||
['96', '227', '312',
|
||||
'auditor_status.json']))
|
||||
self.assertEqual(states, {'96': [False, False], '227': [False, False]})
|
||||
self.assertEqual(states["state"], {'96': False, '227': False})
|
||||
|
||||
# Ack partition 96
|
||||
relinker.hook_post_partition(states, relinker.STEP_RELINK,
|
||||
os.path.join(datadir_path, '96'))
|
||||
self.assertEqual(states, {'96': [True, False], '227': [False, False]})
|
||||
self.assertEqual(states["state"], {'96': True, '227': False})
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f), {'96': [True, False],
|
||||
'227': [False, False]})
|
||||
self.assertEqual(json.load(f), {
|
||||
"part_power": PART_POWER,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {'96': True, '227': False}})
|
||||
|
||||
# Restart relinking after only part 96 was done
|
||||
self.assertEqual(['227'],
|
||||
call_partition_filter(relinker.STEP_RELINK,
|
||||
call_partition_filter(PART_POWER, PART_POWER + 1,
|
||||
['96', '227', '312']))
|
||||
self.assertEqual(states, {'96': [True, False], '227': [False, False]})
|
||||
self.assertEqual(states["state"], {'96': True, '227': False})
|
||||
|
||||
# Ack partition 227
|
||||
relinker.hook_post_partition(states, relinker.STEP_RELINK,
|
||||
os.path.join(datadir_path, '227'))
|
||||
self.assertEqual(states, {'96': [True, False], '227': [True, False]})
|
||||
self.assertEqual(states["state"], {'96': True, '227': True})
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f), {'96': [True, False],
|
||||
'227': [True, False]})
|
||||
self.assertEqual(json.load(f), {
|
||||
"part_power": PART_POWER,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {'96': True, '227': True}})
|
||||
|
||||
# If the process restarts, it reload the state
|
||||
locks = [None]
|
||||
states = {}
|
||||
states = {
|
||||
"part_power": PART_POWER,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {},
|
||||
}
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual(states, {'96': [True, False], '227': [True, False]})
|
||||
self.assertEqual(states, {
|
||||
"part_power": PART_POWER,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {'96': True, '227': True}})
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
# Start cleanup -- note that part_power and next_part_power now match!
|
||||
states = {
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {},
|
||||
}
|
||||
# ...which means our state file was ignored
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual(states, {
|
||||
"prev_part_power": PART_POWER,
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {}})
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
# Start cleanup
|
||||
self.assertEqual(['227', '96'],
|
||||
call_partition_filter(relinker.STEP_CLEANUP,
|
||||
call_partition_filter(PART_POWER + 1, PART_POWER + 1,
|
||||
['96', '227', '312']))
|
||||
# Ack partition 227
|
||||
relinker.hook_post_partition(states, relinker.STEP_CLEANUP,
|
||||
os.path.join(datadir_path, '227'))
|
||||
self.assertEqual(states, {'96': [True, False], '227': [True, True]})
|
||||
self.assertEqual(states["state"],
|
||||
{'96': False, '227': True})
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f), {'96': [True, False],
|
||||
'227': [True, True]})
|
||||
self.assertEqual(json.load(f), {
|
||||
"prev_part_power": PART_POWER,
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {'96': False, '227': True}})
|
||||
|
||||
# Restart cleanup after only part 227 was done
|
||||
self.assertEqual(['96'],
|
||||
call_partition_filter(relinker.STEP_CLEANUP,
|
||||
call_partition_filter(PART_POWER + 1, PART_POWER + 1,
|
||||
['96', '227', '312']))
|
||||
self.assertEqual(states, {'96': [True, False], '227': [True, True]})
|
||||
self.assertEqual(states["state"],
|
||||
{'96': False, '227': True})
|
||||
|
||||
# Ack partition 96
|
||||
relinker.hook_post_partition(states, relinker.STEP_CLEANUP,
|
||||
os.path.join(datadir_path, '96'))
|
||||
self.assertEqual(states, {'96': [True, True], '227': [True, True]})
|
||||
self.assertEqual(states["state"],
|
||||
{'96': True, '227': True})
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f), {'96': [True, True],
|
||||
'227': [True, True]})
|
||||
self.assertEqual(json.load(f), {
|
||||
"prev_part_power": PART_POWER,
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {'96': True, '227': True}})
|
||||
|
||||
# At the end, the state is still accurate
|
||||
locks = [None]
|
||||
states = {}
|
||||
states = {
|
||||
"prev_part_power": PART_POWER,
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {},
|
||||
}
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual(states, {'96': [True, True], '227': [True, True]})
|
||||
self.assertEqual(states["state"],
|
||||
{'96': True, '227': True})
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
# If the part_power/next_part_power tuple differs, restart from scratch
|
||||
locks = [None]
|
||||
states = {
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 2,
|
||||
"state": {},
|
||||
}
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual(states["state"], {})
|
||||
self.assertFalse(os.path.exists(state_file))
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
# If the file gets corrupted, restart from scratch
|
||||
with open(state_file, 'wt') as f:
|
||||
f.write('NOT JSON')
|
||||
locks = [None]
|
||||
states = {}
|
||||
states = {"part_power": PART_POWER, "next_part_power": PART_POWER + 1,
|
||||
"state": {}}
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual(states, {})
|
||||
self.assertEqual(states["state"], {})
|
||||
self.assertFalse(os.path.exists(state_file))
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
def test_cleanup_not_yet_relinked(self):
|
||||
|
|
Loading…
Reference in New Issue