relinker: Track part_power/next_part_power in state file

...and only update from the state file if those part powers match the
work we're currently doing. Otherwise, we would skip relinking when
trying to do multiple increases in quick succession, causing misplaced
data that clients could not access.

Note that because the part_power/next_part_power pair *must* change
between relinking and cleanup, we now only need to track one boolean per
part; the state file will automatically reset when switching modes.

Also note that any state files in the old format will be discarded --
this is safe, but may cause increased I/O. OTOH, operators probably
shouldn't be upgrading Swift mid-part-power-increase anyway.
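For illustration only (this sketch is not part of the change; the part
powers 10/11 and partitions "96"/"227" are arbitrary, mirroring the
updated tests below), the new-format state file written during relinking
versus during cleanup looks roughly like:

    # Sketch of the new state-file contents, expressed as Python literals.
    relink_state = {             # written while relinking (powers differ)
        "part_power": 10,
        "next_part_power": 11,
        "state": {"96": True, "227": False},  # one "processed" flag per part
    }
    cleanup_state = {            # written during cleanup (powers now match)
        "prev_part_power": 10,   # remembered so upper-half parts are skipped
        "part_power": 11,
        "next_part_power": 11,
        "state": {"96": False, "227": False},
    }

Because the relink-phase file records a different part_power/next_part_power
pair, the cleanup phase discards it and starts its own.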

Change-Id: Id08d0893db2c6e6f9ce1e42a104409568daf985b
Closes-Bug: #1910589
Author: Tim Burke
Committed by: Tim Burke
Date: 2021-01-07 16:18:07 -08:00
Commit: c10f8ae4ec
Parent: 92b15b0436
2 changed files with 157 additions and 72 deletions


@@ -50,13 +50,18 @@ def hook_pre_device(locks, states, datadir, device_path):
     locks[0] = fd
 
     state_file = os.path.join(device_path, STATE_FILE.format(datadir=datadir))
-    states.clear()
+    states["state"].clear()
     try:
         with open(state_file, 'rt') as f:
-            tmp = json.load(f)
-            states.update(tmp)
-    except ValueError:
-        # Invalid JSON: remove the file to restart from scratch
+            state_from_disk = json.load(f)
+            if state_from_disk["next_part_power"] != states["next_part_power"]:
+                raise ValueError
+            if state_from_disk["part_power"] != states["part_power"]:
+                states["prev_part_power"] = state_from_disk["part_power"]
+                raise ValueError
+            states["state"].update(state_from_disk["state"])
+    except (ValueError, TypeError, KeyError):
+        # Bad state file: remove the file to restart from scratch
         os.unlink(state_file)
     except IOError as err:
         # Ignore file not found error
@@ -69,33 +74,39 @@ def hook_post_device(locks, _):
     locks[0] = None
 
 
-def partitions_filter(states, step, part_power, next_part_power,
+def partitions_filter(states, part_power, next_part_power,
                       datadir_path, partitions):
     # Remove all non partitions first (eg: auditor_status_ALL.json)
     partitions = [p for p in partitions if p.isdigit()]
 
-    if not (step == STEP_CLEANUP and part_power == next_part_power):
-        # This is not a cleanup after cancel, partitions in the upper half are
-        # new partitions, there is nothing to relink/cleanup from there
-        partitions = [p for p in partitions
-                      if int(p) < 2 ** next_part_power / 2]
+    relinking = (part_power != next_part_power)
+    if relinking:
+        # All partitions in the upper half are new partitions and there is
+        # nothing to relink there
+        partitions = [part for part in partitions
+                      if int(part) < 2 ** part_power]
+    elif "prev_part_power" in states:
+        # All partitions in the upper half are new partitions and there is
+        # nothing to clean up there
+        partitions = [part for part in partitions
+                      if int(part) < 2 ** states["prev_part_power"]]
 
-    # Format: { 'part': [relinked, cleaned] }
-    if states:
-        missing = list(set(partitions) - set(states.keys()))
+    # Format: { 'part': processed }
+    if states["state"]:
+        missing = list(set(partitions) - set(states["state"].keys()))
         if missing:
-            # All missing partitions was created after the first run of
-            # relink, so after the new ring was distribued, so they already
-            # are hardlinked in both partitions, but they will need to
-            # cleaned.. Just update the state file.
+            # All missing partitions were created after the first run of the
+            # relinker with this part_power/next_part_power pair. This is
+            # expected when relinking, where new partitions appear that are
+            # appropriate for the target part power. In such cases, there's
+            # nothing to be done. Err on the side of caution during cleanup,
+            # however.
            for part in missing:
-                states[part] = [True, False]
+                states["state"][part] = relinking
-        if step == STEP_RELINK:
-            partitions = [str(p) for p, (r, c) in states.items() if not r]
-        elif step == STEP_CLEANUP:
-            partitions = [str(p) for p, (r, c) in states.items() if not c]
+        partitions = [str(part) for part, processed in states["state"].items()
+                      if not processed]
     else:
-        states.update({str(p): [False, False] for p in partitions})
+        states["state"].update({str(part): False for part in partitions})
 
     # Always scan the partitions in reverse order to minimize the amount of IO
     # (it actually only matters for relink, not for cleanup).
@@ -107,7 +118,7 @@ def partitions_filter(states, step, part_power, next_part_power,
     # If the relinker then scan partition 1, it will listdir that object while
     # it's unnecessary. By working in reverse order of partitions, this is
     # avoided.
-    partitions = sorted(partitions, key=lambda x: int(x), reverse=True)
+    partitions = sorted(partitions, key=lambda part: int(part), reverse=True)
 
     return partitions
@@ -124,10 +135,8 @@ def hook_post_partition(states, step,
     state_file = os.path.join(device_path,
                               STATE_FILE.format(datadir=datadir_name))
-    if step == STEP_RELINK:
-        states[part][0] = True
-    elif step == STEP_CLEANUP:
-        states[part][1] = True
+    if step in (STEP_RELINK, STEP_CLEANUP):
+        states["state"][part] = True
     with open(state_tmp_file, 'wt') as f:
         json.dump(states, f)
         os.fsync(f.fileno())
@@ -164,14 +173,17 @@ def relink(swift_dir='/etc/swift',
     datadir = diskfile.get_data_dir(policy)
 
     locks = [None]
-    states = {}
+    states = {
+        "part_power": part_power,
+        "next_part_power": next_part_power,
+        "state": {},
+    }
     relink_devices_filter = partial(devices_filter, device)
     relink_hook_pre_device = partial(hook_pre_device, locks, states,
                                      datadir)
     relink_hook_post_device = partial(hook_post_device, locks)
     relink_partition_filter = partial(partitions_filter,
-                                      states, STEP_RELINK,
-                                      part_power, next_part_power)
+                                      states, part_power, next_part_power)
     relink_hook_post_partition = partial(hook_post_partition,
                                          states, STEP_RELINK)
     relink_hashes_filter = partial(hashes_filter, next_part_power)
@@ -228,14 +240,17 @@ def cleanup(swift_dir='/etc/swift',
     datadir = diskfile.get_data_dir(policy)
 
     locks = [None]
-    states = {}
+    states = {
+        "part_power": part_power,
+        "next_part_power": next_part_power,
+        "state": {},
+    }
     cleanup_devices_filter = partial(devices_filter, device)
     cleanup_hook_pre_device = partial(hook_pre_device, locks, states,
                                       datadir)
     cleanup_hook_post_device = partial(hook_post_device, locks)
     cleanup_partition_filter = partial(partitions_filter,
-                                       states, STEP_CLEANUP,
-                                       part_power, next_part_power)
+                                       states, part_power, next_part_power)
     cleanup_hook_post_partition = partial(hook_post_partition,
                                           states, STEP_CLEANUP)
     cleanup_hashes_filter = partial(hashes_filter, next_part_power)


@@ -178,16 +178,30 @@ class TestRelinker(unittest.TestCase):
         self._save_ring()
         relinker.relink(self.testdir, self.devices, True)
         with open(state_file, 'rt') as f:
-            self.assertEqual(json.load(f), {str(self.part): [True, False]})
+            orig_inode = os.stat(state_file).st_ino
+            self.assertEqual(json.load(f), {
+                "part_power": PART_POWER,
+                "next_part_power": PART_POWER + 1,
+                "state": {str(self.part): True}})
 
         self.rb.increase_partition_power()
         self.rb._ring = None  # Force builder to reload ring
         self._save_ring()
-        relinker.cleanup(self.testdir, self.devices, True)
         with open(state_file, 'rt') as f:
-            self.assertEqual(json.load(f),
-                             {str(self.part): [True, True],
-                              str(self.next_part): [True, True]})
+            # Keep the state file open during cleanup so the inode can't be
+            # released/re-used when it gets unlinked
+            self.assertEqual(orig_inode, os.stat(state_file).st_ino)
+            relinker.cleanup(self.testdir, self.devices, True)
+            self.assertNotEqual(orig_inode, os.stat(state_file).st_ino)
+        with open(state_file, 'rt') as f:
+            # NB: part_power/next_part_power tuple changed, so state was reset
+            # (though we track prev_part_power for an efficient clean up)
+            self.assertEqual(json.load(f), {
+                "prev_part_power": PART_POWER,
+                "part_power": PART_POWER + 1,
+                "next_part_power": PART_POWER + 1,
+                "state": {str(self.part): True,
+                          str(self.next_part): True}})
 
     def test_devices_filter_filtering(self):
         # With no filtering, returns all devices
@@ -210,7 +224,8 @@ class TestRelinker(unittest.TestCase):
         lock_file = os.path.join(device_path, '.relink.%s.lock' % datadir)
 
         # The first run gets the lock
-        relinker.hook_pre_device(locks, {}, datadir, device_path)
+        states = {"state": {}}
+        relinker.hook_pre_device(locks, states, datadir, device_path)
         self.assertNotEqual([None], locks)
 
         # A following run would block
@@ -232,20 +247,21 @@ class TestRelinker(unittest.TestCase):
         datadir_path = os.path.join(device_path, datadir)
         state_file = os.path.join(device_path, 'relink.%s.json' % datadir)
 
-        def call_partition_filter(step, parts):
+        def call_partition_filter(part_power, next_part_power, parts):
             # Partition 312 will be ignored because it must have been created
             # by the relinker
-            return relinker.partitions_filter(states, step,
-                                              PART_POWER, PART_POWER + 1,
+            return relinker.partitions_filter(states,
+                                              part_power, next_part_power,
                                               datadir_path, parts)
 
         # Start relinking
-        states = {}
+        states = {"part_power": PART_POWER, "next_part_power": PART_POWER + 1,
+                  "state": {}}
         # Load the states: As it starts, it must be empty
         locks = [None]
         relinker.hook_pre_device(locks, states, datadir, device_path)
-        self.assertEqual({}, states)
+        self.assertEqual({}, states["state"])
         os.close(locks[0])  # Release the lock
 
         # Partition 312 is ignored because it must have been created with the
@@ -253,80 +269,134 @@ class TestRelinker(unittest.TestCase):
         # 96 and 227 are reverse ordered
         # auditor_status_ALL.json is ignored because it's not a partition
         self.assertEqual(['227', '96'],
-                         call_partition_filter(relinker.STEP_RELINK,
+                         call_partition_filter(PART_POWER, PART_POWER + 1,
                                                ['96', '227', '312',
                                                 'auditor_status.json']))
-        self.assertEqual(states, {'96': [False, False], '227': [False, False]})
+        self.assertEqual(states["state"], {'96': False, '227': False})
 
         # Ack partition 96
         relinker.hook_post_partition(states, relinker.STEP_RELINK,
                                      os.path.join(datadir_path, '96'))
-        self.assertEqual(states, {'96': [True, False], '227': [False, False]})
+        self.assertEqual(states["state"], {'96': True, '227': False})
         with open(state_file, 'rt') as f:
-            self.assertEqual(json.load(f), {'96': [True, False],
-                                            '227': [False, False]})
+            self.assertEqual(json.load(f), {
+                "part_power": PART_POWER,
+                "next_part_power": PART_POWER + 1,
+                "state": {'96': True, '227': False}})
 
         # Restart relinking after only part 96 was done
         self.assertEqual(['227'],
-                         call_partition_filter(relinker.STEP_RELINK,
+                         call_partition_filter(PART_POWER, PART_POWER + 1,
                                                ['96', '227', '312']))
-        self.assertEqual(states, {'96': [True, False], '227': [False, False]})
+        self.assertEqual(states["state"], {'96': True, '227': False})
 
         # Ack partition 227
         relinker.hook_post_partition(states, relinker.STEP_RELINK,
                                      os.path.join(datadir_path, '227'))
-        self.assertEqual(states, {'96': [True, False], '227': [True, False]})
+        self.assertEqual(states["state"], {'96': True, '227': True})
         with open(state_file, 'rt') as f:
-            self.assertEqual(json.load(f), {'96': [True, False],
-                                            '227': [True, False]})
+            self.assertEqual(json.load(f), {
+                "part_power": PART_POWER,
+                "next_part_power": PART_POWER + 1,
+                "state": {'96': True, '227': True}})
 
         # If the process restarts, it reload the state
         locks = [None]
-        states = {}
+        states = {
+            "part_power": PART_POWER,
+            "next_part_power": PART_POWER + 1,
+            "state": {},
+        }
         relinker.hook_pre_device(locks, states, datadir, device_path)
-        self.assertEqual(states, {'96': [True, False], '227': [True, False]})
+        self.assertEqual(states, {
+            "part_power": PART_POWER,
+            "next_part_power": PART_POWER + 1,
+            "state": {'96': True, '227': True}})
         os.close(locks[0])  # Release the lock
 
+        # Start cleanup -- note that part_power and next_part_power now match!
+        states = {
+            "part_power": PART_POWER + 1,
+            "next_part_power": PART_POWER + 1,
+            "state": {},
+        }
+        # ...which means our state file was ignored
+        relinker.hook_pre_device(locks, states, datadir, device_path)
+        self.assertEqual(states, {
+            "prev_part_power": PART_POWER,
+            "part_power": PART_POWER + 1,
+            "next_part_power": PART_POWER + 1,
+            "state": {}})
+        os.close(locks[0])  # Release the lock
+
         # Start cleanup
         self.assertEqual(['227', '96'],
-                         call_partition_filter(relinker.STEP_CLEANUP,
+                         call_partition_filter(PART_POWER + 1, PART_POWER + 1,
                                                ['96', '227', '312']))
 
         # Ack partition 227
         relinker.hook_post_partition(states, relinker.STEP_CLEANUP,
                                      os.path.join(datadir_path, '227'))
-        self.assertEqual(states, {'96': [True, False], '227': [True, True]})
+        self.assertEqual(states["state"],
+                         {'96': False, '227': True})
         with open(state_file, 'rt') as f:
-            self.assertEqual(json.load(f), {'96': [True, False],
-                                            '227': [True, True]})
+            self.assertEqual(json.load(f), {
+                "prev_part_power": PART_POWER,
+                "part_power": PART_POWER + 1,
+                "next_part_power": PART_POWER + 1,
+                "state": {'96': False, '227': True}})
 
         # Restart cleanup after only part 227 was done
         self.assertEqual(['96'],
-                         call_partition_filter(relinker.STEP_CLEANUP,
+                         call_partition_filter(PART_POWER + 1, PART_POWER + 1,
                                                ['96', '227', '312']))
-        self.assertEqual(states, {'96': [True, False], '227': [True, True]})
+        self.assertEqual(states["state"],
+                         {'96': False, '227': True})
 
         # Ack partition 96
         relinker.hook_post_partition(states, relinker.STEP_CLEANUP,
                                      os.path.join(datadir_path, '96'))
-        self.assertEqual(states, {'96': [True, True], '227': [True, True]})
+        self.assertEqual(states["state"],
+                         {'96': True, '227': True})
        with open(state_file, 'rt') as f:
-            self.assertEqual(json.load(f), {'96': [True, True],
-                                            '227': [True, True]})
+            self.assertEqual(json.load(f), {
+                "prev_part_power": PART_POWER,
+                "part_power": PART_POWER + 1,
+                "next_part_power": PART_POWER + 1,
+                "state": {'96': True, '227': True}})
 
         # At the end, the state is still accurate
         locks = [None]
-        states = {}
+        states = {
+            "prev_part_power": PART_POWER,
+            "part_power": PART_POWER + 1,
+            "next_part_power": PART_POWER + 1,
+            "state": {},
+        }
         relinker.hook_pre_device(locks, states, datadir, device_path)
-        self.assertEqual(states, {'96': [True, True], '227': [True, True]})
+        self.assertEqual(states["state"],
+                         {'96': True, '227': True})
         os.close(locks[0])  # Release the lock
 
+        # If the part_power/next_part_power tuple differs, restart from scratch
+        locks = [None]
+        states = {
+            "part_power": PART_POWER + 1,
+            "next_part_power": PART_POWER + 2,
+            "state": {},
+        }
+        relinker.hook_pre_device(locks, states, datadir, device_path)
+        self.assertEqual(states["state"], {})
+        self.assertFalse(os.path.exists(state_file))
+        os.close(locks[0])  # Release the lock
+
         # If the file gets corrupted, restart from scratch
         with open(state_file, 'wt') as f:
             f.write('NOT JSON')
         locks = [None]
-        states = {}
+        states = {"part_power": PART_POWER, "next_part_power": PART_POWER + 1,
+                  "state": {}}
         relinker.hook_pre_device(locks, states, datadir, device_path)
-        self.assertEqual(states, {})
+        self.assertEqual(states["state"], {})
         self.assertFalse(os.path.exists(state_file))
         os.close(locks[0])  # Release the lock
 
     def test_cleanup_not_yet_relinked(self):