diff --git a/swift/cli/relinker.py b/swift/cli/relinker.py index 630c0e98ee..9bf6b52965 100644 --- a/swift/cli/relinker.py +++ b/swift/cli/relinker.py @@ -50,13 +50,18 @@ def hook_pre_device(locks, states, datadir, device_path): locks[0] = fd state_file = os.path.join(device_path, STATE_FILE.format(datadir=datadir)) - states.clear() + states["state"].clear() try: with open(state_file, 'rt') as f: - tmp = json.load(f) - states.update(tmp) - except ValueError: - # Invalid JSON: remove the file to restart from scratch + state_from_disk = json.load(f) + if state_from_disk["next_part_power"] != states["next_part_power"]: + raise ValueError + if state_from_disk["part_power"] != states["part_power"]: + states["prev_part_power"] = state_from_disk["part_power"] + raise ValueError + states["state"].update(state_from_disk["state"]) + except (ValueError, TypeError, KeyError): + # Bad state file: remove the file to restart from scratch os.unlink(state_file) except IOError as err: # Ignore file not found error @@ -69,33 +74,39 @@ def hook_post_device(locks, _): locks[0] = None -def partitions_filter(states, step, part_power, next_part_power, +def partitions_filter(states, part_power, next_part_power, datadir_path, partitions): # Remove all non partitions first (eg: auditor_status_ALL.json) partitions = [p for p in partitions if p.isdigit()] - if not (step == STEP_CLEANUP and part_power == next_part_power): - # This is not a cleanup after cancel, partitions in the upper half are - # new partitions, there is nothing to relink/cleanup from there - partitions = [p for p in partitions - if int(p) < 2 ** next_part_power / 2] + relinking = (part_power != next_part_power) + if relinking: + # All partitions in the upper half are new partitions and there is + # nothing to relink there + partitions = [part for part in partitions + if int(part) < 2 ** part_power] + elif "prev_part_power" in states: + # All partitions in the upper half are new partitions and there is + # nothing to clean up there + partitions = [part for part in partitions + if int(part) < 2 ** states["prev_part_power"]] - # Format: { 'part': [relinked, cleaned] } - if states: - missing = list(set(partitions) - set(states.keys())) + # Format: { 'part': processed } + if states["state"]: + missing = list(set(partitions) - set(states["state"].keys())) if missing: - # All missing partitions was created after the first run of - # relink, so after the new ring was distribued, so they already - # are hardlinked in both partitions, but they will need to - # cleaned.. Just update the state file. + # All missing partitions were created after the first run of the + # relinker with this part_power/next_part_power pair. This is + # expected when relinking, where new partitions appear that are + # appropriate for the target part power. In such cases, there's + # nothing to be done. Err on the side of caution during cleanup, + # however. for part in missing: - states[part] = [True, False] - if step == STEP_RELINK: - partitions = [str(p) for p, (r, c) in states.items() if not r] - elif step == STEP_CLEANUP: - partitions = [str(p) for p, (r, c) in states.items() if not c] + states["state"][part] = relinking + partitions = [str(part) for part, processed in states["state"].items() + if not processed] else: - states.update({str(p): [False, False] for p in partitions}) + states["state"].update({str(part): False for part in partitions}) # Always scan the partitions in reverse order to minimize the amount of IO # (it actually only matters for relink, not for cleanup). @@ -107,7 +118,7 @@ def partitions_filter(states, step, part_power, next_part_power, # If the relinker then scan partition 1, it will listdir that object while # it's unnecessary. By working in reverse order of partitions, this is # avoided. - partitions = sorted(partitions, key=lambda x: int(x), reverse=True) + partitions = sorted(partitions, key=lambda part: int(part), reverse=True) return partitions @@ -124,10 +135,8 @@ def hook_post_partition(states, step, state_file = os.path.join(device_path, STATE_FILE.format(datadir=datadir_name)) - if step == STEP_RELINK: - states[part][0] = True - elif step == STEP_CLEANUP: - states[part][1] = True + if step in (STEP_RELINK, STEP_CLEANUP): + states["state"][part] = True with open(state_tmp_file, 'wt') as f: json.dump(states, f) os.fsync(f.fileno()) @@ -164,14 +173,17 @@ def relink(swift_dir='/etc/swift', datadir = diskfile.get_data_dir(policy) locks = [None] - states = {} + states = { + "part_power": part_power, + "next_part_power": next_part_power, + "state": {}, + } relink_devices_filter = partial(devices_filter, device) relink_hook_pre_device = partial(hook_pre_device, locks, states, datadir) relink_hook_post_device = partial(hook_post_device, locks) relink_partition_filter = partial(partitions_filter, - states, STEP_RELINK, - part_power, next_part_power) + states, part_power, next_part_power) relink_hook_post_partition = partial(hook_post_partition, states, STEP_RELINK) relink_hashes_filter = partial(hashes_filter, next_part_power) @@ -228,14 +240,17 @@ def cleanup(swift_dir='/etc/swift', datadir = diskfile.get_data_dir(policy) locks = [None] - states = {} + states = { + "part_power": part_power, + "next_part_power": next_part_power, + "state": {}, + } cleanup_devices_filter = partial(devices_filter, device) cleanup_hook_pre_device = partial(hook_pre_device, locks, states, datadir) cleanup_hook_post_device = partial(hook_post_device, locks) cleanup_partition_filter = partial(partitions_filter, - states, STEP_CLEANUP, - part_power, next_part_power) + states, part_power, next_part_power) cleanup_hook_post_partition = partial(hook_post_partition, states, STEP_CLEANUP) cleanup_hashes_filter = partial(hashes_filter, next_part_power) diff --git a/test/unit/cli/test_relinker.py b/test/unit/cli/test_relinker.py index 571f1c2d7f..8f31c33597 100644 --- a/test/unit/cli/test_relinker.py +++ b/test/unit/cli/test_relinker.py @@ -178,16 +178,30 @@ class TestRelinker(unittest.TestCase): self._save_ring() relinker.relink(self.testdir, self.devices, True) with open(state_file, 'rt') as f: - self.assertEqual(json.load(f), {str(self.part): [True, False]}) + orig_inode = os.stat(state_file).st_ino + self.assertEqual(json.load(f), { + "part_power": PART_POWER, + "next_part_power": PART_POWER + 1, + "state": {str(self.part): True}}) self.rb.increase_partition_power() self.rb._ring = None # Force builder to reload ring self._save_ring() - relinker.cleanup(self.testdir, self.devices, True) with open(state_file, 'rt') as f: - self.assertEqual(json.load(f), - {str(self.part): [True, True], - str(self.next_part): [True, True]}) + # Keep the state file open during cleanup so the inode can't be + # released/re-used when it gets unlinked + self.assertEqual(orig_inode, os.stat(state_file).st_ino) + relinker.cleanup(self.testdir, self.devices, True) + self.assertNotEqual(orig_inode, os.stat(state_file).st_ino) + with open(state_file, 'rt') as f: + # NB: part_power/next_part_power tuple changed, so state was reset + # (though we track prev_part_power for an efficient clean up) + self.assertEqual(json.load(f), { + "prev_part_power": PART_POWER, + "part_power": PART_POWER + 1, + "next_part_power": PART_POWER + 1, + "state": {str(self.part): True, + str(self.next_part): True}}) def test_devices_filter_filtering(self): # With no filtering, returns all devices @@ -210,7 +224,8 @@ class TestRelinker(unittest.TestCase): lock_file = os.path.join(device_path, '.relink.%s.lock' % datadir) # The first run gets the lock - relinker.hook_pre_device(locks, {}, datadir, device_path) + states = {"state": {}} + relinker.hook_pre_device(locks, states, datadir, device_path) self.assertNotEqual([None], locks) # A following run would block @@ -232,20 +247,21 @@ class TestRelinker(unittest.TestCase): datadir_path = os.path.join(device_path, datadir) state_file = os.path.join(device_path, 'relink.%s.json' % datadir) - def call_partition_filter(step, parts): + def call_partition_filter(part_power, next_part_power, parts): # Partition 312 will be ignored because it must have been created # by the relinker - return relinker.partitions_filter(states, step, - PART_POWER, PART_POWER + 1, + return relinker.partitions_filter(states, + part_power, next_part_power, datadir_path, parts) # Start relinking - states = {} + states = {"part_power": PART_POWER, "next_part_power": PART_POWER + 1, + "state": {}} # Load the states: As it starts, it must be empty locks = [None] relinker.hook_pre_device(locks, states, datadir, device_path) - self.assertEqual({}, states) + self.assertEqual({}, states["state"]) os.close(locks[0]) # Release the lock # Partition 312 is ignored because it must have been created with the @@ -253,80 +269,134 @@ class TestRelinker(unittest.TestCase): # 96 and 227 are reverse ordered # auditor_status_ALL.json is ignored because it's not a partition self.assertEqual(['227', '96'], - call_partition_filter(relinker.STEP_RELINK, + call_partition_filter(PART_POWER, PART_POWER + 1, ['96', '227', '312', 'auditor_status.json'])) - self.assertEqual(states, {'96': [False, False], '227': [False, False]}) + self.assertEqual(states["state"], {'96': False, '227': False}) # Ack partition 96 relinker.hook_post_partition(states, relinker.STEP_RELINK, os.path.join(datadir_path, '96')) - self.assertEqual(states, {'96': [True, False], '227': [False, False]}) + self.assertEqual(states["state"], {'96': True, '227': False}) with open(state_file, 'rt') as f: - self.assertEqual(json.load(f), {'96': [True, False], - '227': [False, False]}) + self.assertEqual(json.load(f), { + "part_power": PART_POWER, + "next_part_power": PART_POWER + 1, + "state": {'96': True, '227': False}}) # Restart relinking after only part 96 was done self.assertEqual(['227'], - call_partition_filter(relinker.STEP_RELINK, + call_partition_filter(PART_POWER, PART_POWER + 1, ['96', '227', '312'])) - self.assertEqual(states, {'96': [True, False], '227': [False, False]}) + self.assertEqual(states["state"], {'96': True, '227': False}) # Ack partition 227 relinker.hook_post_partition(states, relinker.STEP_RELINK, os.path.join(datadir_path, '227')) - self.assertEqual(states, {'96': [True, False], '227': [True, False]}) + self.assertEqual(states["state"], {'96': True, '227': True}) with open(state_file, 'rt') as f: - self.assertEqual(json.load(f), {'96': [True, False], - '227': [True, False]}) + self.assertEqual(json.load(f), { + "part_power": PART_POWER, + "next_part_power": PART_POWER + 1, + "state": {'96': True, '227': True}}) # If the process restarts, it reload the state locks = [None] - states = {} + states = { + "part_power": PART_POWER, + "next_part_power": PART_POWER + 1, + "state": {}, + } relinker.hook_pre_device(locks, states, datadir, device_path) - self.assertEqual(states, {'96': [True, False], '227': [True, False]}) + self.assertEqual(states, { + "part_power": PART_POWER, + "next_part_power": PART_POWER + 1, + "state": {'96': True, '227': True}}) + os.close(locks[0]) # Release the lock + + # Start cleanup -- note that part_power and next_part_power now match! + states = { + "part_power": PART_POWER + 1, + "next_part_power": PART_POWER + 1, + "state": {}, + } + # ...which means our state file was ignored + relinker.hook_pre_device(locks, states, datadir, device_path) + self.assertEqual(states, { + "prev_part_power": PART_POWER, + "part_power": PART_POWER + 1, + "next_part_power": PART_POWER + 1, + "state": {}}) os.close(locks[0]) # Release the lock - # Start cleanup self.assertEqual(['227', '96'], - call_partition_filter(relinker.STEP_CLEANUP, + call_partition_filter(PART_POWER + 1, PART_POWER + 1, ['96', '227', '312'])) # Ack partition 227 relinker.hook_post_partition(states, relinker.STEP_CLEANUP, os.path.join(datadir_path, '227')) - self.assertEqual(states, {'96': [True, False], '227': [True, True]}) + self.assertEqual(states["state"], + {'96': False, '227': True}) with open(state_file, 'rt') as f: - self.assertEqual(json.load(f), {'96': [True, False], - '227': [True, True]}) + self.assertEqual(json.load(f), { + "prev_part_power": PART_POWER, + "part_power": PART_POWER + 1, + "next_part_power": PART_POWER + 1, + "state": {'96': False, '227': True}}) # Restart cleanup after only part 227 was done self.assertEqual(['96'], - call_partition_filter(relinker.STEP_CLEANUP, + call_partition_filter(PART_POWER + 1, PART_POWER + 1, ['96', '227', '312'])) - self.assertEqual(states, {'96': [True, False], '227': [True, True]}) + self.assertEqual(states["state"], + {'96': False, '227': True}) # Ack partition 96 relinker.hook_post_partition(states, relinker.STEP_CLEANUP, os.path.join(datadir_path, '96')) - self.assertEqual(states, {'96': [True, True], '227': [True, True]}) + self.assertEqual(states["state"], + {'96': True, '227': True}) with open(state_file, 'rt') as f: - self.assertEqual(json.load(f), {'96': [True, True], - '227': [True, True]}) + self.assertEqual(json.load(f), { + "prev_part_power": PART_POWER, + "part_power": PART_POWER + 1, + "next_part_power": PART_POWER + 1, + "state": {'96': True, '227': True}}) # At the end, the state is still accurate locks = [None] - states = {} + states = { + "prev_part_power": PART_POWER, + "part_power": PART_POWER + 1, + "next_part_power": PART_POWER + 1, + "state": {}, + } relinker.hook_pre_device(locks, states, datadir, device_path) - self.assertEqual(states, {'96': [True, True], '227': [True, True]}) + self.assertEqual(states["state"], + {'96': True, '227': True}) + os.close(locks[0]) # Release the lock + + # If the part_power/next_part_power tuple differs, restart from scratch + locks = [None] + states = { + "part_power": PART_POWER + 1, + "next_part_power": PART_POWER + 2, + "state": {}, + } + relinker.hook_pre_device(locks, states, datadir, device_path) + self.assertEqual(states["state"], {}) + self.assertFalse(os.path.exists(state_file)) os.close(locks[0]) # Release the lock # If the file gets corrupted, restart from scratch with open(state_file, 'wt') as f: f.write('NOT JSON') locks = [None] - states = {} + states = {"part_power": PART_POWER, "next_part_power": PART_POWER + 1, + "state": {}} relinker.hook_pre_device(locks, states, datadir, device_path) - self.assertEqual(states, {}) + self.assertEqual(states["state"], {}) + self.assertFalse(os.path.exists(state_file)) os.close(locks[0]) # Release the lock def test_cleanup_not_yet_relinked(self):