diff --git a/debian/changelog b/debian/changelog
index bdf0340b..0606e7ec 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,11 +1,14 @@
-swift (2.10.1-2) UNRELEASED; urgency=medium
+swift (2.10.1-2) unstable; urgency=medium
 
   * Don't start rsyslog during postinst/postrm if it's stopped
   * d/tests
     - Added Python module import test
     - Added daemons test
+  * Added patches from upstream master:
+    - For any part only one replica can move in a rebalance
+    - Quarantine malformed database schema SQLite errors
 
- -- Ondřej Nový  Mon, 02 Jan 2017 17:30:26 +0100
+ -- Ondřej Nový  Fri, 13 Jan 2017 10:05:12 +0100
 
 swift (2.10.1-1) unstable; urgency=medium
 
diff --git a/debian/patches/For_any_part_only_one_replica_can_move_in_a_rebalance.patch b/debian/patches/For_any_part_only_one_replica_can_move_in_a_rebalance.patch
new file mode 100644
index 00000000..5ed65a34
--- /dev/null
+++ b/debian/patches/For_any_part_only_one_replica_can_move_in_a_rebalance.patch
@@ -0,0 +1,234 @@
+From e5dd050113646a93a0fe7fb1aed4f1cafdc9139f Mon Sep 17 00:00:00 2001
+From: cheng
+Date: Sun, 24 Jul 2016 01:10:36 +0000
+Subject: [PATCH] For any part, only one replica can move in a rebalance
+
+With a min_part_hours of zero, it's possible to move more than one
+replica of the same part in a single rebalance.
+
+This change in behavior only affects min_part_hours zero rings, which
+are understood to be uncommon in production, mostly because of this
+very specific and strange behavior of min_part_hours zero rings.
+
+With this change, no matter how small your min_part_hours, it will
+always require at least N rebalances to move N part-replicas of the
+same part.
+
+To supplement the existing persisted _last_part_moves structure that
+enforces min_part_hours, this change adds a _part_moved_bitmap that
+exists only during the life of the rebalance, to track when the rebalance
+moves a part in order to prevent other replicas of the same part
+from being moved in the same rebalance.
+
+Add authors: Clay Gerrard, clay.gerrard@gmail.com
+             Christian Schwede, cschwede@redhat.com
+
+Closes-bug: #1586167
+
+Change-Id: Ia1629abd5ce6e1b3acc2e94f818ed8223eed993a
+(cherry picked from commit ce26e78)
+---
+
+diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py
+index ef4f095..f4f2a83 100644
+--- a/swift/common/ring/builder.py
++++ b/swift/common/ring/builder.py
+@@ -108,6 +108,8 @@
+         # a device overrides this behavior as it's assumed that's only
+         # done because of device failure.
+         self._last_part_moves = None
++        # _part_moved_bitmap records which parts have been moved
++        self._part_moved_bitmap = None
+         # _last_part_moves_epoch indicates the time the offsets in
+         # _last_part_moves is based on.
+         self._last_part_moves_epoch = 0
+@@ -124,6 +126,19 @@
+             self.logger.disabled = True
+             # silence "no handler for X" error messages
+             self.logger.addHandler(NullHandler())
++
++    def _set_part_moved(self, part):
++        self._last_part_moves[part] = 0
++        byte, bit = divmod(part, 8)
++        self._part_moved_bitmap[byte] |= (128 >> bit)
++
++    def _has_part_moved(self, part):
++        byte, bit = divmod(part, 8)
++        return bool(self._part_moved_bitmap[byte] & (128 >> bit))
++
++    def _can_part_move(self, part):
++        return (self._last_part_moves[part] >= self.min_part_hours and
++                not self._has_part_moved(part))
+
+     @contextmanager
+     def debug(self):
+@@ -437,6 +452,7 @@
+         if self._last_part_moves is None:
+             self.logger.debug("New builder; performing initial balance")
+             self._last_part_moves = array('B', itertools.repeat(0, self.parts))
++        self._part_moved_bitmap = bytearray(max(2 ** (self.part_power - 3), 1))
+         self._update_last_part_moves()
+
+         replica_plan = self._build_replica_plan()
+@@ -876,7 +892,7 @@
+                     dev_id = self._replica2part2dev[replica][part]
+                     if dev_id in dev_ids:
+                         self._replica2part2dev[replica][part] = NONE_DEV
+-                        self._last_part_moves[part] = 0
++                        self._set_part_moved(part)
+                         assign_parts[part].append(replica)
+                         self.logger.debug(
+                             "Gathered %d/%d from dev %d [dev removed]",
+@@ -964,7 +980,7 @@
+         # Now we gather partitions that are "at risk" because they aren't
+         # currently sufficient spread out across the cluster.
+         for part in range(self.parts):
+-            if self._last_part_moves[part] < self.min_part_hours:
++            if (not self._can_part_move(part)):
+                 continue
+             # First, add up the count of replicas at each tier for each
+             # partition.
+@@ -996,7 +1012,7 @@
+                 # has more than one replica of a part assigned to it - which
+                 # would have only been possible on rings built with an older
+                 # version of the code
+-                if (self._last_part_moves[part] < self.min_part_hours and
++                if (not self._can_part_move(part) and
+                         not replicas_at_tier[dev['tiers'][-1]] > 1):
+                     continue
+                 dev['parts_wanted'] += 1
+@@ -1008,7 +1024,7 @@
+                 self._replica2part2dev[replica][part] = NONE_DEV
+                 for tier in dev['tiers']:
+                     replicas_at_tier[tier] -= 1
+-                self._last_part_moves[part] = 0
++                self._set_part_moved(part)
+
+     def _gather_parts_for_balance_can_disperse(self, assign_parts, start,
+                                                replica_plan):
+@@ -1025,7 +1041,7 @@
+         # they have more partitions than their parts_wanted.
+         for offset in range(self.parts):
+             part = (start + offset) % self.parts
+-            if self._last_part_moves[part] < self.min_part_hours:
++            if (not self._can_part_move(part)):
+                 continue
+             # For each part we'll look at the devices holding those parts and
+             # see if any are overweight, keeping track of replicas_at_tier as
+@@ -1048,7 +1064,7 @@
+             overweight_dev_replica.sort(
+                 key=lambda dr: dr[0]['parts_wanted'])
+             for dev, replica in overweight_dev_replica:
+-                if self._last_part_moves[part] < self.min_part_hours:
++                if (not self._can_part_move(part)):
+                     break
+                 if any(replica_plan[tier]['min'] <=
+                        replicas_at_tier[tier] <
+@@ -1067,7 +1083,7 @@
+                     self._replica2part2dev[replica][part] = NONE_DEV
+                     for tier in dev['tiers']:
+                         replicas_at_tier[tier] -= 1
+-                    self._last_part_moves[part] = 0
++                    self._set_part_moved(part)
+
+     def _gather_parts_for_balance(self, assign_parts, replica_plan):
+         """
+@@ -1107,7 +1123,7 @@
+         """
+         for offset in range(self.parts):
+             part = (start + offset) % self.parts
+-            if self._last_part_moves[part] < self.min_part_hours:
++            if (not self._can_part_move(part)):
+                 continue
+             overweight_dev_replica = []
+             for replica in self._replicas_for_part(part):
+@@ -1124,7 +1140,7 @@
+             overweight_dev_replica.sort(
+                 key=lambda dr: dr[0]['parts_wanted'])
+             for dev, replica in overweight_dev_replica:
+-                if self._last_part_moves[part] < self.min_part_hours:
++                if (not self._can_part_move(part)):
+                     break
+                 # this is the most overweight_device holding a replica of this
+                 # part we don't know where it's going to end up - but we'll
+@@ -1136,7 +1152,7 @@
+                     "Gathered %d/%d from dev %d [weight forced]",
+                     part, replica, dev['id'])
+                 self._replica2part2dev[replica][part] = NONE_DEV
+-                self._last_part_moves[part] = 0
++                self._set_part_moved(part)
+
+     def _reassign_parts(self, reassign_parts, replica_plan):
+         """
+diff --git a/test/unit/common/ring/test_builder.py b/test/unit/common/ring/test_builder.py
+index 9702730..0007f10 100644
+--- a/test/unit/common/ring/test_builder.py
++++ b/test/unit/common/ring/test_builder.py
+@@ -723,7 +723,7 @@
+                             "Partition %d not in zones 0 and 1 (got %r)" %
+                             (part, zones))
+
+-    def test_min_part_hours_zero_will_move_whatever_it_takes(self):
++    def test_min_part_hours_zero_will_move_one_replica(self):
+         rb = ring.RingBuilder(8, 3, 0)
+         # there'll be at least one replica in z0 and z1
+         rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 0.5,
+@@ -747,6 +747,33 @@
+         rb.validate()
+
+         self.assertEqual(0, rb.dispersion)
++        # Only one replica could move, so some zones are quite unbalanced
++        self.assertAlmostEqual(rb.get_balance(), 66.66, delta=0.5)
++
++        # There was only zone 0 and 1 before adding more devices. Only one
++        # replica should have been moved, therefore we expect 256 parts in
++        # zones 0 and 1, and a total of 256 across zones 2, 3, and 4
++        expected = defaultdict(int, {0: 256, 1: 256, 2: 86, 3: 85, 4: 85})
++        self.assertEqual(expected, self._partition_counts(rb, key='zone'))
++
++        parts_with_moved_count = defaultdict(int)
++        for part in range(rb.parts):
++            zones = set()
++            for replica in range(rb.replicas):
++                zones.add(rb.devs[rb._replica2part2dev[replica][part]]['zone'])
++            moved_replicas = len(zones - {0, 1})
++            parts_with_moved_count[moved_replicas] += 1
++
++        # We expect that every partition moved exactly one replica
++        expected = {1: 256}
++        self.assertEqual(parts_with_moved_count, expected)
++
++        # After rebalancing two more times, we expect that everything is in a
++        # good state
++        rb.rebalance(seed=3)
++        rb.rebalance(seed=3)
++
++        self.assertEqual(0, rb.dispersion)
+         # a balance of w/i a 1% isn't too bad for 3 replicas on 7
+         # devices when part power is only 8
+         self.assertAlmostEqual(rb.get_balance(), 0, delta=0.5)
+diff --git a/test/unit/common/ring/test_utils.py b/test/unit/common/ring/test_utils.py
+index c6d6b21..c40ce85 100644
+--- a/test/unit/common/ring/test_utils.py
++++ b/test/unit/common/ring/test_utils.py
+@@ -664,11 +664,18 @@
+                     'ip': '127.0.0.3', 'port': 10003, 'device': 'sdd1'})
+
+         # when the biggest tier has the smallest devices things get ugly
++        # can't move all the part-replicas in one rebalance
+         rb.rebalance(seed=100)
+         report = dispersion_report(rb, verbose=True)
+-        self.assertEqual(rb.dispersion, 70.3125)
++        self.assertEqual(rb.dispersion, 9.375)
++        self.assertEqual(report['worst_tier'], 'r1z1-127.0.0.1')
++        self.assertEqual(report['max_dispersion'], 7.18562874251497)
++        # do a second rebalance
++        rb.rebalance(seed=100)
++        report = dispersion_report(rb, verbose=True)
++        self.assertEqual(rb.dispersion, 50.0)
+         self.assertEqual(report['worst_tier'], 'r1z0-127.0.0.3')
+-        self.assertEqual(report['max_dispersion'], 88.23529411764706)
++        self.assertEqual(report['max_dispersion'], 50.0)
+
+         # ... but overload can square it
+         rb.set_overload(rb.get_required_overload())
diff --git a/debian/patches/Quarantine_malformed_database_schema_SQLite_errors.patch b/debian/patches/Quarantine_malformed_database_schema_SQLite_errors.patch
new file mode 100644
index 00000000..c1449b0a
--- /dev/null
+++ b/debian/patches/Quarantine_malformed_database_schema_SQLite_errors.patch
@@ -0,0 +1,95 @@
+From ea1ecf3d8b097a5d24fe81f2c5ee9ec390d41809 Mon Sep 17 00:00:00 2001
+From: Matthew Oliver
+Date: Thu, 01 Dec 2016 09:46:53 +1100
+Subject: [PATCH] Quarantine malformed database schema SQLite errors
+
+Currently, if an sqlite3.DatabaseError is thrown due to a corrupted
+database schema, it gets logged and the database isn't
+quarantined.
+
+This patch adds the malformed database schema case to the list of
+SQLite errors in possibly_quarantine that will trigger the db to be
+quarantined.
+
+Also, it improves the possibly_quarantine unit test to cover all existing
+exceptions, and catches exceptions based on the real-world except clauses
+we use in the code.
+
+Closes-Bug: #1646247
+
+Change-Id: Id9452c88f8394a2a910c34c69361442543aa206d
+(cherry picked from commit 3bde14b)
+---
+
+diff --git a/swift/common/db.py b/swift/common/db.py
+index 1f06694..fc5c057 100644
+--- a/swift/common/db.py
++++ b/swift/common/db.py
+@@ -331,6 +331,8 @@
+         """
+         if 'database disk image is malformed' in str(exc_value):
+             exc_hint = 'malformed'
++        elif 'malformed database schema' in str(exc_value):
++            exc_hint = 'malformed'
+         elif 'file is encrypted or is not a database' in str(exc_value):
+             exc_hint = 'corrupted'
+         elif 'disk I/O error' in str(exc_value):
+index 45949c9..dd580b2 100644
+--- a/test/unit/common/test_db.py
++++ b/test/unit/common/test_db.py
+@@ -1214,29 +1230,36 @@
+             message = str(e)
+         self.assertEqual(message, '400 Bad Request')
+
+-    def test_possibly_quarantine_disk_error(self):
++    def test_possibly_quarantine_db_errors(self):
+         dbpath = os.path.join(self.testdir, 'dev', 'dbs', 'par', 'pre', 'db')
+-        mkdirs(dbpath)
+         qpath = os.path.join(self.testdir, 'dev', 'quarantined', 'tests', 'db')
+-        broker = DatabaseBroker(os.path.join(dbpath, '1.db'))
+-        broker.db_type = 'test'
++        # Data is a list of exceptions to be raised and expected values in the
++        # log
++        data = [
++            (sqlite3.DatabaseError('database disk image is malformed'),
++             'malformed'),
++            (sqlite3.DatabaseError('malformed database schema'), 'malformed'),
++            (sqlite3.DatabaseError('file is encrypted or is not a database'),
++             'corrupted'),
++            (sqlite3.OperationalError('disk I/O error'),
++             'disk error while accessing')]
+
+-        def stub():
+-            raise sqlite3.OperationalError('disk I/O error')
+-
+-        try:
+-            stub()
+-        except Exception:
++        for i, (ex, hint) in enumerate(data):
++            mkdirs(dbpath)
++            broker = DatabaseBroker(os.path.join(dbpath, '%d.db' % (i)))
++            broker.db_type = 'test'
+             try:
+-                broker.possibly_quarantine(*sys.exc_info())
+-            except Exception as exc:
+-                self.assertEqual(
+-                    str(exc),
+-                    'Quarantined %s to %s due to disk error '
+-                    'while accessing database' %
+-                    (dbpath, qpath))
+-            else:
+-                self.fail('Expected an exception to be raised')
++                raise ex
++            except (sqlite3.DatabaseError, DatabaseConnectionError):
++                try:
++                    broker.possibly_quarantine(*sys.exc_info())
++                except Exception as exc:
++                    self.assertEqual(
++                        str(exc),
++                        'Quarantined %s to %s due to %s database' %
++                        (dbpath, qpath, hint))
++                else:
++                    self.fail('Expected an exception to be raised')
+
+ if __name__ == '__main__':
+     unittest.main()
diff --git a/debian/patches/series b/debian/patches/series
index 125d2ae1..5b01baf6 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,2 +1,4 @@
 sphinx_reproducible_build.patch
 syslog_log_name.patch
+Quarantine_malformed_database_schema_SQLite_errors.patch
+For_any_part_only_one_replica_can_move_in_a_rebalance.patch