From 4337b753f5899377b93258f2a631b394fde221cd Mon Sep 17 00:00:00 2001 From: "Erlon R. Cruz" Date: Tue, 17 Nov 2020 18:19:43 -0300 Subject: [PATCH] Fix swift replication errors There are several errors related to the swift replication service. The swift storage charm is not properly restarting the services after configuration changes, it is not setting the correct object_lockup_timeout value (which, per the behaviour observed in our environments, must be greater than object_rsync_timeout), and we also needed to fix the object replicator config file to honor the object-handoffs-first configuration. This patch along with the swift proxy-change should fix the currently known replication problems. Closes-bug: #1903762 Depends-on: I87eb23de94e3f2f5b06d44df1f8bd9d2324456a0 Change-Id: I87eb23de94e3f2f5b06d44df1f8bd9d2324c8470 --- lib/swift_storage_context.py | 7 +++++-- lib/swift_storage_utils.py | 20 +++++++++++++------- templates/object-server-replicator.conf | 7 +++++++ unit_tests/test_swift_storage_context.py | 2 +- unit_tests/test_swift_storage_utils.py | 5 ++++- 5 files changed, 30 insertions(+), 11 deletions(-) diff --git a/lib/swift_storage_context.py b/lib/swift_storage_context.py index ae07a6b..ea1d477 100644 --- a/lib/swift_storage_context.py +++ b/lib/swift_storage_context.py @@ -108,10 +108,13 @@ class SwiftStorageServerContext(OSContextGenerator): 'statsd_sample_rate': config('statsd-sample-rate'), } - # ensure lockup_timeout > rsync_timeout. See bug 1575277 + # Per the behavior listed on LP#1575277, object_lockup_timeout should + # be greater than object_rsync_timeout. But we have hit issues any time + # this value is smaller or equal to '2 * object_rsync_timeout', so we + # set it to a value a bit bigger than that. 
ctxt['object_lockup_timeout'] = max( config('object-lockup-timeout'), - 2*ctxt['object_rsync_timeout'] + 2*ctxt['object_rsync_timeout'] + 10 ) if config('node-timeout'): diff --git a/lib/swift_storage_utils.py b/lib/swift_storage_utils.py index d28f99a..a97d902 100644 --- a/lib/swift_storage_utils.py +++ b/lib/swift_storage_utils.py @@ -169,12 +169,15 @@ SWIFT_SVCS = ( RESTART_MAP = { '/etc/rsync-juju.d/050-swift-storage.conf': ['rsync'], - '/etc/swift/account-server.conf': ACCOUNT_SVCS, - '/etc/swift/account-server/replicator.conf': ACCOUNT_SVCS_REP, - '/etc/swift/container-server.conf': CONTAINER_SVCS, - '/etc/swift/container-server/replicator.conf': CONTAINER_SVCS_REP, - '/etc/swift/object-server.conf': OBJECT_SVCS, - '/etc/swift/object-server/replicator.conf': OBJECT_SVCS_REP, + '/etc/swift/account-server.conf': ACCOUNT_SVCS + ACCOUNT_SVCS_REP, + '/etc/swift/account-server/account-server-replicator.conf': + ACCOUNT_SVCS + ACCOUNT_SVCS_REP, + '/etc/swift/container-server.conf': CONTAINER_SVCS + CONTAINER_SVCS_REP, + '/etc/swift/container-server/container-server-replicator.conf': + CONTAINER_SVCS + CONTAINER_SVCS_REP, + '/etc/swift/object-server.conf': OBJECT_SVCS + OBJECT_SVCS_REP, + '/etc/swift/object-server/object-server-replicator.conf': + OBJECT_SVCS + OBJECT_SVCS_REP, '/etc/swift/swift.conf': SWIFT_SVCS } @@ -830,7 +833,10 @@ def setup_ufw(): ports = [config('object-server-port'), config('container-server-port'), - config('account-server-port')] + config('account-server-port'), + config('object-server-port-rep'), + config('container-server-port-rep'), + config('account-server-port-rep')] # Storage peers allowed_hosts = RsyncContext()().get('allowed_hosts', '').split(' ') diff --git a/templates/object-server-replicator.conf b/templates/object-server-replicator.conf index 70ae5fb..0192141 100644 --- a/templates/object-server-replicator.conf +++ b/templates/object-server-replicator.conf @@ -25,3 +25,10 @@ replication_server = true concurrency = {{ 
object_replicator_concurrency }} rsync_timeout = {{ object_rsync_timeout }} lockup_timeout = {{ object_lockup_timeout }} +{% if object_handoffs_first %} +handoffs_first = True +{% endif %} +{% if http_timeout -%} +http_timeout = {{ http_timeout }} +{%- endif %} + diff --git a/unit_tests/test_swift_storage_context.py b/unit_tests/test_swift_storage_context.py index 32e6b40..8c055d4 100644 --- a/unit_tests/test_swift_storage_context.py +++ b/unit_tests/test_swift_storage_context.py @@ -88,7 +88,7 @@ class SwiftStorageContextTests(CharmTestCase): result = ctxt() ex = { 'object_rsync_timeout': 1000, - 'object_lockup_timeout': 2000, + 'object_lockup_timeout': 2010, } self.assertDictContainsSubset(ex, result) diff --git a/unit_tests/test_swift_storage_utils.py b/unit_tests/test_swift_storage_utils.py index fbcdbf0..85cb287 100644 --- a/unit_tests/test_swift_storage_utils.py +++ b/unit_tests/test_swift_storage_utils.py @@ -738,10 +738,13 @@ class SwiftStorageUtilsTests(CharmTestCase): peer_addr_1 = '10.1.1.1' peer_addr_2 = '10.1.1.2' client_addrs = ['10.3.3.1', '10.3.3.2', '10.3.3.3', 'ubuntu.com'] - ports = [6660, 6661, 6662] + ports = [6660, 6661, 6662, 6670, 6671, 6672] self.test_config.set('object-server-port', ports[0]) self.test_config.set('container-server-port', ports[1]) self.test_config.set('account-server-port', ports[2]) + self.test_config.set('object-server-port-rep', ports[3]) + self.test_config.set('container-server-port-rep', ports[4]) + self.test_config.set('account-server-port-rep', ports[5]) RelatedUnits = namedtuple('RelatedUnits', 'rid, unit') self.iter_units_for_relation_name.return_value = [ RelatedUnits(rid='rid:1', unit='unit/1'),