Sync metadata in 'rsync_then_merge' in db_replicator

In previous 'rsync_then_merge' remote objects are merged with
rsync'ed local objects, but remote metadata is not merged with local
one. Account/Container replicator sometimes uses rsync for db sync if
there is a big difference of record history in db files between 'local'
and 'remote' servers. If replicator needs to rsync local db to remote
but metadata in local db is older, older info of metadata can be
distributed then some metadata values can be missing or go back to older.

This patch fixes this problem by merging 'remote' metadata with
rsync'ed local db file.

Closes-Bug: #1570118

Change-Id: Icdf0a936fc456c5462471938cbc365bd012b05d4
This commit is contained in:
Daisuke Morita 2016-04-06 19:02:28 -07:00 committed by Tim Burke
parent 1d09a02481
commit 843184f3fe
3 changed files with 226 additions and 3 deletions

View File

@ -839,6 +839,7 @@ class ReplicatorRpc(object):
objects = existing_broker.get_items_since(point, 1000) objects = existing_broker.get_items_since(point, 1000)
sleep() sleep()
new_broker.newid(args[0]) new_broker.newid(args[0])
new_broker.update_metadata(existing_broker.metadata)
renamer(old_filename, db_file) renamer(old_filename, db_file)
return HTTPNoContent() return HTTPNoContent()

139
test/probe/test_db_replicator.py Executable file
View File

@ -0,0 +1,139 @@
# Copyright (c) 2010-2016 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import main
from uuid import uuid4
from swiftclient import client, ClientException
from test.probe.common import kill_server, ReplProbeTest, start_server
from swift.common import direct_client, utils
from swift.common.manager import Server
class TestDbUsyncReplicator(ReplProbeTest):
object_puts = 1 # Overridden in subclass to force rsync
def test_metadata_sync(self):
# Create container
container = 'container-%s' % uuid4()
client.put_container(self.url, self.token, container,
headers={'X-Storage-Policy': self.policy.name,
'X-Container-Meta-A': '1',
'X-Container-Meta-B': '1',
'X-Container-Meta-C': '1'})
cpart, cnodes = self.container_ring.get_nodes(self.account, container)
cnode = cnodes.pop()
# 2 of 3 container servers are temporarily down
for node in cnodes:
kill_server((node['ip'], node['port']),
self.ipport2server)
# Put some meta on the lone server, to make sure it's merged properly
# This will 503 (since we don't have a quorum), but we don't care (!)
try:
client.post_container(self.url, self.token, container,
headers={'X-Container-Meta-A': '2',
'X-Container-Meta-B': '2',
'X-Container-Meta-D': '2'})
except ClientException:
pass
# object updates come to only one container server
for _ in range(self.object_puts):
obj = 'object-%s' % uuid4()
client.put_object(self.url, self.token, container, obj, 'VERIFY')
# 2 container servers make comeback
for node in cnodes:
start_server((node['ip'], node['port']),
self.ipport2server)
# But, container-server which got object updates is down
kill_server((cnode['ip'], cnode['port']),
self.ipport2server)
# Metadata update will be applied to 2 container servers
# (equal to quorum)
client.post_container(self.url, self.token, container,
headers={'X-Container-Meta-B': '3',
'X-Container-Meta-E': '3'})
# container-server which got object updates makes comeback
start_server((cnode['ip'], cnode['port']),
self.ipport2server)
# other nodes have no objects
for node in cnodes:
resp_headers = direct_client.direct_head_container(
node, cpart, self.account, container)
self.assertIn(resp_headers.get('x-container-object-count'),
(None, '0', 0))
# If container-replicator on the node which got the object updates
# runs in first, db file may be replicated by rsync to other
# containers. In that case, the db file does not information about
# metadata, so metadata should be synced before replication
crepl = Server('container-replicator')
crepl.spawn(self.configs['container-replicator'][cnode['id'] + 1],
once=True)
crepl.interact()
expected_meta = {
'x-container-meta-a': '2',
'x-container-meta-b': '2',
'x-container-meta-c': '1',
'x-container-meta-d': '2',
}
# node that got the object updates still doesn't have the meta
resp_headers = direct_client.direct_head_container(
cnode, cpart, self.account, container)
for header, value in expected_meta.items():
self.assertIn(header, resp_headers)
self.assertEqual(value, resp_headers[header])
self.assertNotIn(resp_headers.get('x-container-object-count'),
(None, '0', 0))
expected_meta = {
'x-container-meta-a': '2',
'x-container-meta-b': '3',
'x-container-meta-c': '1',
'x-container-meta-d': '2',
'x-container-meta-e': '3',
}
# other nodes still have the meta, as well as objects
for node in cnodes:
resp_headers = direct_client.direct_head_container(
node, cpart, self.account, container)
for header, value in expected_meta.items():
self.assertIn(header, resp_headers)
self.assertEqual(value, resp_headers[header])
self.assertNotIn(resp_headers.get('x-container-object-count'),
(None, '0', 0))
class TestDbRsyncReplicator(TestDbUsyncReplicator):
def setUp(self):
super(TestDbRsyncReplicator, self).setUp()
cont_configs = [utils.readconf(p, 'container-replicator')
for p in self.configs['container-replicator'].values()]
# Do more than per_diff object PUTs, to force rsync instead of usync
self.object_puts = 1 + max(int(c.get('per_diff', '1000'))
for c in cont_configs)
if __name__ == '__main__':
main()

View File

@ -32,7 +32,7 @@ from six.moves import reload_module
from swift.container.backend import DATADIR from swift.container.backend import DATADIR
from swift.common import db_replicator from swift.common import db_replicator
from swift.common.utils import (normalize_timestamp, hash_path, from swift.common.utils import (normalize_timestamp, hash_path,
storage_directory) storage_directory, Timestamp)
from swift.common.exceptions import DriveNotMounted from swift.common.exceptions import DriveNotMounted
from swift.common.swob import HTTPException from swift.common.swob import HTTPException
@ -193,6 +193,7 @@ class FakeBroker(object):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.locked = False self.locked = False
self.metadata = {}
return None return None
@contextmanager @contextmanager
@ -1353,8 +1354,8 @@ class TestReplToNode(unittest.TestCase):
self.fake_info = {'id': 'a', 'point': -1, 'max_row': 20, 'hash': 'b', self.fake_info = {'id': 'a', 'point': -1, 'max_row': 20, 'hash': 'b',
'created_at': 100, 'put_timestamp': 0, 'created_at': 100, 'put_timestamp': 0,
'delete_timestamp': 0, 'count': 0, 'delete_timestamp': 0, 'count': 0,
'metadata': { 'metadata': json.dumps({
'Test': ('Value', normalize_timestamp(1))}} 'Test': ('Value', normalize_timestamp(1))})}
self.replicator.logger = mock.Mock() self.replicator.logger = mock.Mock()
self.replicator._rsync_db = mock.Mock(return_value=True) self.replicator._rsync_db = mock.Mock(return_value=True)
self.replicator._usync_db = mock.Mock(return_value=True) self.replicator._usync_db = mock.Mock(return_value=True)
@ -1398,6 +1399,18 @@ class TestReplToNode(unittest.TestCase):
self.assertEqual(self.replicator._rsync_db.call_count, 0) self.assertEqual(self.replicator._rsync_db.call_count, 0)
self.assertEqual(self.replicator._usync_db.call_count, 0) self.assertEqual(self.replicator._usync_db.call_count, 0)
def test_repl_to_node_metadata_update(self):
now = Timestamp(time.time()).internal
rmetadata = {"X-Container-Sysmeta-Test": ("XYZ", now)}
rinfo = {"id": 3, "point": -1, "max_row": 20, "hash": "b",
"metadata": json.dumps(rmetadata)}
self.http = ReplHttp(json.dumps(rinfo))
self.broker.get_sync()
self.assertEqual(self.replicator._repl_to_node(
self.fake_node, self.broker, '0', self.fake_info), True)
metadata = self.broker.metadata
self.assertEqual({}, metadata)
def test_repl_to_node_not_found(self): def test_repl_to_node_not_found(self):
self.http = ReplHttp('{"id": 3, "point": -1}', set_status=404) self.http = ReplHttp('{"id": 3, "point": -1}', set_status=404)
self.assertEqual(self.replicator._repl_to_node( self.assertEqual(self.replicator._repl_to_node(
@ -1614,6 +1627,76 @@ class TestReplicatorSync(unittest.TestCase):
parts = os.listdir(part_root) parts = os.listdir(part_root)
self.assertEqual(0, len(parts)) self.assertEqual(0, len(parts))
def test_rsync_then_merge(self):
# setup current db (and broker)
broker = self._get_broker('a', 'c', node_index=0)
part, node = self._get_broker_part_node(broker)
part = str(part)
put_timestamp = normalize_timestamp(time.time())
broker.initialize(put_timestamp)
put_metadata = {'example-meta': ['bah', put_timestamp]}
broker.update_metadata(put_metadata)
# sanity (re-open, and the db keeps the metadata)
broker = self._get_broker('a', 'c', node_index=0)
self.assertEqual(put_metadata, broker.metadata)
# create rsynced db in tmp dir
obj_hash = hash_path('a', 'c')
rsynced_db_broker = self.backend(
os.path.join(self.root, node['device'], 'tmp', obj_hash + '.db'),
account='a', container='b')
rsynced_db_broker.initialize(put_timestamp)
# do rysnc_then_merge
rpc = db_replicator.ReplicatorRpc(
self.root, self.datadir, self.backend, False)
response = rpc.dispatch((node['device'], part, obj_hash),
['rsync_then_merge', obj_hash + '.db', 'arg2'])
# sanity
self.assertEqual('204 No Content', response.status)
self.assertEqual(204, response.status_int)
# re-open the db
broker = self._get_broker('a', 'c', node_index=0)
# keep the metadata in existing db
self.assertEqual(put_metadata, broker.metadata)
def test_replicator_sync(self):
# setup current db (and broker)
broker = self._get_broker('a', 'c', node_index=0)
part, node = self._get_broker_part_node(broker)
part = str(part)
put_timestamp = normalize_timestamp(time.time())
broker.initialize(put_timestamp)
put_metadata = {'example-meta': ['bah', put_timestamp]}
sync_local_metadata = {
"meta1": ["data1", put_timestamp],
"meta2": ["data2", put_timestamp]}
broker.update_metadata(put_metadata)
# sanity (re-open, and the db keeps the metadata)
broker = self._get_broker('a', 'c', node_index=0)
self.assertEqual(put_metadata, broker.metadata)
# do rysnc_then_merge
rpc = db_replicator.ReplicatorRpc(
self.root, self.datadir, ExampleBroker, False)
response = rpc.sync(
broker, (broker.get_sync('id_') + 1, 12345, 'id_',
put_timestamp, put_timestamp, '0',
json.dumps(sync_local_metadata)))
# sanity
self.assertEqual('200 OK', response.status)
self.assertEqual(200, response.status_int)
# re-open the db
broker = self._get_broker('a', 'c', node_index=0)
# keep the both metadata in existing db and local db
expected = put_metadata.copy()
expected.update(sync_local_metadata)
self.assertEqual(expected, broker.metadata)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()