Implement Backup and Restore for CouchDB

The recommended method for doing full backups in CouchDB
has been a simple filesystem copy of the data files. This
is because CouchDB stores data in self-contained, append-only
files. For example, when a user creates a database, a
corresponding <database-name>.couch file is created in
the database directory.

The backup functionality has been implemented by compressing
the database directory, encrypting the result, and uploading
it to Swift for storage. Similarly, the restore functionality
has been implemented by fetching the files from Swift and
uncompressing them into the database directory. Afterwards,
the restore step updates the ownership of the directory.

To test the changes, follow these steps:
- Create a CouchDB instance
- Access the admin console, called Futon, using the following
  URL (substitute the instance's IP address):
  http://10.0.0.5:5984/_utils/
- Create a database from there and create one or more documents
- Create a backup of this CouchDB instance
- Create another CouchDB instance from the backup created above
- Access the admin console for this new instance and verify that
  the database created above is there

The couchdb client library needed by the integration tests has been
added to global-requirements: https://review.openstack.org/#/c/285191/

Change-Id: Iad6d69bf60ace73825819081964a43ad53d6c6fc
Implements: blueprint couchdb-backup-restore
This commit is contained in:
Mariam John 2016-01-20 12:16:29 -06:00
parent 8d9af8c4c8
commit 65917f3ec0
10 changed files with 329 additions and 9 deletions

View File

@ -161,3 +161,9 @@ restore_namespace = trove.guestagent.strategies.restore.experimental.cassandra_i
# backup_strategy = DB2Backup
# backup_namespace = trove.guestagent.strategies.backup.experimental.db2_impl
# restore_namespace = trove.guestagent.strategies.restore.experimental.db2_impl
[couchdb]
#For CouchDB, the following are the defaults for backup and restore:
# backup_strategy = CouchDBBackup
# backup_namespace = trove.guestagent.strategies.backup.experimental.couchdb_impl
# restore_namespace = trove.guestagent.strategies.restore.experimental.couchdb_impl

View File

@ -24,3 +24,4 @@ redis>=2.10.0 # MIT
psycopg2>=2.5 # LGPL/ZPL
cassandra-driver>=2.1.4 # Apache-2.0
pycrypto>=2.6 # Public Domain
couchdb>=0.8 # Apache-2.0

View File

@ -1070,13 +1070,15 @@ couchdb_opts = [
help='Whether to provision a Cinder volume for datadir.'),
cfg.StrOpt('device_path', default='/dev/vdb',
help='Device path for volume if volume support is enabled.'),
cfg.StrOpt('backup_strategy', default=None,
cfg.StrOpt('backup_strategy', default='CouchDBBackup',
help='Default strategy to perform backups.'),
cfg.StrOpt('replication_strategy', default=None,
help='Default strategy for replication.'),
cfg.StrOpt('backup_namespace', default=None,
cfg.StrOpt('backup_namespace', default='trove.guestagent.strategies'
'.backup.experimental.couchdb_impl',
help='Namespace to load backup strategies from.'),
cfg.StrOpt('restore_namespace', default=None,
cfg.StrOpt('restore_namespace', default='trove.guestagent.strategies'
'.restore.experimental.couchdb_impl',
help='Namespace to load restore strategies from.'),
cfg.DictOpt('backup_incremental_strategy', default={},
help='Incremental Backup Runner based on the default '

View File

@ -17,11 +17,13 @@ import os
from oslo_log import log as logging
from trove.common.i18n import _
from trove.common import instance as rd_instance
from trove.guestagent import backup
from trove.guestagent.datastore.experimental.couchdb import service
from trove.guestagent.datastore import manager
from trove.guestagent import volume
LOG = logging.getLogger(__name__)
@ -59,6 +61,8 @@ class Manager(manager.Manager):
self.app.start_db()
self.app.change_permissions()
self.app.make_host_reachable()
if backup_info:
self._perform_restore(backup_info, context, mount_point)
def stop_db(self, context, do_not_start_on_reboot=False):
"""
@ -81,3 +85,23 @@ class Manager(manager.Manager):
def start_db_with_conf_changes(self, context, config_contents):
LOG.debug("Starting CouchDB with configuration changes.")
self.app.start_db_with_conf_changes(config_contents)
def _perform_restore(self, backup_info, context, restore_location):
    """Restore all CouchDB databases and their documents from a backup.

    :param backup_info: mapping describing the backup; its ``'id'`` entry
                        is used in the log messages.
    :param context: request context handed through to ``backup.restore``.
    :param restore_location: location handed through to ``backup.restore``.
    :raises Exception: whatever ``backup.restore`` raised, re-raised after
                       the service status has been set to FAILED.
    """
    backup_id = backup_info['id']
    # Pass the id as a logger argument instead of %-formatting eagerly, so
    # interpolation only happens when the record is actually emitted.
    LOG.info(_("Restoring database from backup %s"), backup_id)
    try:
        backup.restore(context, backup_info, restore_location)
    except Exception:
        LOG.exception(_("Error performing restore from backup %s"),
                      backup_id)
        # Mark the service as failed before propagating the error.
        self.status.set_status(rd_instance.ServiceStatuses.FAILED)
        raise
    LOG.info(_("Restored database successfully"))
def create_backup(self, context, backup_info):
    """Create a backup for CouchDB by delegating to ``backup.backup``.

    :param context: request context forwarded unchanged.
    :param backup_info: backup description forwarded unchanged.
    """
    LOG.debug("Creating backup for CouchDB.")
    backup.backup(context, backup_info)

View File

@ -0,0 +1,35 @@
# Copyright 2016 IBM Corporation
#
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from trove.guestagent.datastore.experimental.couchdb import service
from trove.guestagent.strategies.backup import base
class CouchDBBackup(base.BackupRunner):
    """Backup runner that archives the CouchDB library directory with tar."""

    __strategy_name__ = 'couchdbbackup'

    @property
    def cmd(self):
        """Return the shell pipeline that produces the backup stream.

        A CouchDB backup is a plain filesystem copy of the database files:
        every database lives in one fully contained, append-only file. For
        example, creating a database 'foo' yields a 'foo.couch' file in the
        database directory, which by default is '/var/lib/couchdb'.

        The tar command over that directory is followed by the runner's
        zip and encrypt command fragments.
        """
        pieces = (
            'sudo tar cpPf - ',
            service.COUCHDB_LIB_DIR,
            self.zip_cmd,
            self.encrypt_cmd,
        )
        return ''.join(pieces)

View File

@ -0,0 +1,41 @@
# Copyright 2016 IBM Corporation
#
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from trove.guestagent.common import operating_system
from trove.guestagent.datastore.experimental.couchdb import service
from trove.guestagent.strategies.restore import base
class CouchDBBackup(base.RestoreRunner):
    """Restore runner that unpacks a CouchDB tar backup stream."""

    __strategy_name__ = 'couchdbbackup'
    base_restore_cmd = 'sudo tar xPf -'

    def __init__(self, *args, **kwargs):
        """Build the CouchDB app handle, then run the base initializer."""
        status = service.CouchDBAppStatus()
        self.appStatus = status
        self.app = service.CouchDBApp(status)
        super(CouchDBBackup, self).__init__(*args, **kwargs)

    def post_restore(self):
        """Fix ownership of the restored files and restart CouchDB.

        Restoring only requires untarring the compressed database files
        into the database directory and changing its ownership.
        """
        owner = 'couchdb'
        operating_system.chown(
            service.COUCHDB_LIB_DIR, owner, owner, as_root=True)
        self.app.restart()

View File

@ -203,7 +203,7 @@ register(["cassandra_supported"], common_groups,
backup_groups, configuration_groups, cluster_actions_groups)
register(["couchbase_supported"], common_groups, backup_groups,
root_actions_groups)
register(["couchdb_supported"], common_groups)
register(["couchdb_supported"], common_groups, backup_groups)
register(["postgresql_supported"], common_groups,
backup_groups, database_actions_groups, configuration_groups,
root_actions_groups, user_actions_groups)

View File

@ -0,0 +1,87 @@
# Copyright 2016 IBM Corporation
#
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import couchdb
from trove.tests.scenario.helpers.test_helper import TestHelper
from trove.tests.scenario.runners.test_runners import TestRunner
class CouchdbHelper(TestHelper):
    """Scenario-test helper that seeds and checks data through couchdb.

    Each data label gets its own database, named ``<database>_<label>``,
    holding a single document whose fields carry the generated values.
    """

    def __init__(self, expected_override_name):
        super(CouchdbHelper, self).__init__(expected_override_name)
        # Generated datasets, keyed by str(data_size), so each is built once.
        self._data_cache = dict()
        # Template for document field names; filled with each data value.
        self.field_name = 'ff-%s'
        # Prefix of every test database name; the data label is appended.
        self.database = 'foodb'

    def create_client(self, host, *args, **kwargs):
        """Return a ``couchdb.Server`` for port 5984 on ``host``."""
        url = 'http://' + host + ':5984/'
        server = couchdb.Server(url)
        return server

    def _database_name(self, data_label):
        """Return the name of the database that belongs to ``data_label``."""
        return self.database + '_' + data_label

    def add_actual_data(self, data_label, data_start, data_size, host,
                        *args, **kwargs):
        """Create the label's database and store the dataset in one document.

        ``data_start`` is accepted for interface compatibility but unused.
        """
        # get_client is not defined in this class; presumably it is
        # inherited from TestHelper -- verify against the base class.
        client = self.get_client(host, *args, **kwargs)
        db = client.create(self._database_name(data_label))
        # Save an empty document first to obtain an id, then fill it in.
        doc_id, _doc_rev = db.save({})
        data = self._get_dataset(data_size)
        doc = db.get(doc_id)
        for value in data:
            doc[self.field_name % value] = value
        db.save(doc)

    def _get_dataset(self, data_size):
        """Return the dataset for ``data_size``, generating it at most once."""
        cache_key = str(data_size)
        if cache_key in self._data_cache:
            return self._data_cache.get(cache_key)

        data = self._generate_dataset(data_size)
        self._data_cache[cache_key] = data
        return data

    def _generate_dataset(self, data_size):
        """Return the integers 1..data_size (inclusive)."""
        return range(1, data_size + 1)

    def remove_actual_data(self, data_label, data_start, data_size, host,
                           *args, **kwargs):
        """Delete the database that belongs to ``data_label``.

        ``data_start`` and ``data_size`` are accepted for interface
        compatibility but unused.
        """
        client = self.get_client(host, *args, **kwargs)
        # Server.delete() is documented to take the database *name*; this
        # code previously handed it a Database object instead.
        client.delete(self._database_name(data_label))

    def verify_actual_data(self, data_label, data_start, data_size, host,
                           *args, **kwargs):
        """Assert the label's database holds exactly the expected dataset.

        The database must contain a single document, and that document's
        non-metadata values must match the generated dataset.
        """
        expected_data = self._get_dataset(data_size)
        client = self.get_client(host, *args, **kwargs)
        db = client[self._database_name(data_label)]
        actual_data = []

        TestRunner.assert_equal(len(db), 1)

        for i in db:
            items = db[i].items()
            # Skip CouchDB's own bookkeeping fields.
            actual_data = ([value for key, value in items
                            if key not in ['_id', '_rev']])

        TestRunner.assert_equal(len(expected_data),
                                len(actual_data),
                                "Unexpected number of result rows.")

        for expected_row in expected_data:
            TestRunner.assert_true(expected_row in actual_data,
                                   "Row not found in the result set: %s"
                                   % expected_row)

View File

@ -60,7 +60,10 @@ BACKUP_DB2_CLS = ("trove.guestagent.strategies.backup."
"experimental.db2_impl.DB2Backup")
RESTORE_DB2_CLS = ("trove.guestagent.strategies.restore."
"experimental.db2_impl.DB2Backup")
BACKUP_COUCHDB_BACKUP_CLS = ("trove.guestagent.strategies.backup."
"experimental.couchdb_impl.CouchDBBackup")
RESTORE_COUCHDB_BACKUP_CLS = ("trove.guestagent.strategies.restore."
"experimental.couchdb_impl.CouchDBBackup")
PIPE = " | "
ZIP = "gzip"
@ -106,6 +109,9 @@ REDISBACKUP_RESTORE = "tee /var/lib/redis/dump.rdb"
DB2BACKUP_CMD = "sudo tar cPf - /home/db2inst1/db2inst1/backup"
DB2BACKUP_RESTORE = "sudo tar xPf -"
COUCHDB_BACKUP_CMD = "sudo tar cpPf - /var/lib/couchdb"
COUCHDB_RESTORE_CMD = "sudo tar xPf -"
class GuestAgentBackupTest(trove_testtools.TestCase):
@ -465,6 +471,39 @@ class GuestAgentBackupTest(trove_testtools.TestCase):
self.assertEqual(restr.restore_cmd,
DECRYPT + PIPE + UNZIP + PIPE + DB2BACKUP_RESTORE)
def test_backup_encrypted_couchdbbackup_command(self):
    """The encrypted backup pipeline is tar | zip | encrypt."""
    backupBase.BackupRunner.encrypt_key = CRYPTO_KEY
    runner_cls = utils.import_class(BACKUP_COUCHDB_BACKUP_CLS)
    runner = runner_cls(12345)
    self.assertIsNotNone(runner)
    expected_cmd = PIPE.join([COUCHDB_BACKUP_CMD, ZIP, ENCRYPT])
    self.assertEqual(expected_cmd, runner.command)
    self.assertIn("gz.enc", runner.manifest)
def test_backup_not_encrypted_couchdbbackup_command(self):
    """With encryption switched off the pipeline is just tar | zip."""
    backupBase.BackupRunner.is_encrypted = False
    backupBase.BackupRunner.encrypt_key = CRYPTO_KEY
    runner_cls = utils.import_class(BACKUP_COUCHDB_BACKUP_CLS)
    runner = runner_cls(12345)
    self.assertIsNotNone(runner)
    expected_cmd = PIPE.join([COUCHDB_BACKUP_CMD, ZIP])
    self.assertEqual(expected_cmd, runner.command)
    self.assertIn("gz", runner.manifest)
def test_restore_decrypted_couchdbbackup_command(self):
    """With encryption switched off the restore pipeline is unzip | tar."""
    restoreBase.RestoreRunner.is_encrypted = False
    runner_cls = utils.import_class(RESTORE_COUCHDB_BACKUP_CLS)
    runner = runner_cls(
        None,
        restore_location="/var/lib/couchdb",
        location="filename",
        checksum="md5",
    )
    expected_cmd = PIPE.join([UNZIP, COUCHDB_RESTORE_CMD])
    self.assertEqual(expected_cmd, runner.restore_cmd)
def test_restore_encrypted_couchdbbackup_command(self):
    """The encrypted restore pipeline is decrypt | unzip | tar."""
    restoreBase.RestoreRunner.decrypt_key = CRYPTO_KEY
    runner_cls = utils.import_class(RESTORE_COUCHDB_BACKUP_CLS)
    runner = runner_cls(
        None,
        restore_location="/var/lib/couchdb",
        location="filename",
        checksum="md5",
    )
    expected_cmd = PIPE.join([DECRYPT, UNZIP, COUCHDB_RESTORE_CMD])
    self.assertEqual(expected_cmd, runner.restore_cmd)
class CassandraBackupTest(trove_testtools.TestCase):
@ -910,3 +949,72 @@ class DB2RestoreTests(trove_testtools.TestCase):
self.restore_runner.post_restore = mock.Mock()
self.assertRaises(exception.ProcessExecutionError,
self.restore_runner.restore)
class CouchDBBackupTests(trove_testtools.TestCase):
    """Checks the hook sequence around the CouchDB backup runner."""

    def setUp(self):
        super(CouchDBBackupTests, self).setUp()
        self.backup_runner = utils.import_class(BACKUP_COUCHDB_BACKUP_CLS)
        # Prepared here, started by each test that needs the mocks.
        self.backup_runner_patch = patch.multiple(
            self.backup_runner,
            _run=DEFAULT,
            _run_pre_backup=DEFAULT,
            _run_post_backup=DEFAULT,
        )

    def tearDown(self):
        super(CouchDBBackupTests, self).tearDown()
        self.backup_runner_patch.stop()

    def test_backup_success(self):
        """Pre-backup, run and post-backup each fire exactly once."""
        mocks = self.backup_runner_patch.start()

        with self.backup_runner(12345):
            pass

        for hook in ('_run_pre_backup', '_run', '_run_post_backup'):
            mocks[hook].assert_called_once_with()

    def test_backup_failed_due_to_run_backup(self):
        """A failing run propagates and skips the post-backup hook."""
        mocks = self.backup_runner_patch.start()
        mocks['_run'].configure_mock(
            side_effect=exception.TroveError('test'))

        with ExpectedException(exception.TroveError, 'test'):
            with self.backup_runner(12345):
                pass

        for hook in ('_run_pre_backup', '_run'):
            mocks[hook].assert_called_once_with()
        self.assertEqual(0, mocks['_run_post_backup'].call_count)
class CouchDBRestoreTests(trove_testtools.TestCase):
    """Checks restore() of the CouchDB restore runner with mocked steps."""

    def setUp(self):
        super(CouchDBRestoreTests, self).setUp()
        runner_cls = utils.import_class(RESTORE_COUCHDB_BACKUP_CLS)
        self.restore_runner = runner_cls(
            'swift',
            location='http://some.where',
            checksum='True_checksum',
            restore_location='/tmp/somewhere',
        )

    def tearDown(self):
        super(CouchDBRestoreTests, self).tearDown()

    def test_restore_success(self):
        """restore() hands back the content length from _run_restore."""
        expected_content_length = 123
        runner = self.restore_runner
        runner._run_restore = mock.Mock(
            return_value=expected_content_length)
        runner.pre_restore = mock.Mock()
        runner.post_restore = mock.Mock()

        actual_content_length = runner.restore()

        self.assertEqual(
            expected_content_length, actual_content_length)

    def test_restore_failed_due_to_run_restore(self):
        """An error raised by _run_restore propagates out of restore()."""
        runner = self.restore_runner
        runner.pre_restore = mock.Mock()
        runner._run_restore = mock.Mock(
            side_effect=exception.ProcessExecutionError('Error'))
        runner.post_restore = mock.Mock()

        self.assertRaises(exception.ProcessExecutionError,
                          runner.restore)

View File

@ -19,6 +19,7 @@ from mock import patch
from oslo_utils import netutils
from trove.common.instance import ServiceStatuses
from trove.guestagent import backup
from trove.guestagent.datastore.experimental.couchdb import (
manager as couchdb_manager)
from trove.guestagent.datastore.experimental.couchdb import (
@ -56,6 +57,7 @@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase):
self.original_get_ip = netutils.get_my_ipv4
self.orig_make_host_reachable = (
couchdb_service.CouchDBApp.make_host_reachable)
self.orig_backup_restore = backup.restore
def tearDown(self):
super(GuestAgentCouchDBManagerTest, self).tearDown()
@ -71,6 +73,7 @@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase):
netutils.get_my_ipv4 = self.original_get_ip
couchdb_service.CouchDBApp.make_host_reachable = (
self.orig_make_host_reachable)
backup.restore = self.orig_backup_restore
def test_update_status(self):
mock_status = MagicMock()
@ -85,6 +88,7 @@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase):
mock_app = MagicMock()
self.manager.appStatus = mock_status
self.manager.app = mock_app
mount_point = '/var/lib/couchdb'
mock_status.begin_install = MagicMock(return_value=None)
mock_app.install_if_needed = MagicMock(return_value=None)
@ -97,6 +101,12 @@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase):
volume.VolumeDevice.migrate_data = MagicMock(return_value=None)
volume.VolumeDevice.mount = MagicMock(return_value=None)
volume.VolumeDevice.mount_points = MagicMock(return_value=[])
backup.restore = MagicMock(return_value=None)
backup_info = {'id': backup_id,
'location': 'fake-location',
'type': 'CouchDBBackup',
'checksum': 'fake-checksum'} if backup_id else None
with patch.object(pkg.Package, 'pkg_is_installed',
return_value=MagicMock(
@ -106,16 +116,19 @@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase):
databases=None,
memory_mb='2048', users=None,
device_path=device_path,
mount_point="/var/lib/couchdb",
backup_info=None,
mount_point=mount_point,
backup_info=backup_info,
overrides=None,
cluster_config=None)
# verification/assertion
mock_status.begin_install.assert_any_call()
mock_app.install_if_needed.assert_any_call(packages)
mock_app.make_host_reachable.assert_any_call()
mock_app.change_permissions.assert_any_call()
if backup_id:
backup.restore.assert_any_call(self.context,
backup_info,
mount_point)
def test_prepare_pkg(self):
self._prepare_dynamic(['couchdb'])
@ -123,6 +136,9 @@ class GuestAgentCouchDBManagerTest(trove_testtools.TestCase):
def test_prepare_no_pkg(self):
self._prepare_dynamic([])
def test_prepare_from_backup(self):
    """Run the shared prepare scenario with a backup id supplied."""
    self._prepare_dynamic(['couchdb'], backup_id='123abc456')
def test_restart(self):
mock_status = MagicMock()
self.manager.appStatus = mock_status