Implement full online backups for DB2

Currently Trove supports full offline backups for DB2. In this
implementation, we have added support for full online backups for DB2
using archival logging.

Change-Id: I30b5b0b85120fd105cc3db57983b062fad5fab5a
Implements: blueprint db2-online-backup
This commit is contained in:
Mariam John 2016-09-01 09:10:44 -07:00 committed by Peter Stachowski
parent 2978a402ec
commit ff2a52038b
14 changed files with 333 additions and 121 deletions

View File

@ -156,7 +156,7 @@ restore_namespace = trove.guestagent.strategies.restore.experimental.cassandra_i
[db2]
# For db2, the following are the defaults for backup, and restore:
# backup_strategy = DB2Backup
# backup_strategy = DB2OfflineBackup
# backup_namespace = trove.guestagent.strategies.backup.experimental.db2_impl
# restore_namespace = trove.guestagent.strategies.restore.experimental.db2_impl

View File

@ -0,0 +1,14 @@
---
features:
- Add support for full online backup and restore for DB2
Express-C by enabling archive logging.
other:
  - In the Mitaka release, support was added for full offline
    backup and restore using the default circular logging.
    In this release, the name of the strategy for offline
    backup and restore was changed from DB2Backup to
    DB2OfflineBackup. Hence, to enable offline backups,
    set backup_strategy=DB2OfflineBackup, and for
    online backups, set backup_strategy=DB2OnlineBackup. The
    backup_namespace and restore_namespace properties remain
    the same for both types of backup and restore.

View File

@ -1275,7 +1275,7 @@ db2_opts = [
help='Whether to provision a Cinder volume for datadir.'),
cfg.StrOpt('device_path', default='/dev/vdb',
help='Device path for volume if volume support is enabled.'),
cfg.StrOpt('backup_strategy', default='DB2Backup',
cfg.StrOpt('backup_strategy', default='DB2OfflineBackup',
help='Default strategy to perform backups.'),
cfg.StrOpt('replication_strategy', default=None,
help='Default strategy for replication.'),

View File

@ -497,6 +497,11 @@ class InsufficientSpaceForReplica(TroveError):
"of data.")
class InsufficientSpaceForBackup(TroveError):
message = _("The instance has only %(free)sG free while the estimated "
"backup size is %(backup_size)sG.")
class ReplicaSourceDeleteForbidden(Forbidden):
message = _("The replica source cannot be deleted without detaching the "
"replicas.")

View File

@ -370,3 +370,29 @@ def format_output(message, format_len=79, truncate_len=None, replace_index=0):
if truncate_len and len(msg_str) > truncate_len:
msg_str = msg_str[:truncate_len - 3] + '...'
return msg_str
def to_gb(bytes):
    """Convert a byte count to gigabytes, rounded to two decimals.

    Returns 0.0 for zero bytes. Any non-zero size is reported as at
    least 0.01 GB so that small-but-real sizes never display as 0.0.

    (Moved from dbaas.py so it can be used as a general utility; the
    corresponding tests moved from test_dbaas.py to test_utils.py.)
    """
    # NOTE: the parameter name shadows the `bytes` builtin; it is kept
    # unchanged for backward compatibility with keyword callers.
    if bytes == 0:
        return 0.0
    size = bytes / 1024.0 ** 3
    return max(round(size, 2), 0.01)
def to_mb(bytes):
    """Convert a byte count to megabytes, rounded to two decimals.

    Returns 0.0 for zero bytes. Any non-zero size is reported as at
    least 0.01 MB so that small-but-real sizes never display as 0.0.

    (Moved from dbaas.py so it can be used as a general utility; the
    corresponding tests moved from test_dbaas.py to test_utils.py.)
    """
    # NOTE: the parameter name shadows the `bytes` builtin; it is kept
    # unchanged for backward compatibility with keyword callers.
    if bytes == 0:
        return 0.0
    size = bytes / 1024.0 ** 2
    return max(round(size, 2), 0.01)

View File

@ -54,6 +54,14 @@ class DB2App(object):
self.status = status
self.dbm_default_config = {}
self.init_config()
'''
If the DB2 guest agent has been configured for online backups,
every database that is created will be configured for online
backups. Since online backups are done using archive logging,
we need to create a directory in which to store the archived logs.
'''
if CONF.db2.backup_strategy == 'DB2OnlineBackup':
create_db2_dir(system.DB2_ARCHIVE_LOGS_DIR)
def init_config(self):
if not operating_system.exists(MOUNT_POINT, True):
@ -295,6 +303,20 @@ def run_command(command, superuser=system.DB2_INSTANCE_OWNER,
command, timeout=timeout)
def create_db2_dir(dir_name):
    """Ensure dir_name exists, owned by the DB2 instance owner."""
    if operating_system.exists(dir_name, True):
        return
    operating_system.create_directory(dir_name,
                                      system.DB2_INSTANCE_OWNER,
                                      system.DB2_INSTANCE_OWNER,
                                      as_root=True)
def remove_db2_dir(dir_name):
    # Forcibly remove dir_name (and its contents) as root; `force=True`
    # makes this a no-op if the path does not exist.
    operating_system.remove(dir_name,
                            force=True,
                            as_root=True)
class DB2Admin(object):
"""
Handles administrative tasks on the DB2 instance.
@ -316,6 +338,26 @@ class DB2Admin(object):
"There was an error creating database: %s.") % dbName)
db_create_failed.append(dbName)
pass
'''
Configure each database to do archive logging for online
backups. Once the database is configured, it will go into a
BACKUP PENDING state. In this state, the database will not
be accessible for any operations. To get the database back to
normal mode, we have to do a full offline backup as soon as we
configure it for archive logging.
'''
try:
if CONF.db2.backup_strategy == 'DB2OnlineBackup':
run_command(system.UPDATE_DB_LOG_CONFIGURATION % {
'dbname': dbName})
run_command(system.RECOVER_FROM_BACKUP_PENDING_MODE % {
'dbname': dbName})
except exception:
LOG.exception(_(
"There was an error while configuring the database for "
"online backup: %s.") % dbName)
if len(db_create_failed) > 0:
LOG.exception(_("Creating the following databases failed: %s.") %
db_create_failed)

View File

@ -13,8 +13,14 @@
# License for the specific language governing permissions and limitations
# under the License.
from trove.common import cfg
CONF = cfg.CONF
TIMEOUT = 1200
DB2_INSTANCE_OWNER = "db2inst1"
MOUNT_POINT = CONF.db2.mount_point
DB2_BACKUP_DIR = MOUNT_POINT + "/backup"
DB2_ARCHIVE_LOGS_DIR = MOUNT_POINT + "/ArchiveLogs"
UPDATE_HOSTNAME = (
'source /home/db2inst1/sqllib/db2profile;'
'db2set -g DB2SYSTEM="$(hostname)"')
@ -50,14 +56,34 @@ LIST_DB_USERS = (
"db2 +o connect to %(dbname)s; "
"db2 -x select grantee, dataaccessauth from sysibm.sysdbauth; "
"db2 connect reset")
BACKUP_DB = "db2 backup database %(dbname)s to %(dir)s"
RESTORE_DB = (
"db2 restore database %(dbname)s from %(dir)s")
OFFLINE_BACKUP_DB = "db2 backup database %(dbname)s to " + DB2_BACKUP_DIR
RESTORE_OFFLINE_DB = (
"db2 restore database %(dbname)s from " + DB2_BACKUP_DIR)
GET_DB_SIZE = (
"db2 connect to %(dbname)s;"
"db2 call get_dbsize_info(?, ?, ?, -1) ")
"db2 +o connect to %(dbname)s;"
"db2 call get_dbsize_info\(?, ?, ?, -1\) | "
"grep -A1 'DATABASESIZE' | grep 'Parameter Value' | sed 's/.*[:]//' |"
" tr -d '\n'; db2 +o connect reset")
GET_DB_NAMES = ("find /home/db2inst1/db2inst1/backup/ -type f -name '*.001' |"
" grep -Po \"(?<=backup/)[^.']*(?=\.)\"")
GET_DBM_CONFIGURATION = "db2 get dbm configuration > %(dbm_config)s"
UPDATE_DBM_CONFIGURATION = ("db2 update database manager configuration using "
"%(parameter)s %(value)s")
UPDATE_DB_LOG_CONFIGURATION = (
"db2 update database configuration for "
"%(dbname)s using LOGARCHMETH1 'DISK:" + DB2_ARCHIVE_LOGS_DIR + "'")
LOG_UTILIZATION = (
"db2 +o connect to %(dbname)s;"
"db2 -x SELECT TOTAL_LOG_USED_KB FROM SYSIBMADM.LOG_UTILIZATION | "
"tr -d '\n';db2 +o connect reset")
ONLINE_BACKUP_DB = (
"db2 backup database %(dbname)s ONLINE to " +
DB2_BACKUP_DIR + " INCLUDE LOGS")
RESTORE_ONLINE_DB = (
"db2 RESTORE DATABASE %(dbname)s FROM " + DB2_BACKUP_DIR
+ " LOGTARGET " + DB2_ARCHIVE_LOGS_DIR)
ROLL_FORWARD_DB = (
"db2 ROLLFORWARD DATABASE %(dbname)s TO END OF BACKUP "
"AND COMPLETE OVERFLOW LOG PATH '(" + DB2_ARCHIVE_LOGS_DIR + ")'")
RECOVER_FROM_BACKUP_PENDING_MODE = (
"db2 backup database %(dbname)s to /dev/null")

View File

@ -30,6 +30,7 @@ from oslo_log import log as logging
from trove.common import cfg
from trove.common.i18n import _
from trove.common import utils
LOG = logging.getLogger(__name__)
@ -71,22 +72,6 @@ def datastore_registry():
get_custom_managers().items()))
def to_gb(bytes):
if bytes == 0:
return 0.0
size = bytes / 1024.0 ** 3
# Make sure we don't return 0.0 if the size is greater than 0
return max(round(size, 2), 0.01)
def to_mb(bytes):
if bytes == 0:
return 0.0
size = bytes / 1024.0 ** 2
# Make sure we don't return 0.0 if the size is greater than 0
return max(round(size, 2), 0.01)
def get_filesystem_volume_stats(fs_path):
try:
stats = os.statvfs(fs_path)
@ -97,8 +82,8 @@ def get_filesystem_volume_stats(fs_path):
total = stats.f_blocks * stats.f_bsize
free = stats.f_bfree * stats.f_bsize
# return the size in GB
used_gb = to_gb(total - free)
total_gb = to_gb(total)
used_gb = utils.to_gb(total - free)
total_gb = utils.to_gb(total)
output = {
'block_size': stats.f_bsize,

View File

@ -14,69 +14,26 @@
# under the License.
from oslo_log import log as logging
from trove.common import cfg
from trove.common import exception
from trove.common.i18n import _
from trove.common import utils
from trove.guestagent.common import operating_system
from trove.guestagent.datastore.experimental.db2 import service
from trove.guestagent.datastore.experimental.db2 import system
from trove.guestagent.db import models
from trove.guestagent.strategies.backup import base
CONF = cfg.CONF
DB2_DBPATH = CONF.db2.mount_point
DB2_BACKUP_DIR = DB2_DBPATH + "/backup"
LOG = logging.getLogger(__name__)
class DB2Backup(base.BackupRunner):
"""Implementation of Backup Strategy for DB2."""
__Strategy_name__ = 'db2backup'
"""
Base class for DB2 backups
"""
def __init__(self, *args, **kwargs):
self.admin = service.DB2Admin()
super(DB2Backup, self).__init__(*args, **kwargs)
def _run_pre_backup(self):
"""Create archival contents in dump dir"""
try:
est_dump_size = self.estimate_dump_size()
avail = operating_system.get_bytes_free_on_fs(DB2_DBPATH)
if est_dump_size > avail:
self.cleanup()
raise OSError(_("Need more free space to backup db2 database,"
" estimated %(est_dump_size)s"
" and found %(avail)s bytes free ") %
{'est_dump_size': est_dump_size,
'avail': avail})
operating_system.create_directory(DB2_BACKUP_DIR,
system.DB2_INSTANCE_OWNER,
system.DB2_INSTANCE_OWNER,
as_root=True)
service.run_command(system.QUIESCE_DB2)
dbNames = self.list_dbnames()
for dbName in dbNames:
service.run_command(system.BACKUP_DB % {
'dbname': dbName, 'dir': DB2_BACKUP_DIR})
service.run_command(system.UNQUIESCE_DB2)
except exception.ProcessExecutionError:
LOG.debug("Caught exception when preparing the directory")
self.cleanup()
raise
@property
def cmd(self):
cmd = 'sudo tar cPf - ' + DB2_BACKUP_DIR
return cmd + self.zip_cmd + self.encrypt_cmd
def cleanup(self):
operating_system.remove(DB2_BACKUP_DIR, force=True, as_root=True)
def _run_post_backup(self):
self.cleanup()
self.admin = service.DB2Admin()
self.databases = self.list_dbnames()
def list_dbnames(self):
dbNames = []
@ -87,20 +44,135 @@ class DB2Backup(base.BackupRunner):
dbNames.append(mydb.name)
return dbNames
def estimate_dump_size(self):
def estimate_backup_size(self):
"""
Estimating the size of the backup based on the size of the data
returned from the get_db_size procedure. The size of the
backup is always going to be smaller than the size of the data.
"""
try:
dbs = self.list_dbnames()
size = 0
for dbname in dbs:
for dbname in self.databases:
out = service.run_command(system.GET_DB_SIZE % {'dbname':
dbname})
size = size + out
size = size + int(out[0])
except exception.ProcessExecutionError:
LOG.debug("Error while trying to get db size info")
LOG.exception(_("An error occured while trying to "
"estimate backup size"))
LOG.debug("Estimated size for databases: " + str(size))
return size
def estimate_log_size(self):
return 0.0
def run_backup(self):
pass
def execute_backup_cmd(self, backup_command):
service.create_db2_dir(system.DB2_BACKUP_DIR)
for dbName in self.databases:
service.run_command(backup_command % {'dbname': dbName})
def _run_pre_backup(self):
    """
    Before performing the actual backup, verify there is enough free
    space to hold it. The backup size is the sum of the database sizes
    and, for an online backup, the size of the archived logs as well.

    :raises exception.InsufficientSpaceForBackup: if the estimated
        backup size exceeds the free space on the mount point.
    """
    backup_size_bytes = self.estimate_backup_size()
    log_size_bytes = self.estimate_log_size()
    total_backup_size_gb = utils.to_gb(backup_size_bytes + log_size_bytes)
    free_bytes = operating_system.get_bytes_free_on_fs(system.MOUNT_POINT)
    free_gb = utils.to_gb(free_bytes)
    if total_backup_size_gb > free_gb:
        # Bug fix: the original did `raise Exc % {...}`, applying `%` to
        # the exception class itself, which raises TypeError instead of
        # the intended TroveError. TroveError subclasses format their
        # `message` from constructor kwargs, so pass them directly.
        raise exception.InsufficientSpaceForBackup(
            backup_size=total_backup_size_gb, free=free_gb)
    self.run_backup()
@property
def cmd(self):
cmd = 'sudo tar cPf - ' + system.DB2_BACKUP_DIR
return cmd + self.zip_cmd + self.encrypt_cmd
def cleanup(self):
service.remove_db2_dir(system.DB2_BACKUP_DIR)
def _run_post_backup(self):
self.cleanup()
class DB2OnlineBackup(DB2Backup):
    """
    Implementation of the online backup strategy for DB2
    using archive logging.
    """
    __strategy_name__ = 'db2onlinebackup'

    def estimate_log_size(self):
        """
        Estimate the log utilization for all databases. The
        LOG_UTILIZATION administrative view returns information about
        log utilization for the connected database; TOTAL_LOG_USED_KB
        is reported in KB, so the sum is converted to bytes.
        """
        log_size = 0
        try:
            for dbname in self.databases:
                out = service.run_command(
                    system.LOG_UTILIZATION % {'dbname': dbname})
                log_size += int(out[0])
            log_size *= 1024
        except exception.ProcessExecutionError:
            # Best effort: log and fall through with whatever was
            # accumulated so far rather than failing the backup.
            LOG.exception(_("An error occurred while trying to estimate "
                            "log size"))
        LOG.debug("Estimated log size for all databases: " + str(log_size))
        return log_size

    def run_backup(self):
        """Run an ONLINE backup of every database into the backup dir."""
        try:
            self.execute_backup_cmd(system.ONLINE_BACKUP_DB)
        except exception.ProcessExecutionError:
            LOG.exception(_("An exception occurred while doing an online "
                            "backup."))
            self.cleanup()
            raise

    def cleanup(self):
        super(DB2OnlineBackup, self).cleanup()
        # After a backup operation we delete the archived logs from the
        # archived-log directory, but keep the directory itself: archive
        # logging is enabled for all databases, so the directory is
        # still needed to store future archive logs.
        service.remove_db2_dir(system.DB2_ARCHIVE_LOGS_DIR + "/*")
class DB2OfflineBackup(DB2Backup):
    """
    Implementation of Offline Backup Strategy for DB2 using
    circular logging which is the default.
    """
    __strategy_name__ = 'db2offlinebackup'

    def __init__(self, *args, **kwargs):
        super(DB2OfflineBackup, self).__init__(*args, **kwargs)

    def run_backup(self):
        """Create archival contents in dump dir.

        Quiesces the instance, backs up every database offline, then
        un-quiesces it.
        """
        try:
            service.run_command(system.QUIESCE_DB2)
            self.execute_backup_cmd(system.OFFLINE_BACKUP_DB)
            service.run_command(system.UNQUIESCE_DB2)
            # NOTE(review): if the backup fails, UNQUIESCE is skipped and
            # the instance stays quiesced — confirm this is intended.
        except exception.ProcessExecutionError:
            LOG.exception(_("An exception occurred while doing an offline "
                            "backup."))
            self.cleanup()
            raise

View File

@ -15,23 +15,20 @@
from oslo_log import log as logging
from trove.common import cfg
from trove.common import exception
from trove.common.i18n import _
from trove.common import utils
from trove.guestagent.common import operating_system
from trove.guestagent.datastore.experimental.db2 import service
from trove.guestagent.datastore.experimental.db2 import system
from trove.guestagent.strategies.restore import base
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
DB2_DBPATH = CONF.db2.mount_point
DB2_BACKUP_DIR = DB2_DBPATH + "/backup"
class DB2Backup(base.RestoreRunner):
"""Implementation of Restore Strategy for DB2."""
__strategy_name__ = 'db2backup'
"""
Base class implementation of Restore strategy for DB2
"""
base_restore_cmd = 'sudo tar xPf -'
def __init__(self, *args, **kwargs):
@ -39,18 +36,56 @@ class DB2Backup(base.RestoreRunner):
self.appStatus = service.DB2AppStatus()
self.app = service.DB2App(self.appStatus)
self.admin = service.DB2Admin()
self.restore_location = DB2_BACKUP_DIR
self.restore_location = system.DB2_BACKUP_DIR
def post_restore(self):
def _post_restore(self, restore_command, rollforward_command=None):
"""
Restore from the directory that we untarred into
"""
out, err = utils.execute_with_timeout(system.GET_DB_NAMES,
shell=True)
out = ""
try:
out, err = utils.execute_with_timeout(system.GET_DB_NAMES,
shell=True)
except exception.ProcessExecutionError:
LOG.exception(_("Couldn't find any databases."))
dbNames = out.split()
for dbName in dbNames:
service.run_command(system.RESTORE_DB % {'dbname': dbName,
'dir': DB2_BACKUP_DIR})
service.run_command(restore_command % {'dbname': dbName})
if rollforward_command:
service.run_command(system.ROLL_FORWARD_DB % {'dbname':
dbName})
LOG.info(_("Cleaning out restore location post: %s."), DB2_BACKUP_DIR)
operating_system.remove(DB2_BACKUP_DIR, force=True, as_root=True)
LOG.info(_("Cleaning out restore location: %s."),
system.DB2_BACKUP_DIR)
service.remove_db2_dir(system.DB2_BACKUP_DIR)
class DB2OfflineBackup(DB2Backup):
    """
    Implementation of Restore Strategy for full offline backups
    using the default circular logging
    """
    __strategy_name__ = 'db2offlinebackup'

    def post_restore(self):
        # Restore each database from the offline backup images; no log
        # roll-forward is needed with circular logging.
        self._post_restore(system.RESTORE_OFFLINE_DB)
class DB2OnlineBackup(DB2Backup):
    """
    Implementation of restore strategy for full online backups using
    archived logging.
    """
    __strategy_name__ = 'db2onlinebackup'

    def post_restore(self):
        """
        Once the databases are restored from a backup, we have to roll
        forward the logs to the point where the backup was taken. This
        brings the database to a state where it can be used; otherwise it
        remains in a BACKUP PENDING state. After rolling forward the
        logs, we can delete the archived logs.
        """
        self._post_restore(system.RESTORE_ONLINE_DB, system.ROLL_FORWARD_DB)
        service.remove_db2_dir(system.DB2_ARCHIVE_LOGS_DIR + '/*')

View File

@ -252,7 +252,9 @@ class BackupAgentTest(trove_testtools.TestCase):
self.assertIsNotNone(cbbackup.manifest)
self.assertIn('gz.enc', cbbackup.manifest)
def test_backup_impl_DB2Backup(self):
@mock.patch.object(db2_impl.DB2Backup, 'list_dbnames',
return_value=['testdb1', 'testdb2'])
def test_backup_impl_DB2Backup(self, _):
netutils.get_my_ipv4 = Mock(return_value="1.1.1.1")
db2_backup = db2_impl.DB2Backup('db2backup', extra_opts='')
self.assertIsNotNone(db2_backup)

View File

@ -99,3 +99,27 @@ class TestUtils(trove_testtools.TestCase):
for index, datum in enumerate(data):
self.assertEqual(datum[1], utils.format_output(datum[0]),
"Error formatting line %d of data" % index)
def test_to_gb(self):
result = utils.to_gb(123456789)
self.assertEqual(0.11, result)
def test_to_gb_small(self):
result = utils.to_gb(2)
self.assertEqual(0.01, result)
def test_to_gb_zero(self):
result = utils.to_gb(0)
self.assertEqual(0.0, result)
def test_to_mb(self):
result = utils.to_mb(123456789)
self.assertEqual(117.74, result)
def test_to_mb_small(self):
result = utils.to_mb(2)
self.assertEqual(0.01, result)
def test_to_mb_zero(self):
result = utils.to_mb(0)
self.assertEqual(0.0, result)

View File

@ -27,6 +27,7 @@ from trove.guestagent.datastore.experimental.cassandra import (
from trove.guestagent.datastore.experimental.db2 import (
service as db2_service)
from trove.guestagent.strategies.backup import base as backupBase
from trove.guestagent.strategies.backup.experimental import db2_impl
from trove.guestagent.strategies.backup.experimental.postgresql_impl \
import PgBaseBackupUtil
from trove.guestagent.strategies.backup.mysql_impl import MySqlApp
@ -450,6 +451,7 @@ class GuestAgentBackupTest(trove_testtools.TestCase):
chown=DEFAULT, chmod=DEFAULT)
@patch.object(db2_service, 'run_command')
@patch.object(db2_service.DB2App, 'process_default_dbm_config')
@patch.object(db2_impl.DB2Backup, 'list_dbnames')
def test_backup_encrypted_db2backup_command(self, *mock, **kwargs):
backupBase.BackupRunner.is_encrypted = True
backupBase.BackupRunner.encrypt_key = CRYPTO_KEY
@ -466,6 +468,7 @@ class GuestAgentBackupTest(trove_testtools.TestCase):
chown=DEFAULT, chmod=DEFAULT)
@patch.object(db2_service, 'run_command')
@patch.object(db2_service.DB2App, 'process_default_dbm_config')
@patch.object(db2_impl.DB2Backup, 'list_dbnames')
def test_backup_not_encrypted_db2backup_command(self, *mock, **kwargs):
backupBase.BackupRunner.is_encrypted = False
backupBase.BackupRunner.encrypt_key = CRYPTO_KEY
@ -977,6 +980,9 @@ class DB2BackupTests(trove_testtools.TestCase):
super(DB2BackupTests, self).setUp()
self.exec_timeout_patch = patch.object(utils, 'execute_with_timeout')
self.exec_timeout_patch.start()
self.exec_list_database = patch.object(db2_impl.DB2Backup,
'list_dbnames')
self.exec_list_database.start()
self.backup_runner = utils.import_class(BACKUP_DB2_CLS)
self.backup_runner_patch = patch.multiple(
self.backup_runner, _run=DEFAULT,
@ -985,6 +991,7 @@ class DB2BackupTests(trove_testtools.TestCase):
def tearDown(self):
super(DB2BackupTests, self).tearDown()
self.backup_runner_patch.stop()
self.exec_list_database.stop()
self.exec_timeout_patch.stop()
def test_backup_success(self):

View File

@ -84,8 +84,6 @@ from trove.guestagent.datastore.service import BaseDbStatus
from trove.guestagent.db import models
from trove.guestagent import dbaas as dbaas_sr
from trove.guestagent.dbaas import get_filesystem_volume_stats
from trove.guestagent.dbaas import to_gb
from trove.guestagent.dbaas import to_mb
from trove.guestagent import pkg
from trove.guestagent.volume import VolumeDevice
from trove.instance.models import InstanceServiceStatus
@ -1678,30 +1676,6 @@ class InterrogatorTest(trove_testtools.TestCase):
def tearDown(self):
super(InterrogatorTest, self).tearDown()
def test_to_gb(self):
result = to_gb(123456789)
self.assertEqual(0.11, result)
def test_to_gb_small(self):
result = to_gb(2)
self.assertEqual(0.01, result)
def test_to_gb_zero(self):
result = to_gb(0)
self.assertEqual(0.0, result)
def test_to_mb(self):
result = to_mb(123456789)
self.assertEqual(117.74, result)
def test_to_mb_small(self):
result = to_mb(2)
self.assertEqual(0.01, result)
def test_to_mb_zero(self):
result = to_mb(0)
self.assertEqual(0.0, result)
def test_get_filesystem_volume_stats(self):
with patch.object(os, 'statvfs', return_value=MockStats):
result = get_filesystem_volume_stats('/some/path/')