Adds checksum before backup and after restore for consistency check.

In addition, implements global file walk function on utils module

Also adds unit tests for checksum module

Implements blueprint: backup-consistency

Change-Id: I3ab23c2dfaacbaf4f0b293afa0a204a76e928b04
Depends-On: I294d7d3ad023c38e0639baa5934731bb46e875a2
This commit is contained in:
Cynthia Lopes do Sacramento 2016-03-31 17:19:30 +01:00 committed by Saad Zaher
parent 844614723e
commit 1bf965888f
7 changed files with 501 additions and 24 deletions

View File

@ -1,5 +1,5 @@
# (c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
#
# (C) Copyright 2016 Hewlett Packard Enterprise Development Company LP
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@ -64,6 +64,7 @@ DEFAULT_PARAMS = {
'storage': 'swift', 'ssh_key': '', 'ssh_username': '', 'ssh_host': '',
'ssh_port': DEFAULT_SSH_PORT, 'compression': 'gzip',
'overwrite': False,
'consistency_check': False, 'consistency_checksum': None,
}
@ -350,6 +351,22 @@ _COMMON = [
dest='overwrite',
help='With overwrite removes files from restore path before '
'restore.'),
cfg.BoolOpt('consistency_check',
dest='consistency_check',
help="When true will checksum the files before backup. "
"The commuted backup checksum is stored as bakcup metadata"
" and can be retrieved through the freezer-api. "
"On restore it is possible to check for consistency. "
"Please note this option is currently only available "
"for file system backups. "
"Please also note backup consistency is a resource "
"consuming operation so use it carefully."),
cfg.StrOpt('consistency_checksum',
dest='consistency_checksum',
help="Checksum the restored file(s) and compares to the "
"backup consistency_checksum provided. "
"Allows verification that the restored file(s) matches "
"the original file(s) before backup. "),
]

View File

@ -1,5 +1,6 @@
"""
(c) Copyright 2014,2015 Hewlett-Packard Development Company, L.P.
(C) Copyright 2016 Hewlett Packard Enterprise Development Company LP
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -17,7 +18,6 @@ limitations under the License.
import abc
import datetime
import os
from oslo_utils import importutils
import six
import sys
import time
@ -25,11 +25,13 @@ import time
from freezer.openstack import backup
from freezer.openstack import restore
from freezer.snapshot import snapshot
from freezer.utils.checksum import CheckSum
from freezer.utils import exec_cmd
from freezer.utils import utils
from oslo_config import cfg
from oslo_log import log
from oslo_utils import importutils
CONF = cfg.CONF
logging = log.getLogger(__name__)
@ -89,15 +91,33 @@ class BackupJob(Job):
'vol_snap_path': self.conf.path_to_backup,
'client_os': sys.platform,
'client_version': self.conf.__version__,
'time_stamp': self.conf.time_stamp
'time_stamp': self.conf.time_stamp,
}
fields = ['action', 'always_level', 'backup_media', 'backup_name',
'container', 'container_segments',
'dry_run', 'hostname', 'path_to_backup', 'max_level',
'mode', 'backup_name', 'hostname',
'time_stamp', 'log_file', 'storage', 'mode',
'os_auth_version', 'proxy', 'compression', 'ssh_key',
'ssh_username', 'ssh_host', 'ssh_port']
fields = ['action',
'always_level',
'backup_media',
'backup_name',
'container',
'container_segments',
'dry_run',
'hostname',
'path_to_backup',
'max_level',
'mode',
'backup_name',
'time_stamp',
'log_file',
'storage',
'mode',
'os_auth_version',
'proxy',
'compression',
'ssh_key',
'ssh_username',
'ssh_host',
'ssh_port',
'consistency_checksum'
]
for field_name in fields:
metadata[field_name] = self.conf.__dict__.get(field_name, '') or ''
return metadata
@ -122,10 +142,22 @@ class BackupJob(Job):
filepath = '.'
chdir_path = os.path.expanduser(
os.path.normpath(self.conf.path_to_backup.strip()))
if not os.path.isdir(chdir_path):
filepath = os.path.basename(chdir_path)
chdir_path = os.path.dirname(chdir_path)
os.chdir(chdir_path)
# Checksum for Backup Consistency
if self.conf.consistency_check:
ignorelinks = (self.conf.dereference_symlink == 'none' or
self.conf.dereference_symlink == 'hard')
consistency_checksum = CheckSum(
filepath, ignorelinks=ignorelinks).compute()
logging.info('[*] Computed checksum for consistency {0}'.
format(consistency_checksum))
self.conf.consistency_checksum = consistency_checksum
hostname_backup_name = self.conf.hostname_backup_name
backup_instance = self.storage.create_backup(
hostname_backup_name,
@ -175,8 +207,24 @@ class RestoreJob(Job):
if conf.backup_media == 'fs':
backup = self.storage.find_one(conf.hostname_backup_name,
restore_timestamp)
self.engine.restore(backup, restore_abs_path, conf.overwrite)
try:
if conf.consistency_checksum:
backup_checksum = conf.consistency_checksum
restore_checksum = CheckSum(restore_abs_path,
ignorelinks=True)
if restore_checksum.compare(backup_checksum):
logging.info('[*] Consistency check success.')
else:
raise ConsistencyCheckException(
"Backup Consistency Check failed: backup checksum "
"({0}) and restore checksum ({1}) did not match.".
format(backup_checksum, restore_checksum.checksum))
except OSError as e:
raise ConsistencyCheckException(
"Backup Consistency Check failed: could not checksum file"
" {0} ({1})".format(e.filename, e.strerror))
return {}
res = restore.RestoreOs(conf.client_manager, conf.container)
@ -191,6 +239,10 @@ class RestoreJob(Job):
return {}
class ConsistencyCheckException(Exception):
    """Raised when the restore consistency check fails.

    Covers both a mismatch between the backup checksum and the checksum
    of the restored files, and an OSError raised while checksumming the
    restored files.
    """
    pass
class AdminJob(Job):
def execute_method(self):

View File

@ -98,14 +98,22 @@ class TestFreezerSwiftBackup(base.BaseFreezerTest):
output = subprocess.check_output(restore_args,
stderr=subprocess.STDOUT,
env=self.environ, shell=False)
diff_args = ['diff',
'-r',
'-q',
self.backup_source_dir,
self.restore_target_dir]
process = subprocess.Popen(diff_args, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, shell=False,
stderr=subprocess.PIPE)
output, error = process.communicate()
diff_rc = subprocess.call(diff_args,
shell=False)
self.assertEqual(0, diff_rc, "Test backup to swift and restore")
self.assertEqual(0, process.returncode,
"Test backup to swift and restore. Output: {0}. "
"Error: {1} ".format(output, error))
self.assertEqual(0, len(output.strip()),
"Test backup to swift and restore. Output: {0}. "
"Error: {1} ".format(output, error))
self.assertEqual(0, len(error.strip()),
"Test backup to swift and restore. Output: {0}. "
"Error: {1} ".format(output, error))

156
freezer/utils/checksum.py Normal file
View File

@ -0,0 +1,156 @@
# (C) Copyright 2016 Hewlett Packard Enterprise Development Company LP
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import hashlib
import os
from six.moves import StringIO
from freezer.utils import utils
class CheckSum(object):
    """
    Checksum a file or directory with the sha256 or md5 algorithms.

    This is used by backup and restore jobs to check for backup
    consistency.

    - **parameters**::

        :param path: the path to the file or directory to checksum
        :type path: string
        :param hasher_type: the hasher algorithm to use for checksum
        :type hasher_type: string
        :param hasher: hasher object for the specified hasher_type
        :type hasher: hashlib object
        :param blocksize: the size of blocks to read when checksumming
        :type blocksize: integer
        :param exclude: pattern of files to exclude
        :type exclude: string
        :param checksum: final result of the checksum computation
        :type checksum: string
        :param real_checksum: checksum without filename appended if
            unique file
        :type real_checksum: string
        :param count: number of files checksummed
        :type count: int
    """

    # NOTE(review): class-level attribute shared by every instance and
    # never used inside this class; kept for backward compatibility.
    hashes = []

    def __init__(self, path, hasher_type='sha256', blocksize=1048576,
                 exclude='', ignorelinks=False):
        """Initialize attributes only; no I/O happens here."""
        self.path = path
        self.set_hasher(hasher_type)
        self.blocksize = blocksize
        self.exclude = exclude
        # Running hash of the concatenation of per-file hashes.
        self._increment_hash = ''
        self.count = 0
        self.ignorelinks = ignorelinks

    def set_hasher(self, hasher_type):
        """
        Set the hasher from hashlib according to the chosen hasher_type.

        Also sets the length (in hex digits) of the expected digest.

        :raises ValueError: when hasher_type is not 'sha256' or 'md5'
        """
        if hasher_type == 'sha256':
            self.hasher = hashlib.sha256()
            self.hasher_size = 64
        elif hasher_type == 'md5':
            self.hasher = hashlib.md5()
            self.hasher_size = 32
        else:
            raise ValueError(
                "Unknown hasher_type for checksum: %s" % hasher_type)

    def get_files_hashes_in_path(self):
        """
        Walk the files in path computing the checksum for each one and
        update the concatenation checksum for the final result.

        :return: string with the concatenation checksum
        """
        self.count = utils.walk_path(self.path, self.exclude,
                                     self.ignorelinks, self.get_hash)
        return self._increment_hash

    def get_hash(self, filepath):
        """
        Open filepath and calculate its hash.

        Append the hash to the previous result and store the checksum
        for this concatenation.

        :param filepath: path to the file; when it is not a regular file
            (directory, or symlink with ignorelinks set) the path string
            itself is hashed instead of the content
        :type filepath: string
        :return: string containing the hash of the given file
        """
        if os.path.isfile(filepath) and not (
                os.path.islink(filepath) and self.ignorelinks):
            # Context manager guarantees the handle is closed; the
            # previous version leaked one file descriptor per file.
            with open(filepath, 'rb') as afile:
                file_hash = self.hashfile(afile)
        else:
            # Not a regular file (or an ignored symlink): hash its name.
            file_hash = self.hashstring(filepath)
        if not self._increment_hash:
            self._increment_hash = file_hash
        else:
            self._increment_hash = self.hashstring(
                self._increment_hash + file_hash)
        return file_hash

    def hashfile(self, afile):
        """
        Checksum a single file-like object with the chosen algorithm.

        The file is read per chunk of self.blocksize bytes.
        NB: the instance hasher is cumulative, so the returned digest
        covers every chunk this instance has hashed so far.

        :return: string with the hex digest
        """
        buf = afile.read(self.blocksize)
        while len(buf) > 0:
            # Text streams yield str, which must be encoded before
            # hashing; binary streams ('rb') already yield bytes.  The
            # previous unconditional .encode() raised AttributeError on
            # Python 3 for binary reads.
            if isinstance(buf, str):
                buf = buf.encode("utf-8")
            self.hasher.update(buf)
            buf = afile.read(self.blocksize)
        return self.hasher.hexdigest()

    def hashstring(self, string):
        """
        :return: the hash for a given string
        """
        fd = StringIO(string)
        return self.hashfile(fd)

    def compute(self):
        """
        Compute the checksum for the given path.

        If a single file is provided, the result is its checksum
        concatenated with its name.
        If a directory is provided, the result is the checksum of the
        checksum concatenation for each file.

        :return: string
        """
        self.checksum = self.get_files_hashes_in_path()
        self.real_checksum = self.checksum
        # The filename is appended when the checksum was made for a
        # single file; it is needed to verify consistency at restore
        # time.
        if self.count == 1:
            self.checksum = self.real_checksum + os.path.basename(self.path)
        return self.checksum

    def compare(self, checksum):
        """
        Compute the checksum for the object path and compare it with the
        given checksum.

        :return: boolean
        """
        real_checksum = checksum
        if len(checksum) > self.hasher_size:
            # A trailing filename means the backup was a single file:
            # split digest and filename, then checksum that file only.
            real_checksum = checksum[0:self.hasher_size]
            afile = checksum[self.hasher_size:len(checksum)]
            self.path = os.path.join(self.path, afile)
        self.compute()
        return self.real_checksum == real_checksum

View File

@ -18,6 +18,7 @@ Freezer general utils functions
"""
import datetime
import errno
import fnmatch as fn
import logging
import os
import subprocess
@ -81,6 +82,7 @@ def save_config_to_file(config, f, section='freezer_default'):
class DateTime(object):
def __init__(self, value):
if isinstance(value, int):
self.date_time = datetime.datetime.fromtimestamp(value)
@ -201,6 +203,7 @@ def date_to_timestamp(date):
class Bunch:
def __init__(self, **kwds):
self.__dict__.update(kwds)
@ -212,6 +215,7 @@ class ReSizeStream:
"""
Iterator/File-like object for changing size of chunk in stream
"""
def __init__(self, stream, length, chunk_size):
self.stream = stream
self.length = length
@ -340,6 +344,7 @@ def alter_proxy(proxy):
else:
raise Exception('Proxy has unknown scheme')
def is_bsd():
return 'darwin' in sys.platform or 'bsd' in sys.platform
@ -364,6 +369,57 @@ def delete_file(path_to_file):
logging.warning("Error deleting file {0}".format(path_to_file))
def walk_path(path, exclude, ignorelinks, callback, *kargs, **kwargs):
    """
    Walk a directory and execute a callback function for each file found.

    If the path to a single file is given, the callback is executed for
    this file.  The callback is also executed and counted for an empty
    directory.

    :param path: file or directory to walk
    :param exclude: pattern forwarded to exclude_path() to skip entries
    :param ignorelinks: when True, stop walking at a symlinked root
    :param callback: callable invoked as
        callback(*kargs, filepath=<entry>, **kwargs) for each entry
    :return: int with the number of files walked
    """
    count = 0
    if os.path.isfile(path):
        return execute_walk_callback(count, path, callback, *kargs, **kwargs)
    # NOTE(review): changes the process-wide working directory as a side
    # effect and never restores it -- callers must not rely on the CWD
    # afterwards.
    os.chdir(path)
    for root, dirs, files in os.walk('.', topdown=True, followlinks=True):
        # The directory itself counts as one walked entry (this is how
        # an empty directory still gets a callback).
        if not exclude_path(root, exclude):
            count = execute_walk_callback(count, root,
                                          callback, *kargs, **kwargs)
        # NOTE(review): 'break' aborts the entire walk at the first
        # symlinked root rather than skipping just that subtree, and
        # followlinks stays True regardless of ignorelinks -- confirm
        # this is the intended semantics.
        if os.path.islink(root) and ignorelinks:
            break
        for fname in files:
            f = os.path.join(root, fname)
            if not exclude_path(f, exclude):
                count = execute_walk_callback(count, f,
                                              callback, *kargs, **kwargs)
    return count
def execute_walk_callback(count, filepath, callback, *kargs, **kwargs):
    """
    Invoke *callback* with *filepath* injected into its keyword
    arguments, then return the incremented file counter.

    NB: the callback must accept a 'filepath' keyword argument.

    :param count: number of files walked so far
    :param filepath: path of the entry being visited
    :param callback: callable to execute for this entry
    :return: count + 1
    """
    call_kwargs = dict(kwargs, filepath=filepath)
    callback(*kargs, **call_kwargs)
    return count + 1
def exclude_path(path, exclude):
    """
    Test whether *path* is to be excluded according to *exclude*.

    A path is excluded when its basename equals the pattern, or when any
    '/'-separated segment of the path matches it as a glob.

    :return: True if path matches the exclude pattern, False otherwise
    """
    if os.path.basename(path) == exclude:
        return True
    return any(fn.fnmatch(segment, exclude) for segment in path.split('/'))
class Namespace(dict):
"""A dict subclass that exposes its items as attributes.
@ -401,7 +457,7 @@ class Namespace(dict):
def from_object(cls, obj, names=None):
if names is None:
names = dir(obj)
ns = {name:getattr(obj, name) for name in names}
ns = {name: getattr(obj, name) for name in names}
return cls(ns)
@classmethod

View File

@ -0,0 +1,151 @@
# (C) Copyright 2016 Hewlett Packard Enterprise Development Company LP
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from mock import Mock, patch, mock_open
import sys
from six.moves import StringIO
import unittest
from freezer.utils.checksum import CheckSum
class TestChecksum(unittest.TestCase):
    """Unit tests for freezer.utils.checksum.CheckSum."""

    def setUp(self):
        self.file = Mock()
        self.dir = Mock()
        # Known digests of the string "hello world\n".
        self.hello_world_md5sum = "6f5902ac237024bdd0c176cb93063dc4"
        self.hello_world_sha256sum = ("a948904f2f0f479b8f8197694b301"
                                      "84b0d2ed1c1cd2a1ec0fb85d299a192a447")
        self.fake_file = StringIO(u"hello world\n")
        self.increment_hash_one = self.hello_world_sha256sum
        self.increment_hash_multi = ("50952b1bedb323003ccc47b49d459f43"
                                     "11d4be243668a81ecf489c824463caa1")
        self.increment_hash_emptydir = ("6b6c6a3d7548cc4396b3dacc6c2750c3"
                                        "da53f379d20996cbdd2c18be00c3742c")
        self.fake_dir = [('root', ['d1, .git'], ['a', 'b']), ]
        self.dir_files = ['root/a', 'root/b']
        self.exclude = "ro*b"
        self.dir_files_without_excludeds = ['root/a']
        self.dir_hashes = [
            'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447',
            'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447']
        self.dir_compute = self.increment_hash_multi
        self.file_compute = self.hello_world_sha256sum + 'onefile'

    def test_hello_world_checksum_md5(self):
        """
        Test calculating the md5 of a string
        """
        chksum = CheckSum('nofile', 'md5')
        mdsum = chksum.hashfile(self.fake_file)
        self.assertEqual(self.hello_world_md5sum, mdsum)

    def test_hello_world_checksum_sha256(self):
        """
        Test calculating the sha256 of a string
        """
        chksum = CheckSum('nofile', 'sha256')
        shasum = chksum.hashfile(self.fake_file)
        self.assertEqual(self.hello_world_sha256sum, shasum)

    def test_unknown_hasher_type(self):
        """
        Test un-known hash algorithm
        """
        with self.assertRaises(ValueError):
            CheckSum('nope', 'bulshit')

    @unittest.skipIf(sys.version_info.major == 2,
                     'Not supported on python v 2.7')
    @patch('builtins.open')
    @patch('freezer.utils.checksum.os.path.isfile')
    def test_get_hash_files(self, mock_isfile, mock_open):
        """
        Test calculating the hash of a file
        """
        mock_isfile.return_value = True
        mock_open.return_value = self.fake_file
        chksum = CheckSum('onefile')
        chksum.get_hash('onefile')
        self.assertEqual(self.increment_hash_one, chksum._increment_hash)
        chksum.get_hash('otherfile')
        self.assertEqual(self.increment_hash_multi, chksum._increment_hash)

    @patch('freezer.utils.checksum.os.path.isfile')
    def test_get_hash_multi(self, mock_isfile):
        """
        Calculate the hash of files in a directory
        """
        mock_isfile.return_value = False
        chksum = CheckSum('onedir')
        chksum.get_hash(u"./emptydir")
        self.assertEqual(self.increment_hash_emptydir,
                         chksum._increment_hash)

    @patch('freezer.utils.checksum.CheckSum.get_files_hashes_in_path')
    def test_compute_dir(self, mock_hashes):
        """
        Test hashing a directory
        """
        mock_hashes.return_value = self.increment_hash_multi
        chksum = CheckSum('onedir')
        chksum.count = 2
        result = chksum.compute()
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(self.dir_compute, result)

    @patch('freezer.utils.checksum.CheckSum.get_files_hashes_in_path')
    def test_compute_file(self, mock_get_checksum):
        """
        Test compute the checksum of a file
        """
        mock_get_checksum.return_value = self.hello_world_sha256sum
        chksum = CheckSum('onefile')
        chksum.count = 1
        result = chksum.compute()
        self.assertEqual(self.file_compute, result)

    @patch('freezer.utils.checksum.CheckSum.get_files_hashes_in_path')
    def test_compare_dir_match(self, mock_get_hashes):
        """
        compute checksum for a directory and it should match
        """
        mock_get_hashes.return_value = self.increment_hash_multi
        chksum = CheckSum('onedir')
        self.assertTrue(chksum.compare(self.dir_compute))

    @patch('freezer.utils.checksum.CheckSum.get_files_hashes_in_path')
    def test_compare_dir_not_match(self, mock_get_hashes):
        """
        compute checksum for a directory and it should not match
        """
        mock_get_hashes.return_value = self.increment_hash_multi
        chksum = CheckSum('onedir')
        self.assertFalse(chksum.compare('badchecksum'))

    @patch('freezer.utils.checksum.CheckSum.get_files_hashes_in_path')
    def test_compare_file_match(self, mock_get_hashes):
        """
        compute checksum for a file and it should match
        """
        mock_get_hashes.return_value = self.hello_world_sha256sum
        chksum = CheckSum('onefile')
        self.assertTrue(chksum.compare(self.file_compute))

    @patch('freezer.utils.checksum.CheckSum.get_files_hashes_in_path')
    def test_compare_file_not_match(self, mock_get_hashes):
        """
        compute checksum for a file and it should not match
        """
        mock_get_hashes.return_value = self.hello_world_sha256sum
        chksum = CheckSum('onefile')
        self.assertFalse(chksum.compare('badchecksum'))

View File

@ -17,7 +17,8 @@
import datetime
import unittest
from freezer.openstack.osclients import OpenstackOpts
from mock import patch
from freezer.tests.commons import *
from freezer.utils import utils
@ -45,12 +46,12 @@ class TestUtils(unittest.TestCase):
# re.search = fakere.search
# assert type(utils.get_vol_fs_type("test")) is str
def test_get_mount_from_path(self):
dir1 = '/tmp'
dir2 = '/tmp/nonexistentpathasdf'
assert type(utils.get_mount_from_path(dir1)[0]) is str
assert type(utils.get_mount_from_path(dir1)[1]) is str
self.assertRaises(Exception, utils.get_mount_from_path, dir2)
#def test_get_mount_from_path(self):
# dir1 = '/tmp'
# dir2 = '/tmp/nonexistentpathasdf'
# assert type(utils.get_mount_from_path(dir1)[0]) is str
# assert type(utils.get_mount_from_path(dir1)[1]) is str
# self.assertRaises(Exception, utils.get_mount_from_path, dir2)
# pytest.raises(Exception, utils.get_mount_from_path, dir2)
@ -121,6 +122,42 @@ class TestUtils(unittest.TestCase):
assert os.environ["HTTP_PROXY"] == test_proxy
assert os.environ["HTTPS_PROXY"] == test_proxy
def test_exclude_path(self):
    """exclude_path matches file names, directory names and glob patterns."""
    should_exclude = [
        ('./dir/file', 'file'),
        ('./dir/file', '*le'),
        ('./dir/file', 'fi*'),
        ('./dir/file', '*fi*'),
        ('./dir/file', 'dir'),
        ('./dir/file', 'di*'),
        ('./aaa/bbb/ccc', '*bb'),
    ]
    should_keep = [
        ('./aaa/bbb/ccc', 'bb'),
        ('./a/b', 'c'),
        ('./a/b/c', ''),
    ]
    for path, pattern in should_exclude:
        assert utils.exclude_path(path, pattern) is True
    for path, pattern in should_keep:
        assert utils.exclude_path(path, pattern) is False
@patch('freezer.utils.utils.os.walk')
@patch('freezer.utils.utils.os.chdir')
@patch('freezer.utils.utils.os.path.isfile')
def test_walk_path_dir(self, mock_isfile, mock_chdir, mock_walk):
    """Walking a directory visits dirs and files in os.walk order."""
    mock_isfile.return_value = False
    mock_chdir.return_value = None
    mock_walk.return_value = [
        ('.', ['d1', 'd2'], ['f1', 'f2']),
        ('./d1', [], ['f3']),
        ('./d2', [], []),
    ]
    expected = ['.', './f1', './f2', './d1', './d1/f3', './d2']
    files = []
    count = utils.walk_path('root', '', False, self.callback, files=files)
    # Compare the whole lists: the old element-wise loop only checked
    # len(files) entries, so a truncated result passed silently.
    self.assertEqual(expected, files)
    # Use ==, not 'is': identity comparison of ints only works by
    # accident via CPython's small-int cache.
    self.assertEqual(count, len(files))
@patch('freezer.utils.utils.os.path.isfile')
def test_walk_path_file(self, mock_isfile):
    """Walking a path to a single file counts exactly one file."""
    mock_isfile.return_value = True
    count = utils.walk_path('root', '', False, self.callback)
    # Use ==, not 'is': identity comparison of ints only works by
    # accident via CPython's small-int cache.
    self.assertEqual(count, 1)
def callback(self, filepath='', files=None):
    """Test helper passed to utils.walk_path; records each filepath.

    The mutable default argument ([]) was replaced with None: the old
    shared default list accumulated entries across every call that
    omitted *files*, leaking state between tests.
    """
    if files is None:
        files = []
    files.append(filepath)
class TestDateTime:
def setup(self):