# freezer/freezer/utils.py
'''
Copyright 2014 Hewlett-Packard
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This product includes cryptographic software written by Eric Young
(eay@cryptsoft.com). This product includes software written by Tim
Hudson (tjh@cryptsoft.com).
========================================================================
Freezer general utils functions
'''
import logging
import os
import time
import datetime
import re
import subprocess


def gen_manifest_meta(
        backup_opt_dict, manifest_meta_dict, meta_data_backup_file):
    '''
    Load the backup metadata information stored on Swift. This metadata
    is used to keep state between consecutive backup executions.
    If manifest_meta_dict already contains data, this is most probably
    not the first backup run for the provided backup name and host.
    In that case the conflicting key/value pairs are replaced in
    the dictionary.
    '''

    if manifest_meta_dict.get('x-object-meta-tar-prev-meta-obj-name'):
        tar_meta_prev = \
            manifest_meta_dict['x-object-meta-tar-prev-meta-obj-name']
        tar_meta_to_upload = \
            manifest_meta_dict['x-object-meta-tar-meta-obj-name'] = \
            manifest_meta_dict['x-object-meta-tar-prev-meta-obj-name'] = \
            meta_data_backup_file
    else:
        manifest_meta_dict['x-object-meta-tar-prev-meta-obj-name'] = \
            meta_data_backup_file
        manifest_meta_dict['x-object-meta-backup-name'] = \
            backup_opt_dict.backup_name
        manifest_meta_dict['x-object-meta-src-file-to-backup'] = \
            backup_opt_dict.src_file
        manifest_meta_dict['x-object-meta-abs-file-path'] = ''

        # Set manifest meta if encrypt_pass_file is provided.
        # The file contains a plain password that is used for the
        # encrypt and decrypt tasks
        manifest_meta_dict['x-object-meta-encrypt-data'] = 'Yes'
        if backup_opt_dict.encrypt_pass_file is False:
            manifest_meta_dict['x-object-meta-encrypt-data'] = ''
        manifest_meta_dict['x-object-meta-always-backup-level'] = ''
        if backup_opt_dict.always_backup_level:
            manifest_meta_dict['x-object-meta-always-backup-level'] = \
                backup_opt_dict.always_backup_level

        # Set manifest meta if the max_backup_level argument is provided.
        # Once the incremental backup reaches max_backup_level, it
        # restarts from level 0
        manifest_meta_dict['x-object-meta-maximum-backup-level'] = ''
        if backup_opt_dict.max_backup_level is not False:
            manifest_meta_dict['x-object-meta-maximum-backup-level'] = \
                str(backup_opt_dict.max_backup_level)

        # At the end of the execution, the object ages for the specified
        # swift container are checked. Objects older than the specified
        # number of days are removed.
        # The unit is an integer number of days, e.g. 5 == five days.
        manifest_meta_dict['x-object-meta-remove-backup-older-than-days'] = ''
        if backup_opt_dict.remove_older_than is not False:
            manifest_meta_dict['x-object-meta-remove-backup-older-than-days']\
                = '{0}'.format(backup_opt_dict.remove_older_than)
        manifest_meta_dict['x-object-meta-hostname'] = backup_opt_dict.hostname
        manifest_meta_dict['x-object-meta-segments-size-bytes'] = \
            str(backup_opt_dict.max_seg_size)
        manifest_meta_dict['x-object-meta-backup-created-timestamp'] = \
            str(backup_opt_dict.time_stamp)
        manifest_meta_dict['x-object-meta-providers-list'] = 'HP'
        manifest_meta_dict['x-object-meta-tar-meta-obj-name'] = \
            meta_data_backup_file
        tar_meta_to_upload = tar_meta_prev = \
            manifest_meta_dict['x-object-meta-tar-meta-obj-name'] = \
            manifest_meta_dict['x-object-meta-tar-prev-meta-obj-name']

        # Needs to be processed from the last existing backup file found
        # in Swift, matching hostname and backup name; the last existing
        # file can be identified from the timestamp
        manifest_meta_dict['x-object-meta-container-segments'] = \
            backup_opt_dict.container_segments

        # Set the restart_always_backup value to n days. According to
        # this option, when always_backup_level is set the backup is
        # reset to level 0 if the current backup timestamp is older than
        # the days in x-object-meta-restart-always-backup
        manifest_meta_dict['x-object-meta-restart-always-backup'] = ''
        if backup_opt_dict.restart_always_backup is not False:
            manifest_meta_dict['x-object-meta-restart-always-backup'] = \
                backup_opt_dict.restart_always_backup

    return (
        backup_opt_dict, manifest_meta_dict,
        tar_meta_to_upload, tar_meta_prev)
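

# Illustrative sketch (hypothetical values): shows the manifest keys that
# gen_manifest_meta() fills in on a first run, i.e. when the passed
# manifest dictionary is empty. The options namespace below is made up
# for the example.
def _example_gen_manifest_meta():
    class _Opt(object):
        backup_name = 'mydata'
        src_file = '/var/lib/mydata'
        encrypt_pass_file = False
        always_backup_level = False
        max_backup_level = 6
        remove_older_than = False
        hostname = 'web01'
        max_seg_size = 67108864
        time_stamp = 1417649003
        container_segments = 'freezer_segments'
        restart_always_backup = False

    opt, meta, to_upload, prev = gen_manifest_meta(
        _Opt(), dict(), 'tar_metadata_web01_mydata_1417649003_0')
    # meta['x-object-meta-maximum-backup-level'] == '6'
    # meta['x-object-meta-encrypt-data'] == '' (no pass file provided)
    return meta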


def validate_all_args(required_list):
    '''
    Return True if ALL the elements of required_list are truthy,
    False otherwise. Raise an Exception if required_list cannot
    be iterated.
    '''

    try:
        for element in required_list:
            if not element:
                return False
    except Exception as error:
        err = "[*] Error: validate_all_args: {0} {1}".format(
            required_list, error)
        logging.exception(err)
        raise Exception(err)

    return True


def validate_any_args(required_list):
    '''
    Return True if ANY element of required_list is truthy,
    False otherwise. Raise an Exception if required_list cannot
    be iterated.
    '''

    try:
        for element in required_list:
            if element:
                return True
    except Exception as error:
        err = "[*] Error: validate_any_args: {0} {1}".format(
            required_list, error)
        logging.exception(err)
        raise Exception(err)

    return False


def sort_backup_list(backup_opt_dict):
    '''
    Sort the backups by timestamp, newest first. The provided list
    contains strings in the format hostname_backupname_timestamp_level
    '''

    # Remove duplicate objects
    sorted_backups_list = list(set(backup_opt_dict.remote_match_backup))
    # Compare timestamps numerically; rsplit() keeps names that contain
    # underscores intact, since timestamp and level are always the last
    # two fields
    sorted_backups_list.sort(
        key=lambda x: int(x.rsplit('_', 2)[1]), reverse=True)

    return sorted_backups_list
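

# Usage sketch (hypothetical object names): duplicates are dropped and
# the remaining names are ordered newest first by their timestamp field.
def _example_sort_backup_list():
    class _Opt(object):
        remote_match_backup = [
            'web01_mydata_1417562603_0',
            'web01_mydata_1417649003_1',
            'web01_mydata_1417649003_1',  # duplicate, removed by set()
        ]

    return sort_backup_list(_Opt())
    # -> ['web01_mydata_1417649003_1', 'web01_mydata_1417562603_0']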


def create_dir(directory):
    '''
    Create a directory if it doesn't exist and log the operation
    '''

    expanded_dir_name = os.path.expanduser(directory)
    try:
        if not os.path.isdir(expanded_dir_name):
            logging.warning('[*] Directory {0} does not exist, '
                            'creating...'.format(expanded_dir_name))
            os.makedirs(expanded_dir_name)
        else:
            logging.warning('[*] Directory {0} found!'.format(
                expanded_dir_name))
    except Exception as error:
        err = '[*] Error while creating directory {0}: {1}'.format(
            expanded_dir_name, error)
        logging.exception(err)
        raise Exception(err)


def get_match_backup(backup_opt_dict):
    '''
    Return a dictionary containing the list of remote matching backups
    from backup_opt_dict.remote_obj_list.
    Backups have to match exactly against the backup name and the
    hostname of the node where freezer is executed. The matching object
    names are stored and available in
    backup_opt_dict.remote_match_backup
    '''

    if not backup_opt_dict.backup_name or not backup_opt_dict.container \
            or not backup_opt_dict.remote_obj_list:
        err = ('[*] Error: please provide a valid Swift container, '
               'backup name and the container contents')
        logging.exception(err)
        raise Exception(err)

    backup_name = backup_opt_dict.backup_name.lower()
    if backup_opt_dict.remote_obj_list:
        hostname = backup_opt_dict.hostname
        for container_object in backup_opt_dict.remote_obj_list:
            object_name = container_object.get('name', None)
            if object_name:
                obj_name_match = re.search(r'{0}_({1})_\d+?_\d+?$'.format(
                    hostname, backup_name), object_name.lower(), re.I)
                if obj_name_match:
                    backup_opt_dict.remote_match_backup.append(
                        object_name)
                    backup_opt_dict.remote_objects.append(container_object)

    return backup_opt_dict
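

# Matching sketch (hypothetical names): the object-name pattern is
# hostname_backupname_timestamp_level, anchored at the end of the name,
# so tar metadata objects with extra prefixes still match on their tail.
def _example_object_name_match():
    pattern = r'{0}_({1})_\d+?_\d+?$'.format('web01', 'mydata')
    assert re.search(pattern, 'web01_mydata_1417649003_0', re.I)
    assert re.search(pattern, 'tar_metadata_web01_mydata_1417649003_0', re.I)
    assert not re.search(pattern, 'web01_otherdata_1417649003_0', re.I)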


def get_newest_backup(backup_opt_dict):
    '''
    Return, from backup_opt_dict.remote_match_backup, the newest backup
    matching the provided backup name and the hostname of the node where
    freezer is executed. It corresponds to the previously executed
    backup.
    '''

    if not backup_opt_dict.remote_match_backup:
        return backup_opt_dict

    backup_timestamp = 0
    hostname = backup_opt_dict.hostname
    # Sort remote backup list using timestamp in reverse order,
    # that is from the newest to the oldest executed backup
    sorted_backups_list = sort_backup_list(backup_opt_dict)
    for remote_obj in sorted_backups_list:
        obj_name_match = re.search(r'^{0}_({1})_(\d+)_\d+?$'.format(
            hostname, backup_opt_dict.backup_name), remote_obj, re.I)
        if not obj_name_match:
            continue
        remote_obj_timestamp = int(obj_name_match.group(2))
        if remote_obj_timestamp > backup_timestamp:
            backup_timestamp = remote_obj_timestamp
            backup_opt_dict.remote_newest_backup = remote_obj
            break

    return backup_opt_dict
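

# Selection sketch (hypothetical names): with the list below, the newest
# matching object, timestamp 1417649003, ends up in remote_newest_backup.
def _example_get_newest_backup():
    class _Opt(object):
        hostname = 'web01'
        backup_name = 'mydata'
        remote_match_backup = [
            'web01_mydata_1417562603_0',
            'web01_mydata_1417649003_1',
        ]
        remote_newest_backup = u''

    return get_newest_backup(_Opt()).remote_newest_backup
    # -> 'web01_mydata_1417649003_1'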


def get_rel_oldest_backup(backup_opt_dict):
    '''
    Return, from swift, the relative oldest backup matching the provided
    backup name and the hostname of the node where freezer is executed.
    The relative oldest backup corresponds to the most recent level 0
    backup, i.e. the backup that starts the current incremental chain.
    '''

    if not backup_opt_dict.backup_name:
        err = ('[*] Error: please provide a valid backup name in '
               'backup_opt_dict.backup_name')
        logging.exception(err)
        raise Exception(err)

    backup_opt_dict.remote_rel_oldest = u''
    backup_name = backup_opt_dict.backup_name
    hostname = backup_opt_dict.hostname
    first_backup_name = False
    first_backup_ts = 0
    for container_object in backup_opt_dict.remote_obj_list:
        object_name = container_object.get('name', None)
        if not object_name:
            continue
        obj_name_match = re.search(r'{0}_({1})_(\d+)_(\d+?)$'.format(
            hostname, backup_name), object_name, re.I)
        if not obj_name_match:
            continue
        remote_obj_timestamp = int(obj_name_match.group(2))
        remote_obj_level = int(obj_name_match.group(3))
        if remote_obj_level == 0 and (remote_obj_timestamp > first_backup_ts):
            first_backup_name = object_name
            first_backup_ts = remote_obj_timestamp

    backup_opt_dict.remote_rel_oldest = first_backup_name
    return backup_opt_dict


def get_abs_oldest_backup(backup_opt_dict):
    '''
    Return, from swift, the absolute oldest backup matching the provided
    backup name and the hostname of the node where freezer is executed.
    The absolute oldest backup corresponds to the oldest available
    level 0 backup.
    '''

    if not backup_opt_dict.backup_name:
        err = ('[*] Error: please provide a valid backup name in '
               'backup_opt_dict.backup_name')
        logging.exception(err)
        raise Exception(err)

    backup_opt_dict.remote_abs_oldest = u''
    if len(backup_opt_dict.remote_match_backup) == 0:
        return backup_opt_dict

    backup_timestamp = 0
    hostname = backup_opt_dict.hostname
    # remote_match_backup holds plain object-name strings (as appended
    # by get_match_backup), so each element is used directly rather
    # than looked up with .get()
    for object_name in backup_opt_dict.remote_match_backup:
        obj_name_match = re.search(r'{0}_({1})_(\d+)_(\d+?)$'.format(
            hostname, backup_opt_dict.backup_name), object_name.lower(), re.I)
        if not obj_name_match:
            continue
        remote_obj_timestamp = int(obj_name_match.group(2))
        if backup_timestamp == 0:
            backup_timestamp = remote_obj_timestamp
        if remote_obj_timestamp <= backup_timestamp:
            backup_timestamp = remote_obj_timestamp
            backup_opt_dict.remote_abs_oldest = object_name

    return backup_opt_dict


def eval_restart_backup(backup_opt_dict):
    '''
    Restart the backup level if the first backup executed with
    always_backup_level is older than restart_always_backup
    '''

    if not backup_opt_dict.restart_always_backup:
        logging.info('[*] No need to set Backup {0} to level 0.'.format(
            backup_opt_dict.backup_name))
        return False

    logging.info('[*] Checking always backup level timestamp...')
    # Compute the amount of seconds to be compared with
    # the remote backup timestamp (86400 seconds per day)
    max_time = int(float(backup_opt_dict.restart_always_backup) * 86400)
    current_timestamp = backup_opt_dict.time_stamp
    backup_name = backup_opt_dict.backup_name
    hostname = backup_opt_dict.hostname
    # Get the relative oldest backup by calling get_rel_oldest_backup()
    backup_opt_dict = get_rel_oldest_backup(backup_opt_dict)
    if not backup_opt_dict.remote_rel_oldest:
        logging.info('[*] Relative oldest backup for backup name {0} on '
                     'host {1} not available. The backup level is NOT '
                     'restarted'.format(backup_name, hostname))
        return False

    obj_name_match = re.search(r'{0}_({1})_(\d+)_(\d+?)$'.format(
        hostname, backup_name), backup_opt_dict.remote_rel_oldest, re.I)
    if not obj_name_match:
        err = ('[*] No backup match available for backup {0} '
               'and host {1}'.format(backup_name, hostname))
        logging.info(err)
        # Raise the error instead of returning the exception object
        raise Exception(err)

    first_backup_ts = int(obj_name_match.group(2))
    if (current_timestamp - first_backup_ts) > max_time:
        logging.info(
            '[*] Backup {0} older than {1} days. Backup level set '
            'to 0'.format(
                backup_name, backup_opt_dict.restart_always_backup))
        return True
    else:
        logging.info('[*] No need to set level 0 for Backup {0}.'.format(
            backup_name))
        return False
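

# Worked example (hypothetical values): with restart_always_backup=30 the
# threshold is 30 * 86400 = 2592000 seconds, so a chain whose level 0
# backup is 31 days old (2678400 seconds) triggers a restart to level 0.
def _example_restart_threshold():
    restart_always_backup = 30
    max_time = int(float(restart_always_backup) * 86400)  # 2592000
    chain_age_seconds = 31 * 86400  # 2678400
    return chain_age_seconds > max_time  # True -> restart from level 0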


def start_time():
    '''
    Compute the execution start time, write it to the logs and return
    the timestamp
    '''

    fmt = '%Y-%m-%d %H:%M:%S'
    today_start = datetime.datetime.now()
    time_stamp = int(time.mktime(today_start.timetuple()))
    fmt_date_start = today_start.strftime(fmt)
    logging.info('[*] Execution Started at: {0}'.format(fmt_date_start))
    return time_stamp, today_start


def elapsed_time(today_start):
    '''
    Compute elapsed time from today_start and write basic stats
    in the log file
    '''

    fmt = '%Y-%m-%d %H:%M:%S'
    today_finish = datetime.datetime.now()
    fmt_date_finish = today_finish.strftime(fmt)
    time_elapsed = today_finish - today_start
    # Logging end execution information
    logging.info('[*] Execution Finished, at: {0}'.format(fmt_date_finish))
    logging.info('[*] Time Elapsed: {0}'.format(time_elapsed))
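

# Usage sketch: the two helpers are meant to bracket a long-running task.
def _example_timing():
    time_stamp, today_start = start_time()
    # ... long-running backup work would happen here ...
    elapsed_time(today_start)
    return time_stamp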


def set_backup_level(backup_opt_dict, manifest_meta_dict):
    '''
    Set the backup level params in backup_opt_dict and the swift
    manifest. This is a fundamental part of the incremental backup
    '''

    if manifest_meta_dict.get('x-object-meta-backup-name'):
        backup_opt_dict.curr_backup_level = int(
            manifest_meta_dict.get('x-object-meta-backup-current-level'))
        max_backup_level = manifest_meta_dict.get(
            'x-object-meta-maximum-backup-level')
        always_backup_level = manifest_meta_dict.get(
            'x-object-meta-always-backup-level')
        restart_always_backup = manifest_meta_dict.get(
            'x-object-meta-restart-always-backup')
        if max_backup_level:
            max_backup_level = int(max_backup_level)
            if backup_opt_dict.curr_backup_level < max_backup_level:
                backup_opt_dict.curr_backup_level += 1
                manifest_meta_dict['x-object-meta-backup-current-level'] = \
                    str(backup_opt_dict.curr_backup_level)
            else:
                manifest_meta_dict['x-object-meta-backup-current-level'] = \
                    backup_opt_dict.curr_backup_level = '0'
        elif always_backup_level:
            always_backup_level = int(always_backup_level)
            if backup_opt_dict.curr_backup_level < always_backup_level:
                backup_opt_dict.curr_backup_level += 1
                manifest_meta_dict['x-object-meta-backup-current-level'] = \
                    str(backup_opt_dict.curr_backup_level)
            # If restart_always_backup is set, the backup age will be
            # computed and if the backup age in days is >=
            # restart_always_backup, then backup-current-level will be
            # set to 0
            if restart_always_backup:
                backup_opt_dict.restart_always_backup = restart_always_backup
                if eval_restart_backup(backup_opt_dict):
                    backup_opt_dict.curr_backup_level = '0'
                    manifest_meta_dict['x-object-meta-backup-current-level'] \
                        = '0'
    else:
        backup_opt_dict.curr_backup_level = \
            manifest_meta_dict['x-object-meta-backup-current-level'] = '0'

    return backup_opt_dict, manifest_meta_dict
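

# Progression sketch (hypothetical manifest): with a maximum level of 2,
# consecutive runs move the current level 1 -> 2, then wrap back to 0.
def _example_level_progression():
    class _Opt(object):
        curr_backup_level = 0

    meta = {
        'x-object-meta-backup-name': 'mydata',
        'x-object-meta-backup-current-level': '1',
        'x-object-meta-maximum-backup-level': '2',
        'x-object-meta-always-backup-level': '',
        'x-object-meta-restart-always-backup': '',
    }
    opt, meta = set_backup_level(_Opt(), meta)
    return opt.curr_backup_level  # 2; a further run would reset it to 0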


def get_vol_fs_type(backup_opt_dict):
    '''
    The argument needs to be the full path of an lvm logical volume,
    i.e. /dev/vg0/var, or a disk partition like /dev/sda1. The returned
    value is the file system type
    '''

    vol_name = backup_opt_dict.lvm_srcvol
    if os.path.exists(vol_name) is False:
        err = '[*] Provided volume name not found: {0} '.format(vol_name)
        logging.exception(err)
        raise Exception(err)

    file_cmd = '{0} -0 -bLs --no-pad --no-buffer --preserve-date '\
        '{1}'.format(backup_opt_dict.file_path, vol_name)
    file_process = subprocess.Popen(
        file_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
        stderr=subprocess.PIPE, shell=True,
        executable=backup_opt_dict.bash_path)
    (file_out, file_err) = file_process.communicate()
    file_match = re.search(r'(\S+?) filesystem data', file_out, re.I)
    if file_match is None:
        err = '[*] File system type not guessable: {0}'.format(file_err)
        logging.exception(err)
        raise Exception(err)
    else:
        filesys_type = file_match.group(1)
        logging.info('[*] File system {0} found for volume {1}'.format(
            filesys_type, vol_name))
        return filesys_type.lower().strip()
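

# Parsing sketch: `file -bLs` output for an ext4 partition typically looks
# like "Linux rev 1.0 ext4 filesystem data, UUID=...", from which the
# regex above extracts the token right before "filesystem data".
def _example_fs_type_match():
    sample_output = 'Linux rev 1.0 ext4 filesystem data, UUID=abcd'
    file_match = re.search(r'(\S+?) filesystem data', sample_output, re.I)
    return file_match.group(1)  # 'ext4'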


def check_backup_existance(backup_opt_dict):
    '''
    Check if any backup is already available on Swift.
    The verification is done by backup_name, which needs to be unique
    in Swift. This function returns an empty dict if no backups are
    found, or the Manifest metadata if the backup_name is available
    '''

    if not backup_opt_dict.backup_name or not backup_opt_dict.container or \
            not backup_opt_dict.remote_obj_list:
        logging.warning(
            ('[*] A valid Swift container, or backup name or container '
             'content not available. Level 0 backup is being executed '))
        return dict()

    logging.info("[*] Retrieving backup name {0} on container {1}".format(
        backup_opt_dict.backup_name.lower(), backup_opt_dict.container))

    backup_opt_dict = get_match_backup(backup_opt_dict)
    backup_opt_dict = get_newest_backup(backup_opt_dict)

    if backup_opt_dict.remote_newest_backup:
        sw_connector = backup_opt_dict.sw_connector
        logging.info("[*] Backup {0} found!".format(
            backup_opt_dict.backup_name))
        backup_match = sw_connector.head_object(
            backup_opt_dict.container, backup_opt_dict.remote_newest_backup)
        return backup_match
    else:
        logging.warning("[*] No such backup {0} available... Executing "
                        "level 0 backup".format(backup_opt_dict.backup_name))
        return dict()


def add_host_name_ts_level(backup_opt_dict, time_stamp=None):
    '''
    Create the object name as:
    hostname_backupname_timestamp_backup_level
    '''

    if backup_opt_dict.backup_name is False:
        err = ('[*] Error: Please specify the backup name with '
               '--backup-name option')
        logging.exception(err)
        raise Exception(err)

    if time_stamp is None:
        # Compute the timestamp at call time; a default argument of
        # int(time.time()) would be evaluated only once, at import time
        time_stamp = int(time.time())

    backup_name = u'{0}_{1}_{2}_{3}'.format(
        backup_opt_dict.hostname,
        backup_opt_dict.backup_name,
        time_stamp, backup_opt_dict.curr_backup_level)
    return backup_name
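

# Naming sketch (hypothetical values): a level 1 backup of 'mydata' taken
# on host 'web01' at epoch 1417649003 yields the object name below.
def _example_object_name():
    class _Opt(object):
        backup_name = 'mydata'
        hostname = 'web01'
        curr_backup_level = 1

    return add_host_name_ts_level(_Opt(), time_stamp=1417649003)
    # -> u'web01_mydata_1417649003_1'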


def get_mount_from_path(path):
    """
    Take a file system path as argument and return the mount point
    for that file system path.

    :param path: file system path
    :returns: mount point of path
    """

    if not os.path.exists(path):
        logging.critical('[*] Error: provided path does not exist')
        raise IOError('[*] Error: provided path {0} does not exist'.format(
            path))

    mount_point_path = os.path.abspath(path)
    while not os.path.ismount(mount_point_path):
        mount_point_path = os.path.dirname(mount_point_path)
    return mount_point_path
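

# Walk sketch: the loop climbs parent directories until os.path.ismount()
# is true; e.g. starting from the current working directory it ends at
# that directory's mount point ('/' on a single-partition system).
def _example_mount_walk():
    return get_mount_from_path(os.getcwd())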