freezer scheduler

The freezer scheduler is to be executed
as a daemon process on the client machines

It has the following responsibilities:

  * when using the api:
    - register -if necessary- as a client in the api
    - download the list of jobs from the api
    - schedule the jobs for execution
    - launch the freezer client at the scheduled time
    - collect metadata and exit codes and upload them to the api
    - periodically poll the api for new/updated jobs
    - if a job is part of a session (a coordinated group of jobs)
      it updates the session status when job starts/stops

  * when not using the api
    - load jobs configurations from files
    - schedule the jobs for execution
    - launch the freezer client at the scheduled time

The freezer scheduler can also be used to manage jobs
and sessions using the following positional parameters:

  job-list
  job-get
  job-create
  job-delete
  job-start
  job-stop
  session-list
  session-get
  session-create
  session-delete
  session-list-job
  session-add-job
  session-remove-job

or to register the client in the api using the positional parameter:

  register

Implements blueprint: freezer-scheduler-start

Change-Id: I06ae202a0f464f7240c137744a5b54d1177cabd9
This commit is contained in:
Fabrizio Vanni 2015-06-08 12:05:08 +01:00 committed by Fausto Marzi
parent e4232f242d
commit d4b9399e9b
42 changed files with 5011 additions and 544 deletions

View File

@ -0,0 +1,80 @@
"""
Copyright 2015 Hewlett-Packard
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This product includes cryptographic software written by Eric Young
(eay@cryptsoft.com). This product includes software written by Tim
Hudson (tjh@cryptsoft.com).
========================================================================
"""
import json
import requests
import exceptions
class ActionManager(object):
    """CRUD client for freezer API action documents (/v1/actions/)."""

    def __init__(self, client):
        self.client = client
        self.endpoint = self.client.endpoint + '/v1/actions/'

    @property
    def headers(self):
        # every request authenticates with the client's current token
        return {'X-Auth-Token': self.client.auth_token}

    def create(self, doc, action_id=''):
        """Create (or overwrite) an action document; return its id."""
        # honour an explicit id, falling back to the one in the document
        action_id = action_id or doc.get('action_id', '')
        r = requests.post(self.endpoint + action_id,
                          data=json.dumps(doc),
                          headers=self.headers)
        if r.status_code != 201:
            raise exceptions.ApiClientException(r)
        return r.json()['action_id']

    def delete(self, action_id):
        """Delete the action identified by action_id."""
        r = requests.delete(self.endpoint + action_id, headers=self.headers)
        if r.status_code != 204:
            raise exceptions.ApiClientException(r)

    def list(self, limit=10, offset=0, search=None):
        """Return up to `limit` action documents, skipping `offset`."""
        body = json.dumps(search) if search else None
        params = {'limit': int(limit), 'offset': int(offset)}
        r = requests.get(self.endpoint, headers=self.headers,
                         params=params, data=body)
        if r.status_code != 200:
            raise exceptions.ApiClientException(r)
        return r.json()['actions']

    def get(self, action_id):
        """Return the action document, or None when it does not exist."""
        r = requests.get(self.endpoint + action_id, headers=self.headers)
        if r.status_code == 200:
            return r.json()
        if r.status_code == 404:
            return None
        raise exceptions.ApiClientException(r)

    def update(self, action_id, update_doc):
        """Patch the action document; return the new document version."""
        r = requests.patch(self.endpoint + action_id,
                           headers=self.headers,
                           data=json.dumps(update_doc))
        if r.status_code != 200:
            raise exceptions.ApiClientException(r)
        return r.json()['version']

View File

@ -26,6 +26,8 @@ from openstackclient.identity import client as os_client
from backups import BackupsManager
from registration import RegistrationManager
from jobs import JobManager
from actions import ActionManager
from sessions import SessionManager
import exceptions
@ -64,6 +66,8 @@ class Client(object):
self.backups = BackupsManager(self)
self.registration = RegistrationManager(self)
self.jobs = JobManager(self)
self.actions = ActionManager(self)
self.sessions = SessionManager(self)
@cached_property
def endpoint(self):

View File

@ -67,4 +67,8 @@ class ApiClientException(Exception):
message = self.get_message_from_api_response(r) or \
self.get_message_from_response(r) or \
str(r)
try:
self.status_code = r.status_code
except:
self.status_code = None
super(ApiClientException, self).__init__(message)

View File

@ -0,0 +1,162 @@
"""
Copyright 2015 Hewlett-Packard
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This product includes cryptographic software written by Eric Young
(eay@cryptsoft.com). This product includes software written by Tim
Hudson (tjh@cryptsoft.com).
========================================================================
"""
import json
import requests
import exceptions
class SessionManager(object):
    """Client for the freezer API session endpoints (/v1/sessions/).

    A session is a coordinated group of jobs; besides plain CRUD this
    manager attaches/detaches jobs to a session and reports session
    start/end events to the api.
    """

    def __init__(self, client):
        self.client = client
        self.endpoint = self.client.endpoint + '/v1/sessions/'

    @property
    def headers(self):
        # every request authenticates with the client's current token
        return {'X-Auth-Token': self.client.auth_token}

    def create(self, doc, session_id=''):
        """Create (or overwrite) a session document.

        :param doc: the session document (dict)
        :param session_id: optional explicit id; falls back to
            doc['session_id'] when empty
        :return: the session_id assigned by the api
        :raises exceptions.ApiClientException: on any non-201 response
        """
        session_id = session_id or doc.get('session_id', '')
        endpoint = self.endpoint + session_id
        r = requests.post(endpoint,
                          data=json.dumps(doc),
                          headers=self.headers)
        if r.status_code != 201:
            raise exceptions.ApiClientException(r)
        session_id = r.json()['session_id']
        return session_id

    def delete(self, session_id):
        """Delete the session identified by session_id."""
        endpoint = self.endpoint + session_id
        r = requests.delete(endpoint, headers=self.headers)
        if r.status_code != 204:
            raise exceptions.ApiClientException(r)

    def list_all(self, limit=10, offset=0, search=None):
        """Return up to `limit` session documents, skipping `offset`."""
        data = json.dumps(search) if search else None
        query = {'limit': int(limit), 'offset': int(offset)}
        r = requests.get(self.endpoint, headers=self.headers,
                         params=query, data=data)
        if r.status_code != 200:
            raise exceptions.ApiClientException(r)
        return r.json()['sessions']

    def list(self, limit=10, offset=0, search=None):
        """Like list_all, but always sends a 'match' list in the search.

        BUGFIX: `search` used to default to a shared mutable dict ({});
        it now defaults to None and a fresh dict is built per call.
        """
        search = search if search is not None else {}
        new_search = search.copy()
        new_search['match'] = search.get('match', [])
        return self.list_all(limit, offset, new_search)

    def get(self, session_id):
        """Return the session document, or None when it does not exist."""
        endpoint = self.endpoint + session_id
        r = requests.get(endpoint, headers=self.headers)
        if r.status_code == 200:
            return r.json()
        if r.status_code == 404:
            return None
        raise exceptions.ApiClientException(r)

    def update(self, session_id, update_doc):
        """Patch the session document; return the new document version."""
        endpoint = self.endpoint + session_id
        r = requests.patch(endpoint,
                           headers=self.headers,
                           data=json.dumps(update_doc))
        if r.status_code != 200:
            raise exceptions.ApiClientException(r)
        return r.json()['version']

    def add_job(self, session_id, job_id):
        """Attach job_id to the session."""
        # endpoint /v1/sessions/{sessions_id}/jobs/{job_id}
        endpoint = '{0}{1}/jobs/{2}'.format(self.endpoint, session_id, job_id)
        r = requests.put(endpoint,
                         headers=self.headers)
        if r.status_code != 204:
            raise exceptions.ApiClientException(r)
        return

    def remove_job(self, session_id, job_id):
        """Detach job_id from the session, retrying up to 5 times.

        :raises exceptions.ApiClientException: built from the last
            response when every attempt fails
        """
        # endpoint /v1/sessions/{sessions_id}/jobs/{job_id}
        endpoint = '{0}{1}/jobs/{2}'.format(self.endpoint, session_id, job_id)
        # BUGFIX: the retry loop used to initialize `r` with the string
        # '' rather than a response object; on total failure the last
        # real response is now always passed to the exception.
        r = None
        for _ in range(5):
            r = requests.delete(endpoint,
                                headers=self.headers)
            if r.status_code == 204:
                return
        raise exceptions.ApiClientException(r)

    def start_session(self, session_id, job_id, session_tag):
        """
        Informs the api that the client is starting the session
        identified by the session_id and request the session_tag
        to be incremented up to the requested value.

        The returned session_tag could be:
        * current_tag + 1 if the session has started
        * > current_tag + 1 if the action had already been started
          by some other node and this node was out of sync

        :param session_id:
        :param job_id:
        :param session_tag: the new session_tag
        :return: the response obj:
                 { result: string 'running' or 'error',
                   'session_tag': the new session_tag }
        """
        # endpoint /v1/sessions/{sessions_id}/action
        endpoint = '{0}{1}/action'.format(self.endpoint, session_id)
        doc = {"start": {
            "job_id": job_id,
            "current_tag": session_tag
        }}
        r = requests.post(endpoint,
                          headers=self.headers,
                          data=json.dumps(doc))
        if r.status_code != 202:
            raise exceptions.ApiClientException(r)
        return r.json()

    def end_session(self, session_id, job_id, session_tag, result):
        """
        Informs the freezer service that the job has ended.
        Provides information about the job's result and the session tag.

        :param session_id:
        :param job_id:
        :param session_tag:
        :param result:
        :return: the api response document
        """
        # endpoint /v1/sessions/{sessions_id}/action
        endpoint = '{0}{1}/action'.format(self.endpoint, session_id)
        doc = {"end": {
            "job_id": job_id,
            "current_tag": session_tag,
            "result": result
        }}
        r = requests.post(endpoint,
                          headers=self.headers,
                          data=json.dumps(doc))
        if r.status_code != 202:
            raise exceptions.ApiClientException(r)
        return r.json()

View File

@ -64,7 +64,7 @@ DEFAULT_PARAMS = {
'restore_abs_path': False, 'log_file': None,
'upload': True, 'mode': 'fs', 'action': 'backup',
'vssadmin': True, 'shadow': '', 'shadow_path': '',
'windows_volume': '', 'command': None
'windows_volume': '', 'command': None, 'metadata_out': False
}
@ -268,6 +268,12 @@ def backup_arguments(args_dict={}):
password = <mysqlpass>
port = <db-port>''',
dest='mysql_conf', default=False)
arg_parser.add_argument(
'--metadata-out', action='store',
help=('Set the filename to which write the metadata regarding '
'the backup metrics. Use "-" to output to standard output.'),
dest='metadata_out', default=False)
if is_windows():
arg_parser.add_argument(
'--log-file', action='store',

View File

@ -19,6 +19,8 @@ Hudson (tjh@cryptsoft.com).
========================================================================
"""
import sys
from freezer import swift
from freezer import utils
from freezer import backup
@ -35,6 +37,9 @@ class Job:
def execute(self):
logging.info('[*] Action not implemented')
    def get_metadata(self):
        # base implementation: jobs expose no metadata unless they
        # override this hook (see BackupJob.get_metadata)
        return None
@staticmethod
def executemethod(func):
def wrapper(self):
@ -115,6 +120,25 @@ class BackupJob(Job):
else:
raise ValueError('Please provide a valid backup mode')
    def get_metadata(self):
        """Build the backup metadata document for upload to the api.

        Combines computed values (current level, real/snapshot paths,
        platform, client version) with a selection of fields copied
        verbatim from the backup configuration object.
        """
        metadata = {
            'current_level': self.conf.curr_backup_level,
            # when an LVM auto-snapshot is used, the configured snapshot
            # source is the "real" path and path_to_backup is the
            # snapshot mount; otherwise path_to_backup is the real path
            'fs_real_path': (self.conf.lvm_auto_snap or
                             self.conf.path_to_backup),
            'vol_snap_path':
                self.conf.path_to_backup if self.conf.lvm_auto_snap else '',
            'client_os': sys.platform,
            'client_version': self.conf.__version__
        }
        # NOTE(review): 'backup_name', 'hostname' and 'curr_backup_level'
        # appear twice in this list; harmless (dict assignment) but
        # redundant.
        fields = ['action', 'always_level', 'backup_media', 'backup_name',
                  'container', 'container_segments', 'curr_backup_level',
                  'dry_run', 'hostname', 'path_to_backup', 'max_level',
                  'mode', 'meta_data_file', 'backup_name', 'hostname',
                  'time_stamp', 'curr_backup_level']
        for field_name in fields:
            # missing config attributes default to the empty string
            metadata[field_name] = self.conf.__dict__.get(field_name, '')
        return metadata
class RestoreJob(Job):
@Job.executemethod

View File

@ -31,7 +31,7 @@ import os
import subprocess
import logging
import sys
import json
# Initialize backup options
(backup_args, arg_parse) = backup_arguments()
@ -125,6 +125,11 @@ def freezer_main(args={}):
freezer_job = job.create_job(backup_args)
freezer_job.execute()
if backup_args.metadata_out == '-':
metadata = freezer_job.get_metadata()
if metadata:
sys.stdout.write(json.dumps(metadata))
return backup_args

View File

View File

@ -0,0 +1,170 @@
"""
Copyright 2015 Hewlett-Packard
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This product includes cryptographic software written by Eric Young
(eay@cryptsoft.com). This product includes software written by Tim
Hudson (tjh@cryptsoft.com).
========================================================================
"""
import argparse
from prettytable import PrettyTable
SCHEDULER_CONF_D = '/etc/freezer/scheduler/conf.d'
class OpenstackOptions(object):
    """Container for the OpenStack credentials used by the scheduler.

    Each value is taken from the parsed command line arguments first,
    falling back to the provided defaults dict (typically os.environ).
    Raises Exception at construction time when mandatory options
    (username, password, tenant_name, region_name) are missing.
    """

    def __init__(self, args, default_dict=None):
        # BUGFIX: the defaults used to be a shared mutable default
        # argument ({}); use None and normalize per call instead.
        default_dict = default_dict if default_dict is not None else {}
        self.username = args.os_username or\
            default_dict.get('OS_USERNAME', None)
        self.tenant_name = args.os_tenant_name or\
            default_dict.get('OS_TENANT_NAME', None)
        self.auth_url = args.os_auth_url or\
            default_dict.get('OS_AUTH_URL', None)
        self.password = args.os_password or\
            default_dict.get('OS_PASSWORD', None)
        self.tenant_id = args.os_tenant_id or\
            default_dict.get('OS_TENANT_ID', None)
        self.region_name = args.os_region_name or\
            default_dict.get('OS_REGION_NAME', None)
        self.endpoint = args.os_endpoint or\
            default_dict.get('OS_SERVICE_ENDPOINT', None)
        if not self.is_valid():
            raise Exception('ERROR: OS Options not valid: {0}'.
                            format(self.reason()))

    def __str__(self):
        # NOTE(review): this renders the password in clear text; it is
        # only printed under --debug.
        table = PrettyTable(["variable", "value"])
        table.add_row(['username', self.username])
        table.add_row(['tenant_name', self.tenant_name])
        table.add_row(['auth_url', self.auth_url])
        table.add_row(['password', self.password])
        table.add_row(['tenant_id', self.tenant_id])
        table.add_row(['region_name', self.region_name])
        table.add_row(['endpoint', self.endpoint])
        return table.__str__()

    def is_valid(self):
        """True when all mandatory options are present."""
        return not self.reason()

    def reason(self):
        """Return 'missing x, y' listing absent mandatory options, else ''."""
        # idiom: getattr() instead of self.__getattribute__()
        missing = [attr
                   for attr in ('username', 'password',
                                'tenant_name', 'region_name')
                   if not getattr(self, attr)]
        if missing:
            return 'missing {0}'.format(', '.join(missing))
        return ''
def get_args(choices):
    """Build and run the freezer-scheduler command line parser.

    :param choices: allowed values for the positional 'action' argument
    :return: the argparse Namespace holding the parsed options
    """
    parser = argparse.ArgumentParser()

    # positional action (job-list, start, stop, ...)
    parser.add_argument(
        'action', action='store', default=None, choices=choices, help='')
    parser.add_argument(
        '--debug', action='store_true',
        help='Prints debugging output onto the console, this may include '
             'OS environment variables, request and response calls. '
             'Helpful for debugging and understanding the API calls.',
        dest='debug', default=False)

    # resource selection
    parser.add_argument(
        '-j', '--job', action='store',
        help=('name or ID of the job'),
        dest='job', default=None)
    parser.add_argument(
        '-s', '--session', action='store',
        help=('name or ID of the session'),
        dest='session', default=None)
    parser.add_argument(
        '--file', action='store',
        help=('Local file that contains the resource '
              'to be uploaded/downloaded'),
        dest='fname', default=None)

    # OpenStack credentials / endpoints
    parser.add_argument(
        '--os-endpoint', action='store',
        help=('Specify an endpoint to use instead of retrieving '
              'one from the service catalog (via authentication). '
              'Defaults to env[OS_SERVICE_ENDPOINT]'),
        dest='os_endpoint', default=None)
    parser.add_argument(
        '--os-username', action='store',
        help=('Name used for authentication with the OpenStack '
              'Identity service. Defaults to env[OS_USERNAME].'),
        dest='os_username', default=None)
    parser.add_argument(
        '--os-password', action='store',
        help=('Password used for authentication with the OpenStack '
              'Identity service. Defaults to env[OS_PASSWORD].'),
        dest='os_password', default=None)
    parser.add_argument(
        '--os-tenant-name', action='store',
        help=('Tenant to request authorization on. Defaults to '
              'env[OS_TENANT_NAME].'),
        dest='os_tenant_name', default=None)
    parser.add_argument(
        '--os-tenant-id', action='store',
        help=('Tenant to request authorization on. Defaults to '
              'env[OS_TENANT_ID].'),
        dest='os_tenant_id', default=None)
    parser.add_argument(
        '--os-auth-url', action='store',
        help=('Specify the Identity endpoint to use for '
              'authentication. Defaults to env[OS_AUTH_URL].'),
        dest='os_auth_url', default=None)
    parser.add_argument(
        '--os-region-name', action='store',
        help=('Specify the region to use. Defaults to '
              'env[OS_REGION_NAME].'),
        dest='os_region_name', default=None)
    parser.add_argument(
        '--os-token', action='store',
        help=('Specify an existing token to use instead of retrieving'
              ' one via authentication (e.g. with username & password). '
              'Defaults to env[OS_SERVICE_TOKEN].'),
        dest='os_token', default=None)

    # scheduler behaviour
    parser.add_argument(
        '-c', '--client-id', action='store',
        help=('Specifies the client_id used when contacting the service.'
              'If not specified it will be automatically created'
              'using the tenant-id and the machine hostname.'),
        dest='client_id', default=None)
    parser.add_argument(
        '-n', '--no-api', action='store_true',
        help='Prevents the scheduler from using the api service',
        dest='no_api', default=False)
    parser.add_argument(
        '-a', '--active-only', action='store_true',
        help='Filter only active jobs/session',
        dest='active_only', default=False)
    parser.add_argument(
        '-d', '--dir', action='store',
        help=('Used to store/retrieve files on local storage, including '
              'those exchanged with the api service. '
              'Default value is {0}'.format(SCHEDULER_CONF_D)),
        dest='jobs_dir', default=SCHEDULER_CONF_D)
    parser.add_argument(
        '-i', '--interval', action='store',
        help=('Specifies the api-polling interval in seconds.'
              'Defaults to 60 seconds'),
        dest='interval', default=60)
    parser.add_argument(
        '-l', '--log-file', action='store',
        help=('location of log file'),
        dest='log_file', default=None)

    return parser.parse_args()

160
freezer/scheduler/daemon.py Normal file
View File

@ -0,0 +1,160 @@
"""
Copyright 2015 Hewlett-Packard
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This product includes cryptographic software written by Eric Young
(eay@cryptsoft.com). This product includes software written by Tim
Hudson (tjh@cryptsoft.com).
========================================================================
"""
import logging
import os
import signal
from tempfile import gettempdir
from time import sleep
from pep3143daemon import DaemonContext, PidFile
from freezer.utils import create_dir
class Daemon:
    """Daemonization helper for the freezer scheduler.

    Wraps a "daemonizable" object (anything providing start/stop and
    optionally reload) in a pep3143 DaemonContext, manages the pid file
    and translates SIGTERM/SIGHUP into stop/reload calls.
    """

    # class-level singletons: the signal handlers are static methods,
    # so they reach the live daemon through Daemon.instance
    instance = None
    exit_flag = False

    def __init__(self, daemonizable=None, pid_fname=None):
        # daemonizable has to provide start/stop (and possibly reload)
        # methods
        Daemon.instance = self
        self._pid_fname = pid_fname
        self.daemonizable = daemonizable

    @staticmethod
    def setup_logging(log_file):
        """Configure root logging to the first writable log file.

        Tries log_file (when given), then the system-wide and the
        per-user default paths; raises when none of them is writable.
        :return: the expanded path of the log file actually used
        """

        class NoLogFilter(logging.Filter):
            # suppresses every record of the logger it is attached to
            def filter(self, record):
                return False

        def configure_logging(file_name):
            expanded_file_name = os.path.expanduser(file_name)
            expanded_dir_name = os.path.dirname(expanded_file_name)
            create_dir(expanded_dir_name, do_log=False)
            logging.basicConfig(
                filename=expanded_file_name,
                level=logging.INFO,
                format=('%(asctime)s %(name)s %(levelname)s %(message)s'))
            # filter out some annoying messages
            # not the best position for this code
            log_filter = NoLogFilter()
            logging.getLogger("apscheduler.scheduler").\
                addFilter(log_filter)
            logging.getLogger("apscheduler.executors.default").\
                addFilter(log_filter)
            logging.getLogger("requests.packages.urllib3.connectionpool").\
                addFilter(log_filter)
            return expanded_file_name

        log_file_paths = [log_file] if log_file else [
            '/var/log/freezer-scheduler.log',
            '~/.freezer/freezer-scheduler.log']
        for file_name in log_file_paths:
            try:
                return configure_logging(file_name)
            except IOError:
                # candidate path not writable; try the next one
                pass

        raise Exception("Unable to write to log file")

    @staticmethod
    def handle_program_exit(signum, frame):
        # SIGTERM handler: flag the main loop to exit and stop the
        # wrapped scheduler
        Daemon.exit_flag = True
        Daemon.instance.daemonizable.stop()

    @staticmethod
    def handle_reload(signum, frame):
        # SIGHUP handler: forward the reload request
        Daemon.instance.daemonizable.reload()

    @property
    def signal_map(self):
        # handed to DaemonContext so the handlers are installed inside
        # the daemonized process
        return {
            signal.SIGTERM: Daemon.handle_program_exit,
            signal.SIGHUP: Daemon.handle_reload,
        }

    @property
    def pid_fname(self):
        # default pid file: <tmpdir>/freezer_sched_<user-home-name>.pid
        if not self._pid_fname:
            fname = '{0}/freezer_sched_{1}.pid'.format(
                gettempdir(),
                os.path.split(os.path.expanduser('~'))[-1])
            self._pid_fname = os.path.normpath(fname)
        return self._pid_fname

    @property
    def pid(self):
        # pid of the running daemon (read from the pid file), or None
        if os.path.isfile(self.pid_fname):
            with open(self.pid_fname, 'r') as f:
                return int(f.read())
        return None

    @property
    def jobs_file(self):
        # no local jobs file when running as a daemon
        return ''

    @property
    def no_api(self):
        # the daemonized scheduler always talks to the api service
        return False

    def start(self, log_file=None):
        """Daemonize and run the wrapped scheduler until exit_flag is set.

        On unexpected errors the scheduler is restarted after a 10
        second pause rather than letting the daemon die.
        """
        pidfile = PidFile(self.pid_fname)
        with DaemonContext(pidfile=pidfile, signal_map=self.signal_map):
            self.setup_logging(log_file)
            while not Daemon.exit_flag:
                try:
                    logging.info('[*] freezer daemon starting, pid: {0}'.
                                 format(self.pid))
                    self.daemonizable.start()
                    # normal termination of the scheduler: leave the loop
                    Daemon.exit_flag = True
                except Exception as e:
                    logging.error('[*] Restarting daemonized procedure '
                                  'after Fatal Error: {0}'.format(e))
                    sleep(10)
            logging.info('[*] freezer daemon done, pid: {0}'.format(self.pid))

    def stop(self):
        # send SIGTERM to the daemon identified by the pid file
        pid = self.pid
        if pid:
            os.kill(self.pid, signal.SIGTERM)
        else:
            print "Not Running"

    def status(self):
        # report whether a daemon is currently running
        pid = self.pid
        if pid:
            print "Running with pid: {0}".format(pid)
        else:
            print "Not Running"

    def reload(self):
        # send SIGHUP, asking the daemon to reload its configuration
        pid = self.pid
        if pid:
            os.kill(pid, signal.SIGHUP)
        else:
            print "Not Running"

View File

@ -0,0 +1,229 @@
#!/usr/bin/env python
"""
Copyright 2015 Hewlett-Packard
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This product includes cryptographic software written by Eric Young
(eay@cryptsoft.com). This product includes software written by Tim
Hudson (tjh@cryptsoft.com).
========================================================================
"""
import logging
import os
import sys
import threading
from distutils import spawn
from apscheduler.schedulers.blocking import BlockingScheduler
from freezer.apiclient import client
import arguments
import shell
import utils
from freezer.utils import create_dir
from daemon import Daemon
from scheduler_job import Job
class FreezerScheduler(object):
    """Fetches job documents from the freezer api service (or from
    local files when the api is not used) and runs them through an
    APScheduler BlockingScheduler.
    """

    def __init__(self, apiclient, interval, job_path):
        # config_manager
        self.client = apiclient
        self.freezerc_executable = spawn.find_executable('freezerc')
        self.job_path = job_path
        self._client = None
        self.lock = threading.Lock()
        self.execution_lock = threading.Lock()
        # coalesce missed runs into one; never run the same job twice
        # concurrently
        job_defaults = {
            'coalesce': True,
            'max_instances': 1
        }
        self.scheduler = BlockingScheduler(job_defaults=job_defaults)
        if self.client:
            # poll the api for new/updated jobs every `interval` seconds
            self.scheduler.add_job(self.poll, 'interval',
                                   seconds=interval, id='api_poll')

        # expose the underlying scheduler's add/remove directly
        self.add_job = self.scheduler.add_job
        self.remove_job = self.scheduler.remove_job
        # map: job_id -> scheduler_job.Job
        self.jobs = {}

    def get_jobs(self):
        """Return the job documents for this client.

        With the api: download them (best effort) and mirror them to
        disk; without the api: load them from the jobs directory.
        """
        if self.client:
            job_doc_list = []
            try:
                job_doc_list = utils.get_active_jobs_from_api(self.client)
            except Exception as e:
                logging.error('Unable to get jobs from freezer api service. '
                              '{0}'.format(e))
            try:
                utils.save_jobs_to_disk(job_doc_list, self.job_path)
            except Exception as e:
                logging.error('Unable to save jobs to {0}. '
                              '{1}'.format(self.job_path, e))
            return job_doc_list
        else:
            return utils.get_jobs_from_disk(self.job_path)

    def start_session(self, session_id, job_id, session_tag):
        # sessions require the api service
        if self.client:
            return self.client.sessions.start_session(session_id,
                                                      job_id,
                                                      session_tag)
        else:
            raise Exception("Unable to start session: api not in use.")

    def end_session(self, session_id, job_id, session_tag, result):
        # sessions require the api service
        if self.client:
            return self.client.sessions.end_session(session_id,
                                                    job_id,
                                                    session_tag,
                                                    result)
        else:
            raise Exception("Unable to end session: api not in use.")

    def upload_metadata(self, metadata_doc):
        # silently skipped when the api is not in use
        if self.client:
            self.client.backups.create(metadata_doc)

    def start(self):
        # called by Daemon.start; scheduler.start() blocks until shutdown
        utils.do_register(self.client)
        self.poll()
        self.scheduler.start()

    def update_job(self, job_id, job_doc):
        # push the job document back to the api (best effort)
        if self.client:
            try:
                return self.client.jobs.update(job_id, job_doc)
            except Exception as e:
                logging.error("[*] Job update error: {0}".format(e))

    def update_job_status(self, job_id, status):
        # convenience wrapper: patch only job_schedule.status
        doc = {'job_schedule': {'status': status}}
        self.update_job(job_id, doc)

    def is_scheduled(self, job_id):
        return self.scheduler.get_job(job_id) is not None

    def create_job(self, job_doc):
        # may return None (e.g. document already marked running/scheduled)
        job = Job.create(self, self.freezerc_executable, job_doc)
        if job:
            self.jobs[job.id] = job
            logging.info("Created job {0}".format(job.id))
        return job

    def poll(self):
        """Synchronize the in-memory job table with the job documents:
        create new jobs, feed events to existing ones and drop jobs
        that disappeared from the source.
        """
        work_job_doc_list = []
        try:
            work_job_doc_list = self.get_jobs()
        except Exception as e:
            logging.error("[*] Unable to get jobs: {0}".format(e))

        work_job_id_list = []

        # create job if necessary, then let it process its events
        for job_doc in work_job_doc_list:
            job_id = job_doc['job_id']
            work_job_id_list.append(job_id)
            job = self.jobs.get(job_id, None) or self.create_job(job_doc)
            if job:
                job.process_event(job_doc)

        # request removal of any job that has been removed in the api
        for job_id, job in self.jobs.iteritems():
            if job_id not in work_job_id_list:
                job.remove()

        # actually drop the jobs that have completed their removal
        remove_list = [job_id for job_id, job in self.jobs.items()
                       if job.can_be_removed()]

        for k in remove_list:
            self.jobs.pop(k)

    def stop(self):
        # invoked from the SIGTERM handler
        try:
            self.scheduler.shutdown(wait=False)
        except:
            # shutdown may raise when the scheduler never started;
            # NOTE(review): a bare except also swallows SystemExit /
            # KeyboardInterrupt — consider narrowing to Exception
            pass

    def reload(self):
        logging.warning("reload not supported")
def _get_doers(module):
doers = {}
for attr in (a for a in dir(module) if a.startswith('do_')):
command = attr[3:].replace('_', '-')
callback = getattr(module, attr)
doers[command] = callback
return doers
def main():
    """Entry point of the freezer-scheduler command.

    Dispatches to a do_* handler for management actions (job-list,
    session-create, ...) or controls the daemon for start/stop/status.
    :return: the process exit code
    """
    # commands implemented by the shell and utils modules
    doers = _get_doers(shell)
    doers.update(_get_doers(utils))

    possible_actions = doers.keys() + ['start', 'stop', 'status']

    args = arguments.get_args(possible_actions)

    if args.action is None:
        print "No action"
        sys.exit(1)

    # credentials come from the CLI with os.environ as fallback
    os_options = arguments.OpenstackOptions(args, os.environ)
    if args.debug:
        print os_options

    apiclient = None
    if args.no_api is False:
        apiclient = client.Client(username=os_options.username,
                                  password=os_options.password,
                                  tenant_name=os_options.tenant_name,
                                  endpoint=os_options.endpoint,
                                  auth_url=os_options.auth_url)
        if args.client_id:
            apiclient.client_id = args.client_id

    create_dir(args.jobs_dir, do_log=False)

    # management command: run it and exit
    if args.action in doers:
        try:
            return doers[args.action](apiclient, args)
        except Exception as e:
            print ('ERROR {0}'.format(e))
            return 1

    # daemon control commands
    freezer_scheduler = FreezerScheduler(apiclient=apiclient,
                                         interval=int(args.interval),
                                         job_path=args.jobs_dir)

    daemon = Daemon(daemonizable=freezer_scheduler)

    if args.action == 'start':
        daemon.start(log_file=args.log_file)
    elif args.action == 'stop':
        daemon.stop()
    elif args.action == 'reload':
        # NOTE(review): 'reload' is not listed in possible_actions, so
        # argparse rejects it before this branch can ever run
        daemon.reload()
    elif args.action == 'status':
        daemon.status()

    return os.EX_OK
if __name__ == '__main__':
sys.exit(main())

View File

@ -0,0 +1,440 @@
"""
Copyright 2015 Hewlett-Packard
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This product includes cryptographic software written by Eric Young
(eay@cryptsoft.com). This product includes software written by Tim
Hudson (tjh@cryptsoft.com).
========================================================================
"""
import logging
import subprocess
import tempfile
import datetime
import json
import time
from ConfigParser import ConfigParser
class StopState(object):
    """Job state: not scheduled. Each handler applies the incoming job
    document, updates the api copy and returns the follow-up event
    (NO_EVENT when the transition is complete).
    """

    @staticmethod
    def stop(job, doc):
        # already stopped: just acknowledge and sync the doc to the api
        job.job_doc = doc
        job.event = Job.NO_EVENT
        job.job_doc_status = Job.STOP_STATUS
        job.scheduler.update_job(job.id, job.job_doc)
        return Job.NO_EVENT

    @staticmethod
    def abort(job, doc):
        # nothing is running, so abort is handled exactly like stop
        job.job_doc = doc
        job.event = Job.NO_EVENT
        job.job_doc_status = Job.STOP_STATUS
        job.scheduler.update_job(job.id, job.job_doc)
        return Job.NO_EVENT

    @staticmethod
    def start(job, doc):
        # accept the new document and hand the job to the scheduler
        job.job_doc = doc
        job.event = Job.NO_EVENT
        job.job_doc_status = Job.STOP_STATUS
        job.schedule()
        job.scheduler.update_job(job.id, job.job_doc)
        return Job.NO_EVENT

    @staticmethod
    def remove(job):
        job.unschedule()
        job.job_doc_status = Job.REMOVED_STATUS
        return Job.NO_EVENT
class ScheduledState(object):
    """Job state: registered with the scheduler, waiting to run."""

    @staticmethod
    def stop(job, doc):
        # unschedule, then return STOP_EVENT so the dispatcher re-runs
        # the stop handler in the resulting (Stop) state
        job.unschedule()
        job.scheduler.update_job(job.id, job.job_doc)
        return Job.STOP_EVENT

    @staticmethod
    def abort(job, doc):
        # aborting a job that is not yet running equals stopping it
        return ScheduledState.stop(job, doc)

    @staticmethod
    def start(job, doc):
        # already scheduled: just clear the event and sync the doc
        job.event = Job.NO_EVENT
        job.scheduler.update_job(job.id, job.job_doc)
        return Job.NO_EVENT

    @staticmethod
    def remove(job):
        job.unschedule()
        job.job_doc_status = Job.REMOVED_STATUS
        return Job.NO_EVENT
class RunningState(object):
    """Job state: the freezer action process is currently executing.

    Handlers only flag the running job by setting job.event; the
    execution side is expected to act on that flag.
    """

    @staticmethod
    def stop(job, doc):
        job.event = Job.STOP_EVENT
        return Job.NO_EVENT

    @staticmethod
    def abort(job, doc):
        job.event = Job.ABORT_EVENT
        return Job.NO_EVENT

    @staticmethod
    def start(job, doc):
        # already running: just clear the event and sync the doc
        job.event = Job.NO_EVENT
        job.scheduler.update_job(job.id, job.job_doc)
        return Job.NO_EVENT

    @staticmethod
    def remove(job):
        # defer the removal until the running process finishes
        job.event = Job.REMOVE_EVENT
        return Job.NO_EVENT
class Job(object):
    """A scheduled freezer job, driven by its job document.

    Implements a small state machine (StopState / ScheduledState /
    RunningState) whose transitions are triggered by events carried in
    the job document's job_schedule section.
    """

    # events that can be requested through the job document
    NO_EVENT = ''
    STOP_EVENT = 'stop'
    START_EVENT = 'start'
    ABORT_EVENT = 'abort'
    REMOVE_EVENT = 'remove'

    # values of job_schedule.status
    STOP_STATUS = 'stop'
    SCHEDULED_STATUS = 'scheduled'
    RUNNING_STATUS = 'running'
    REMOVED_STATUS = 'removed'
    COMPLETED_STATUS = 'completed'

    # values of job_schedule.result
    FAIL_RESULT = 'fail'
    SUCCESS_RESULT = 'success'
    ABORTED_RESULT = 'aborted'
    @staticmethod
    def create(scheduler, executable, job_doc):
        """Factory: build a Job from its document.

        :return: the new Job, or None when the document claims the job
            is already running/scheduled (stale state is skipped)
        """
        job = Job(scheduler, executable, job_doc)
        if job.job_doc_status in ['running', 'scheduled']:
            logging.warning('Job {0} already has {1} status, '
                            'skipping'.format(job.id, job.job_doc_status))
            return None
        if not job.event and not job.job_doc_status:
            # brand new document with no explicit instructions: start it
            logging.warning('Autostart Job {0}'.format(job.id))
            job.event = Job.START_EVENT
        return job
    def __init__(self, scheduler, executable, job_doc):
        # scheduler: the owning FreezerScheduler (provides lock,
        # update_job, ...); executable: path to the freezerc binary
        self.scheduler = scheduler
        self.executable = executable
        self.job_doc = job_doc
        self.process = None
        # every job starts in the stopped state
        self.state = StopState
    def remove(self):
        """Request removal of this job (state-dependent)."""
        with self.scheduler.lock:
            # delegate to state object
            logging.info('REMOVE job {0}'.format(self.id))
            self.state.remove(self)
    # --- thin accessors over the underlying job document --------------

    @property
    def id(self):
        return self.job_doc['job_id']

    @property
    def session_id(self):
        # id of the owning session ('' when the job is not in a session)
        return self.job_doc.get('session_id', '')

    @session_id.setter
    def session_id(self, value):
        self.job_doc['session_id'] = value

    @property
    def session_tag(self):
        return self.job_doc.get('session_tag', 0)

    @session_tag.setter
    def session_tag(self, value):
        self.job_doc['session_tag'] = value

    @property
    def event(self):
        # pending event requested through the api ('' when none)
        return self.job_doc['job_schedule'].get('event', '')

    @event.setter
    def event(self, value):
        self.job_doc['job_schedule']['event'] = value

    @property
    def job_doc_status(self):
        return self.job_doc['job_schedule'].get('status', '')

    @job_doc_status.setter
    def job_doc_status(self, value):
        self.job_doc['job_schedule']['status'] = value

    @property
    def result(self):
        return self.job_doc['job_schedule'].get('result', '')

    @result.setter
    def result(self, value):
        self.job_doc['job_schedule']['result'] = value

    def can_be_removed(self):
        # True once the job has been unscheduled and marked as removed
        return self.job_doc_status == Job.REMOVED_STATUS
    @staticmethod
    def save_action_to_file(action, f):
        """Serialize an action dict into f as an ini-style [action]
        section, then rewind f so it can be read back immediately.
        """
        parser = ConfigParser()
        parser.add_section('action')
        for action_k, action_v in action.items():
            parser.set('action', action_k, action_v)
        parser.write(f)
        f.seek(0)
    # --- scheduling-related views of the job document -----------------

    @property
    def schedule_date(self):
        # date string for one-shot scheduling ('' when unset)
        return self.job_doc['job_schedule'].get('schedule_date', '')

    @property
    def schedule_interval(self):
        # "<value> <unit>" string (e.g. "5 hours") or 'continuous'
        return self.job_doc['job_schedule'].get('schedule_interval', '')

    @property
    def schedule_cron_fields(self):
        # subset of job_schedule keys understood by the cron trigger
        cron_fields = ['year', 'month', 'day', 'week', 'day_of_week',
                       'hour', 'minute', 'second']
        return {key: value
                for key, value in self.job_doc['job_schedule'].items()
                if key in cron_fields}

    @property
    def scheduled(self):
        return self.scheduler.is_scheduled(self.id)
def get_schedule_args(self):
    """Build the kwargs for apscheduler add_job() from the job document.

    Precedence: explicit date > interval > cron fields. With no
    scheduling information at all, the job is scheduled to start
    within a couple of seconds.

    :return: dict of apscheduler trigger arguments
    """
    if self.schedule_date:
        return {'trigger': 'date',
                'run_date': self.schedule_date}
    elif self.schedule_interval:
        kwargs = {'trigger': 'interval'}
        if self.schedule_interval == 'continuous':
            kwargs.update({'seconds': 1})
        else:
            # e.g. "5 minutes" -> {'minutes': 5}
            val, unit = self.schedule_interval.split(' ')
            kwargs.update({unit: int(val)})
        return kwargs
    else:
        cron_fields = self.schedule_cron_fields
        if cron_fields:
            # BUG FIX: dict.update() returns None, so the original
            # "return {'trigger': 'cron'}.update(cron_fields)" always
            # returned None and cron scheduling was silently lost.
            kwargs = {'trigger': 'cron'}
            kwargs.update(cron_fields)
            return kwargs
        # no scheduling information, schedule to start within a few seconds
        return {'trigger': 'date',
                'run_date': datetime.datetime.now() +
                datetime.timedelta(0, 2, 0)}
def process_event(self, job_doc):
    """Consume the pending scheduling event attached to *job_doc*.

    Runs under the scheduler lock. Each state handler may chain a
    follow-up event by returning it, so we loop until no event is left.
    """
    with self.scheduler.lock:
        next_event = job_doc['job_schedule'].get('event', '')
        while next_event:
            if next_event == Job.STOP_EVENT:
                logging.info('JOB {0} event: STOP'.format(self.id))
                next_event = self.state.stop(self, job_doc)
            elif next_event == Job.START_EVENT:
                logging.info('JOB {0} event: START'.format(self.id))
                next_event = self.state.start(self, job_doc)
            elif next_event == Job.ABORT_EVENT:
                logging.info('JOB {0} event: ABORT'.format(self.id))
                next_event = self.state.abort(self, job_doc)
            else:
                # BUG FIX: an unrecognized non-empty event used to leave
                # next_event unchanged, spinning forever while holding
                # the scheduler lock. Log it and bail out instead.
                logging.warning('JOB {0} unknown event: {1}'
                                .format(self.id, next_event))
                break
def upload_metadata(self, metadata_string):
    """Parse *metadata_string* as JSON and upload it via the scheduler.

    Errors are logged and swallowed: a failed metadata upload must not
    abort the job that produced the metadata.
    """
    try:
        metadata = json.loads(metadata_string)
        if metadata:
            self.scheduler.upload_metadata(metadata)
            # BUG FIX: the original logged this line unconditionally
            # (even after a parse/upload error) and left the "{0}"
            # placeholder unfilled because .format(self.id) was missing.
            logging.info("[*] Job {0}, freezer action metadata uploaded"
                         .format(self.id))
    except Exception as e:
        logging.error('[*] metrics upload error: {0}'.format(e))
def execute_job_action(self, job_action):
    """Run one freezer action in a subprocess, retrying on failure.

    The freezer_action dict is written to a temporary ini file and the
    freezer client (self.executable) is launched against it. stdout is
    expected to carry the action metadata (uploaded to the api);
    stderr is logged as an error.

    :param job_action: action dict; honors 'max_retries' (default 1)
                       and 'max_retries_interval' seconds (default 60)
    :return: Job.SUCCESS_RESULT or Job.FAIL_RESULT
    """
    max_retries = job_action.get('max_retries', 1)
    tries = max_retries
    freezer_action = job_action.get('freezer_action', {})
    max_retries_interval = job_action.get('max_retries_interval', 60)
    action_name = freezer_action.get('action', '')
    while tries:
        with tempfile.NamedTemporaryFile() as config_file:
            self.save_action_to_file(freezer_action, config_file)
            freezer_command = 'python {0} --metadata-out - --config {1}'.\
                format(self.executable, config_file.name)
            # self.process is kept so terminate()/kill() can reach it
            self.process = subprocess.Popen(freezer_command.split(),
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE)
            # blocks until the freezer client exits
            output, error = self.process.communicate()
            if error:
                logging.error("[*] Freezer client error: {0}".format(error))
            elif output:
                self.upload_metadata(output)
            if self.process.returncode:
                # ERROR
                tries -= 1
                if tries:
                    logging.warning('[*] Job {0} failed {1} action,'
                                    ' retrying in {2} seconds'
                                    .format(self.id, action_name,
                                            max_retries_interval))
                    # sleeping with the bloody lock, but we don't want other
                    # actions to mess with our stuff like fs snapshots, do we ?
                    time.sleep(max_retries_interval)
            else:
                # SUCCESS
                logging.info('[*] Job {0} action {1}'
                             ' returned success exit code'.
                             format(self.id, action_name))
                return Job.SUCCESS_RESULT
    logging.error('[*] Job {0} action {1} failed after {2} tries'
                  .format(self.id, action_name, max_retries))
    return Job.FAIL_RESULT
def execute(self):
    """Run all of the job's actions in order and record the outcome.

    Entry point registered with apscheduler (see schedule()).
    execution_lock presumably serializes job execution on this client
    -- TODO confirm against the scheduler implementation; the inner
    lock only protects the state/status transition.
    """
    result = Job.SUCCESS_RESULT
    with self.scheduler.execution_lock:
        with self.scheduler.lock:
            logging.info('job {0} running'.format(self.id))
            self.state = RunningState
            self.job_doc_status = Job.RUNNING_STATUS
            self.scheduler.update_job_status(self.id, self.job_doc_status)
        self.start_session()
        for job_action in self.job_doc.get('job_actions', []):
            # run the action if everything succeeded so far, or if it
            # is flagged mandatory regardless of earlier failures
            if job_action.get('mandatory', False) or \
                    (result == Job.SUCCESS_RESULT):
                result = self.execute_job_action(job_action)
            else:
                freezer_action = job_action.get('freezer_action', {})
                action_name = freezer_action.get('action', '')
                logging.warning("[*]skipping {0} action".
                                format(action_name))
        self.result = result
        self.finish()
def finish(self):
    """Update session, status and state after an execution run.

    Ordering matters: a pending removal wins, then one-shot jobs (no
    trigger left in apscheduler), then stop/abort requests; otherwise
    the job stays scheduled for its next run.
    """
    self.end_session(self.result)
    with self.scheduler.lock:
        if self.event == Job.REMOVE_EVENT:
            # removal was requested while we were running
            self.unschedule()
            self.job_doc_status = Job.REMOVED_STATUS
            return
        if not self.scheduled:
            # one-shot job: apscheduler holds no future trigger for it
            self.job_doc_status = Job.COMPLETED_STATUS
            self.state = StopState
            self.scheduler.update_job(self.id, self.job_doc)
            return
        if self.event in [Job.STOP_EVENT, Job.ABORT_EVENT]:
            # stop/abort arrived during execution: drop future runs
            self.unschedule()
            self.job_doc_status = Job.COMPLETED_STATUS
            self.scheduler.update_job(self.id, self.job_doc)
        else:
            # recurring job: keep it scheduled for the next trigger
            self.job_doc_status = Job.SCHEDULED_STATUS
            self.state = ScheduledState
            self.scheduler.update_job_status(self.id, self.job_doc_status)
def start_session(self):
    """Register this job as started with its session.

    No-op for standalone jobs. Retries up to 5 times, then gives up
    with an error log; the job run itself proceeds regardless.
    """
    if not self.session_id:
        return
    for _ in range(5):
        try:
            resp = self.scheduler.start_session(self.session_id,
                                                self.id,
                                                self.session_tag)
            if resp['result'] == 'success':
                # the api hands back the updated session run counter
                self.session_tag = resp['session_tag']
                return
        except Exception as e:
            logging.error('[*]Error while starting session {0}. {1}'.
                          format(self.session_id, e))
        logging.warning('[*]Retrying to start session {0}'.
                        format(self.session_id))
    logging.error('[*]Unable to start session {0}'.format(self.session_id))
def end_session(self, result):
    """Report this job's *result* to its session.

    No-op for standalone jobs. Retries up to 5 times, then gives up
    with an error log.
    """
    if not self.session_id:
        return
    for _ in range(5):
        try:
            resp = self.scheduler.end_session(self.session_id,
                                              self.id,
                                              self.session_tag,
                                              result)
            if resp['result'] == 'success':
                return
        except Exception as e:
            logging.error('[*]Error while ending session {0}. {1}'.
                          format(self.session_id, e))
        logging.warning('[*]Retrying to end session {0}'.
                        format(self.session_id))
    logging.error('[*]Unable to end session {0}'.format(self.session_id))
def schedule(self):
    """Register execute() with apscheduler, then sync status and state.

    If registration failed (or the job already started and is waiting
    for the execution lock) the job ends up completed/stopped instead
    of scheduled.
    """
    try:
        self.scheduler.add_job(self.execute, id=self.id,
                               **self.get_schedule_args())
    except Exception as e:
        logging.error("[*] Unable to schedule job {0}: {1}".
                      format(self.id, e))
    if self.scheduled:
        self.job_doc_status = Job.SCHEDULED_STATUS
        self.state = ScheduledState
    else:
        # job not scheduled or already started and waiting for lock
        self.job_doc_status = Job.COMPLETED_STATUS
        self.state = StopState
def unschedule(self):
    """Drop the job's apscheduler trigger and reset it to stopped.

    Clears any pending event and marks the document as stopped.
    """
    try:
        # already executing jobs are not present in the apscheduler list,
        # so remove_job may legitimately fail -- best effort only
        self.scheduler.remove_job(job_id=self.id)
    # BUG FIX: the bare "except:" also swallowed SystemExit and
    # KeyboardInterrupt; only ordinary exceptions should be ignored.
    except Exception:
        pass
    self.event = Job.NO_EVENT
    self.job_doc_status = Job.STOP_STATUS
    self.state = StopState
def terminate(self):
    """Politely ask the running freezer subprocess (if any) to terminate."""
    proc = self.process
    if proc:
        proc.terminate()
def kill(self):
    """Forcefully kill the running freezer subprocess, if any."""
    proc = self.process
    if proc:
        proc.kill()

236
freezer/scheduler/shell.py Normal file
View File

@ -0,0 +1,236 @@
#!/usr/bin/env python
"""
Copyright 2015 Hewlett-Packard
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the