PM: Ensure only one periodic task runs per host

Ensure multiple periodic sync/recovery tasks do not run concurrently on
a single host by using an inter-process lock inside the periodic task.

This has the effect of serializing the tasks, rather than the more correct
deduplication of tasks. Bug 1445127 tracks the longer term fix.

Change-Id: Ib0041f127aacfd0f49ca84deecc2a6950594dd58
Closes-Bug: 1445125
(cherry picked from commit f3dd7a8f42)
This commit is contained in:
Kiall Mac Innes 2015-04-21 13:07:38 +01:00 committed by Kiall Mac Innes
parent 09ddb2f743
commit c376367c38
4 changed files with 62 additions and 41 deletions

View File

@ -159,6 +159,9 @@ function configure_designate {
sudo cp $DESIGNATE_DIR/etc/designate/rootwrap.conf.sample $DESIGNATE_ROOTWRAP_CONF
iniset $DESIGNATE_ROOTWRAP_CONF DEFAULT filters_path $DESIGNATE_DIR/etc/designate/rootwrap.d root-helper
# Oslo Concurrency
iniset $DESIGNATE_CONF oslo_concurrency lock_path "$DESIGNATE_STATE_PATH"
# Set up the rootwrap sudoers for designate
local rootwrap_sudoer_cmd="$DESIGNATE_BIN_DIR/designate-rootwrap $DESIGNATE_ROOTWRAP_CONF *"
local tempfile=`mktemp`

View File

@ -22,6 +22,7 @@ import socket
from oslo.config import cfg
from oslo_log import log
from oslo_concurrency import lockutils
from oslo import messaging
@ -67,3 +68,6 @@ log.set_defaults(default_log_levels=[
# Set some Oslo RPC defaults
messaging.set_transport_defaults('designate')
# Set some Oslo Concurrency defaults
lockutils.set_defaults(lock_path='$state_path')

View File

@ -158,68 +158,74 @@ class Service(service.RPCService, service.Service):
# Periodic Tasks
def periodic_recovery(self):
"""
:return:
:return: None
"""
context = DesignateContext.get_admin_context(all_tenants=True)
# TODO(kiall): Replace this inter-process-lock with a distributed
# lock, likely using the tooz library - see bug 1445127.
with lockutils.lock('periodic_recovery', external=True, delay=30):
context = DesignateContext.get_admin_context(all_tenants=True)
LOG.debug("Starting Periodic Recovery")
LOG.debug("Starting Periodic Recovery")
try:
# Handle Deletion Failures
domains = self._get_failed_domains(context, DELETE_ACTION)
try:
# Handle Deletion Failures
domains = self._get_failed_domains(context, DELETE_ACTION)
for domain in domains:
self.delete_domain(context, domain)
for domain in domains:
self.delete_domain(context, domain)
# Handle Creation Failures
domains = self._get_failed_domains(context, CREATE_ACTION)
# Handle Creation Failures
domains = self._get_failed_domains(context, CREATE_ACTION)
for domain in domains:
self.create_domain(context, domain)
for domain in domains:
self.create_domain(context, domain)
# Handle Update Failures
domains = self._get_failed_domains(context, UPDATE_ACTION)
# Handle Update Failures
domains = self._get_failed_domains(context, UPDATE_ACTION)
for domain in domains:
self.update_domain(context, domain)
for domain in domains:
self.update_domain(context, domain)
except Exception:
LOG.exception(_LE('An unhandled exception in periodic recovery '
'occurred'))
except Exception:
LOG.exception(_LE('An unhandled exception in periodic '
'recovery occurred'))
def periodic_sync(self):
"""
:return: None
"""
context = DesignateContext.get_admin_context(all_tenants=True) # noqa
# TODO(kiall): Replace this inter-process-lock with a distributed
# lock, likely using the tooz library - see bug 1445127.
with lockutils.lock('periodic_sync', external=True, delay=30):
context = DesignateContext.get_admin_context(all_tenants=True)
LOG.debug("Starting Periodic Synchronization")
LOG.debug("Starting Periodic Synchronization")
criterion = {
'pool_id': CONF['service:pool_manager'].pool_id,
'status': '!%s' % ERROR_STATUS
}
criterion = {
'pool_id': CONF['service:pool_manager'].pool_id,
'status': '!%s' % ERROR_STATUS
}
periodic_sync_seconds = \
CONF['service:pool_manager'].periodic_sync_seconds
periodic_sync_seconds = \
CONF['service:pool_manager'].periodic_sync_seconds
if periodic_sync_seconds is not None:
# Generate the current serial, will provide a UTC Unix TS.
current = utils.increment_serial()
criterion['serial'] = ">%s" % (current - periodic_sync_seconds)
if periodic_sync_seconds is not None:
# Generate the current serial, will provide a UTC Unix TS.
current = utils.increment_serial()
criterion['serial'] = ">%s" % (current - periodic_sync_seconds)
domains = self.central_api.find_domains(context, criterion)
domains = self.central_api.find_domains(context, criterion)
try:
for domain in domains:
# TODO(kiall): If the domain was created within the last
# periodic_sync_seconds, attempt to recreate to
# fill in targets which may have failed.
self.update_domain(context, domain)
try:
for domain in domains:
# TODO(kiall): If the domain was created within the last
# periodic_sync_seconds, attempt to recreate
# to fill in targets which may have failed.
self.update_domain(context, domain)
except Exception:
LOG.exception(_LE('An unhandled exception in periodic '
'synchronization occurred.'))
except Exception:
LOG.exception(_LE('An unhandled exception in periodic '
'synchronization occurred.'))
# Standard Create/Update/Delete Methods
def create_domain(self, context, domain):

View File

@ -281,3 +281,11 @@ debug = False
#rndc_key_file = /etc/rndc.key
#zone_file_path = $state_path/zones
#query_destination = 127.0.0.1
########################
## Library Configuration
########################
[oslo_concurrency]
# Path for Oslo Concurrency to store lock files; defaults to the value
# of the state_path setting.
#lock_path = $state_path