Added support for sync on ansible playbook operations.

Writes cannot occur while ansible operations are happening.

Jira-Issue: OSTACKDEV-22
This commit is contained in:
Borne Mace 2016-04-07 17:40:48 -07:00
parent 2a9497fd09
commit b31c030ad1
4 changed files with 112 additions and 40 deletions

View File

@ -133,6 +133,13 @@ fi
%post %post
setfacl -m d:g:%{kolla_group}:rw %{_var}/log/kolla setfacl -m d:g:%{kolla_group}:rw %{_var}/log/kolla
if ! test -f %{_sysconfdir}/kolla/kollacli/ansible.lock
then
touch %{_sysconfdir}/kolla/kollacli/ansible.lock
chown %{kolla_user}:%{kolla_group} %{_sysconfdir}/kolla/kollacli/ansible.lock
chmod 0660 %{_sysconfdir}/kolla/kollacli/ansible.lock
fi
if ! test -f ~%{kolla_user}/.ssh/id_rsa if ! test -f ~%{kolla_user}/.ssh/id_rsa
then then
runuser -m -s /bin/bash -c \ runuser -m -s /bin/bash -c \
@ -188,7 +195,10 @@ esac
%changelog %changelog
* Tue Apr 07 2016 - Steve Noyes <steve.noyes@oracle.com> * Thu Apr 07 2016 - Borne Mace <borne.mace@oracle.com>
- added ansible.lock file to coordinate ansible synchronization
* Thu Apr 07 2016 - Steve Noyes <steve.noyes@oracle.com>
- rename passwd_editor.py to kolla_actions.py - rename passwd_editor.py to kolla_actions.py
* Tue Apr 05 2016 - Steve Noyes <steve.noyes@oracle.com> * Tue Apr 05 2016 - Steve Noyes <steve.noyes@oracle.com>

View File

@ -88,11 +88,10 @@ class ClientApi(
backupCount=4) backupCount=4)
except IOError as e: except IOError as e:
# most likely the caller is not part of the kolla group # most likely the caller is not part of the kolla group
raise IOError(u._ raise IOError(u._('Permission denied to run the kolla client.'
(str(e) + '\nPlease add user to the kolla group and '
'\nPermission denied to run the kolla client.' 'then log out and back in. {error}')
'\nPlease add user to the kolla group and ' .format(error=str(e)))
'then log out and back in.'))
formatter = logging.Formatter(LOG_FILE_MESSAGE_FORMAT) formatter = logging.Formatter(LOG_FILE_MESSAGE_FORMAT)
rotate_handler.setFormatter(formatter) rotate_handler.setFormatter(formatter)

View File

@ -27,6 +27,8 @@ from kollacli.common.utils import PidManager
from kollacli.common.utils import get_kolla_actions_path from kollacli.common.utils import get_kolla_actions_path
from kollacli.common.utils import get_admin_uids from kollacli.common.utils import get_admin_uids
from kollacli.common.utils import get_admin_user from kollacli.common.utils import get_admin_user
from kollacli.common.utils import get_ansible_lock_path
from kollacli.common.utils import Lock
from kollacli.common.utils import run_cmd from kollacli.common.utils import run_cmd
from kollacli.common.utils import safe_decode from kollacli.common.utils import safe_decode
@ -63,9 +65,19 @@ class AnsibleJob(object):
self._errors = [] self._errors = []
self._cmd_output = '' self._cmd_output = ''
self._kill_uname = None self._kill_uname = None
self._ansible_lock = Lock(get_ansible_lock_path(), 'ansible_job')
def run(self): def run(self):
try: try:
locked = self._ansible_lock.wait_acquire()
if not locked:
raise Exception(
u._('unable to run ansible job {cmd} '
'as we couldn\'t get lock held by {owner}:{pid}.')
.format(cmd=self._command,
owner=self._ansible_lock.current_owner,
pid=self._ansible_lock.current_pid))
# create and open named pipe, must be owned by kolla group # create and open named pipe, must be owned by kolla group
os.mkfifo(self._fifo_path, 0o660) os.mkfifo(self._fifo_path, 0o660)
_, grp_id = get_admin_uids() _, grp_id = get_admin_uids()
@ -196,13 +208,18 @@ class AnsibleJob(object):
- close stdout and stderr - close stdout and stderr
- close and delete named pipe (fifo) - close and delete named pipe (fifo)
""" """
# try to clear the ansible lock
self._ansible_lock.release()
# delete temp inventory file # delete temp inventory file
remove_temp_inventory(self._temp_inv_path) remove_temp_inventory(self._temp_inv_path)
# close the process's stdout and stderr streams # close the process's stdout and stderr streams
if self._process.stdout and not self._process.stdout.closed: if (self._process and self._process.stdout and not
self._process.stdout.closed):
self._process.stdout.close() self._process.stdout.close()
if self._process.stderr and not self._process.stderr.closed: if (self._process and self._process.stderr and not
self._process.stderr.closed):
self._process.stderr.close() self._process.stderr.close()
# close and delete the named pipe (fifo) # close and delete the named pipe (fifo)

View File

@ -11,6 +11,7 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import fcntl
import grp import grp
import logging import logging
import os import os
@ -56,6 +57,10 @@ def get_kolla_log_dir():
return '/var/log/kolla/' return '/var/log/kolla/'
def get_ansible_lock_path():
return os.path.join(get_kollacli_etc(), 'ansible/ansible.lock')
def get_kolla_actions_path(): def get_kolla_actions_path():
return os.path.join(get_kollacli_home(), 'tools', 'kolla_actions.py') return os.path.join(get_kollacli_home(), 'tools', 'kolla_actions.py')
@ -238,14 +243,13 @@ def sync_read_file(path, mode='r'):
""" """
lock = None lock = None
try: try:
lock = Lock(path + '.lock', 'sync_read') lock = Lock(path, 'sync_read')
locked = lock.wait_acquire(10) locked = lock.wait_acquire()
if not locked: if not locked:
raise Exception( raise Exception(
u._('unable to read file {path} ' u._('unable to read file {path} '
'as it was locked by {owner}:{pid}.') 'as it was locked.')
.format(path=path, owner=lock.current_owner, .format(path=path))
pid=lock.current_pid))
with open(path, mode) as data_file: with open(path, mode) as data_file:
data = data_file.read() data = data_file.read()
except Exception as e: except Exception as e:
@ -258,21 +262,31 @@ def sync_read_file(path, mode='r'):
def sync_write_file(path, data, mode='w'): def sync_write_file(path, data, mode='w'):
"""synchronously write file""" """synchronously write file"""
ansible_lock = None
lock = None lock = None
try: try:
lock = Lock(path + '.lock', 'sync_write') ansible_lock = Lock(get_ansible_lock_path(), 'sync_write')
locked = lock.wait_acquire(10) locked = ansible_lock.wait_acquire()
if not locked:
raise Exception(
u._('unable to get ansible lock while writing to {path} '
'as it was locked.')
.format(path=path))
lock = Lock(path, 'sync_write')
locked = lock.wait_acquire()
if not locked: if not locked:
raise Exception( raise Exception(
u._('unable to write file {path} ' u._('unable to write file {path} '
'as it was locked by {owner}:{pid}.') 'as it was locked.')
.format(path=path, owner=lock.current_owner, .format(path=path))
pid=lock.current_pid))
with open(path, mode) as data_file: with open(path, mode) as data_file:
data_file.write(data) data_file.write(data)
except Exception as e: except Exception as e:
raise e raise e
finally: finally:
if ansible_lock:
ansible_lock.release()
if lock: if lock:
lock.release() lock.release()
@ -336,29 +350,48 @@ def check_arg(param, param_name, expected_type, none_ok=False, empty_ok=False):
class Lock(object): class Lock(object):
""" Object which represents an exclusive resource lock
def __init__(self, lockpath, owner='unknown owner'): flock usage is the default behavior but a separate pidfile mechanism
is also available. flock doesn't have the same orphaned lock issue
that pidfile usage does. both need to be tested on NFS. if flock
works then it seems better / less complicated for our needs.
"""
def __init__(self, lockpath, owner='unknown owner', use_flock=True):
self.lockpath = lockpath self.lockpath = lockpath
self.pid = str(os.getpid()) self.pid = str(os.getpid())
self.fd = None
self.owner = owner self.owner = owner
self.current_pid = -1 self.current_pid = -1
self.current_owner = '' self.current_owner = ''
self.use_flock = use_flock
def acquire(self): def acquire(self):
if not self.is_owned_by_me():
try: try:
fd = os.open(self.lockpath, os.O_CREAT | os.O_EXCL | os.O_RDWR) if self.use_flock:
with os.fdopen(fd, 'a') as f: return self._acquire_flock()
f.write(self.pid + '\n' + self.owner) else:
return self.is_owned_by_me() return self._acquire_pidfile()
except Exception as e: except Exception as e:
# it is ok to fail to acquire, we just return that we failed # it is ok to fail to acquire, we just return that we failed
LOG.debug('Exception in acquire lock. ' LOG.debug('Exception in acquire lock. '
'path: %s pid: %s owner: %s error: %s' % 'path: %s pid: %s owner: %s error: %s' %
(self.lockpath, self.pid, self.owner, str(e))) (self.lockpath, self.pid, self.owner, str(e)))
return False
def wait_acquire(self, wait_duration, interval=0.1): def _acquire_pidfile(self):
if not self.is_owned_by_me():
fd = os.open(self.lockpath, os.O_CREAT | os.O_EXCL | os.O_RDWR)
with os.fdopen(fd, 'a') as f:
f.write(self.pid + '\n' + self.owner)
return self.is_owned_by_me()
def _acquire_flock(self):
self.fd = os.open(self.lockpath, os.O_RDWR)
fcntl.flock(self.fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
return True
def wait_acquire(self, wait_duration=3, interval=0.1):
wait_time = 0 wait_time = 0
while (wait_time < wait_duration): while (wait_time < wait_duration):
if not self.acquire(): if not self.acquire():
@ -371,10 +404,14 @@ class Lock(object):
def is_owned_by_me(self): def is_owned_by_me(self):
"""Returns True if we own the lock or False otherwise""" """Returns True if we own the lock or False otherwise"""
try: try:
if self.use_flock:
raise Exception(u._('Invalid use of is_owned_by_me while'
'using flock'))
if not os.path.exists(self.lockpath): if not os.path.exists(self.lockpath):
# lock doesn't exist, just return # lock doesn't exist, just return
return False return False
fd = os.open(self.lockpath, os.O_RDWR) fd = os.open(self.lockpath, os.O_RDONLY)
with os.fdopen(fd, 'r') as f: with os.fdopen(fd, 'r') as f:
contents = f.read(2048).strip().split('\n') contents = f.read(2048).strip().split('\n')
if len(contents) > 0: if len(contents) > 0:
@ -394,22 +431,31 @@ class Lock(object):
return False return False
def release(self): def release(self):
if self.is_owned_by_me(): try:
try: if self.use_flock:
os.remove(self.lockpath) self._release_flock()
return True else:
except Exception: self._release_pidfile()
# this really shouldn't happen unless for some reason except Exception:
# two areas in the same process try to release the lock # this really shouldn't happen unless for some reason
# at the same time and if that happens you want to see # two areas in the same process try to release the lock
# an error about it # at the same time and if that happens you want to see
LOG.error('Error releasing lock', exc_info=True) # an error about it
return False LOG.error('Error releasing lock', exc_info=True)
else:
return False return False
def _release_pidfile(self):
if self.is_owned_by_me():
os.remove(self.lockpath)
return True
class PidManager(): def _release_flock(self):
fcntl.flock(self.fd, fcntl.LOCK_UN)
os.close(self.fd)
return True
class PidManager(object):
@staticmethod @staticmethod
def get_child_pids(pid, child_pids=[]): def get_child_pids(pid, child_pids=[]):
"""get child pids of parent pid""" """get child pids of parent pid"""