Added support for sync on ansible playbook operations. Writes cannot occur

when ansible operations are happening.

Jira-Issue: OSTACKDEV-22
This commit is contained in:
Borne Mace 2016-04-07 17:40:48 -07:00
parent 2a9497fd09
commit b31c030ad1
4 changed files with 112 additions and 40 deletions

View File

@ -133,6 +133,13 @@ fi
%post
setfacl -m d:g:%{kolla_group}:rw %{_var}/log/kolla
if ! test -f %{_sysconfdir}/kolla/kollacli/ansible.lock
then
touch %{_sysconfdir}/kolla/kollacli/ansible.lock
chown %{kolla_user}:%{kolla_group} %{_sysconfdir}/kolla/kollacli/ansible.lock
chmod 0660 %{_sysconfdir}/kolla/kollacli/ansible.lock
fi
if ! test -f ~%{kolla_user}/.ssh/id_rsa
then
runuser -m -s /bin/bash -c \
@ -188,7 +195,10 @@ esac
%changelog
* Tue Apr 07 2016 - Steve Noyes <steve.noyes@oracle.com>
* Thu Apr 07 2016 - Borne Mace <borne.mace@oracle.com>
- added ansible.lock file to coordinate ansible synchronization
* Thu Apr 07 2016 - Steve Noyes <steve.noyes@oracle.com>
- rename passwd_editor.py to kolla_actions.py
* Tue Apr 05 2016 - Steve Noyes <steve.noyes@oracle.com>

View File

@ -88,11 +88,10 @@ class ClientApi(
backupCount=4)
except IOError as e:
# most likely the caller is not part of the kolla group
raise IOError(u._
(str(e) +
'\nPermission denied to run the kolla client.'
'\nPlease add user to the kolla group and '
'then log out and back in.'))
raise IOError(u._('Permission denied to run the kolla client.'
'\nPlease add user to the kolla group and '
'then log out and back in. {error}')
.format(error=str(e)))
formatter = logging.Formatter(LOG_FILE_MESSAGE_FORMAT)
rotate_handler.setFormatter(formatter)

View File

@ -27,6 +27,8 @@ from kollacli.common.utils import PidManager
from kollacli.common.utils import get_kolla_actions_path
from kollacli.common.utils import get_admin_uids
from kollacli.common.utils import get_admin_user
from kollacli.common.utils import get_ansible_lock_path
from kollacli.common.utils import Lock
from kollacli.common.utils import run_cmd
from kollacli.common.utils import safe_decode
@ -63,9 +65,19 @@ class AnsibleJob(object):
self._errors = []
self._cmd_output = ''
self._kill_uname = None
self._ansible_lock = Lock(get_ansible_lock_path(), 'ansible_job')
def run(self):
try:
locked = self._ansible_lock.wait_acquire()
if not locked:
raise Exception(
u._('unable to run ansible job {cmd} '
'as we couldn\'t get lock held by {owner}:{pid}.')
.format(cmd=self._command,
owner=self._ansible_lock.current_owner,
pid=self._ansible_lock.current_pid))
# create and open named pipe, must be owned by kolla group
os.mkfifo(self._fifo_path, 0o660)
_, grp_id = get_admin_uids()
@ -196,13 +208,18 @@ class AnsibleJob(object):
- close stdout and stderr
- close and delete named pipe (fifo)
"""
# try to clear the ansible lock
self._ansible_lock.release()
# delete temp inventory file
remove_temp_inventory(self._temp_inv_path)
# close the process's stdout and stderr streams
if self._process.stdout and not self._process.stdout.closed:
if (self._process and self._process.stdout and not
self._process.stdout.closed):
self._process.stdout.close()
if self._process.stderr and not self._process.stderr.closed:
if (self._process and self._process.stderr and not
self._process.stderr.closed):
self._process.stderr.close()
# close and delete the named pipe (fifo)

View File

@ -11,6 +11,7 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import fcntl
import grp
import logging
import os
@ -56,6 +57,10 @@ def get_kolla_log_dir():
return '/var/log/kolla/'
def get_ansible_lock_path():
    """Return the path of the lock file used to serialize ansible operations.

    The path is 'ansible/ansible.lock' joined onto the kollacli etc
    directory returned by get_kollacli_etc().

    NOTE(review): the package %post script creates
    %{_sysconfdir}/kolla/kollacli/ansible.lock (no 'ansible/' component);
    confirm both locations agree, since the flock-based Lock opens this
    path without O_CREAT.
    """
    etc_dir = get_kollacli_etc()
    return os.path.join(etc_dir, 'ansible/ansible.lock')
def get_kolla_actions_path():
    """Return the path of kolla_actions.py in the kollacli home tools dir."""
    tools_dir = os.path.join(get_kollacli_home(), 'tools')
    return os.path.join(tools_dir, 'kolla_actions.py')
@ -238,14 +243,13 @@ def sync_read_file(path, mode='r'):
"""
lock = None
try:
lock = Lock(path + '.lock', 'sync_read')
locked = lock.wait_acquire(10)
lock = Lock(path, 'sync_read')
locked = lock.wait_acquire()
if not locked:
raise Exception(
u._('unable to read file {path} '
'as it was locked by {owner}:{pid}.')
.format(path=path, owner=lock.current_owner,
pid=lock.current_pid))
'as it was locked.')
.format(path=path))
with open(path, mode) as data_file:
data = data_file.read()
except Exception as e:
@ -258,21 +262,31 @@ def sync_read_file(path, mode='r'):
def sync_write_file(path, data, mode='w'):
"""synchronously write file"""
ansible_lock = None
lock = None
try:
lock = Lock(path + '.lock', 'sync_write')
locked = lock.wait_acquire(10)
ansible_lock = Lock(get_ansible_lock_path(), 'sync_write')
locked = ansible_lock.wait_acquire()
if not locked:
raise Exception(
u._('unable to get ansible lock while writing to {path} '
'as it was locked.')
.format(path=path))
lock = Lock(path, 'sync_write')
locked = lock.wait_acquire()
if not locked:
raise Exception(
u._('unable to write file {path} '
'as it was locked by {owner}:{pid}.')
.format(path=path, owner=lock.current_owner,
pid=lock.current_pid))
'as it was locked.')
.format(path=path))
with open(path, mode) as data_file:
data_file.write(data)
except Exception as e:
raise e
finally:
if ansible_lock:
ansible_lock.release()
if lock:
lock.release()
@ -336,29 +350,48 @@ def check_arg(param, param_name, expected_type, none_ok=False, empty_ok=False):
class Lock(object):
""" Object which represents an exclusive resource lock
def __init__(self, lockpath, owner='unknown owner'):
flock usage is the default behavior but a separate pidfile mechanism
is also available. flock doesn't have the same orphaned lock issue
that pidfile usage does. Both need to be tested on NFS. If flock
works then it seems better / less complicated for our needs.
"""
def __init__(self, lockpath, owner='unknown owner', use_flock=True):
    """Record where the lock lives, who is asking and which mechanism to use.

    Nothing is opened or locked here.

    :param lockpath: path of the file to lock on
    :param owner: label identifying the requester
    :param use_flock: True selects the flock mechanism, False the pidfile one
    """
    # what to lock and how
    self.lockpath = lockpath
    self.use_flock = use_flock
    # who is asking; the pid is kept as a string
    self.owner = owner
    self.pid = str(os.getpid())
    # descriptor opened by the flock mechanism; nothing is open yet
    self.fd = None
    # details of the current holder; placeholders until something fills
    # them in (presumably is_owned_by_me -- confirm)
    self.current_owner = ''
    self.current_pid = -1
def acquire(self):
if not self.is_owned_by_me():
try:
fd = os.open(self.lockpath, os.O_CREAT | os.O_EXCL | os.O_RDWR)
with os.fdopen(fd, 'a') as f:
f.write(self.pid + '\n' + self.owner)
return self.is_owned_by_me()
if self.use_flock:
return self._acquire_flock()
else:
return self._acquire_pidfile()
except Exception as e:
# it is ok to fail to acquire, we just return that we failed
LOG.debug('Exception in acquire lock. '
'path: %s pid: %s owner: %s error: %s' %
(self.lockpath, self.pid, self.owner, str(e)))
return False
def wait_acquire(self, wait_duration, interval=0.1):
# Pidfile flavour of acquire: the lock is the existence of the file at
# self.lockpath. Exceptions raised here are not handled in this method.
# NOTE(review): indentation is not visible in this view; confirm whether the
# final return sits inside or after the `if`.
def _acquire_pidfile(self):
if not self.is_owned_by_me():
# O_CREAT | O_EXCL makes os.open fail if the lock file already exists
fd = os.open(self.lockpath, os.O_CREAT | os.O_EXCL | os.O_RDWR)
with os.fdopen(fd, 'a') as f:
# file content is the pid, a newline, then the owner label
f.write(self.pid + '\n' + self.owner)
# the result is whatever is_owned_by_me() reports
return self.is_owned_by_me()
def _acquire_flock(self):
    """Take a non-blocking exclusive flock on the file at ``self.lockpath``.

    The file is opened read/write without O_CREAT, so it must already
    exist. The descriptor is stored in ``self.fd``.

    :return: True once the lock is held
    :raises OSError: if the file cannot be opened or the lock is already
        held through another open of the file
    """
    # exclusive, and fail immediately instead of waiting for the holder
    lock_mode = fcntl.LOCK_EX | fcntl.LOCK_NB
    self.fd = os.open(self.lockpath, os.O_RDWR)
    # NOTE(review): if flock() raises, the descriptor stays open in
    # self.fd and a retry overwrites it with a fresh one -- confirm the
    # retry path does not accumulate open descriptors
    fcntl.flock(self.fd, lock_mode)
    return True
def wait_acquire(self, wait_duration=3, interval=0.1):
wait_time = 0
while (wait_time < wait_duration):
if not self.acquire():
@ -371,10 +404,14 @@ class Lock(object):
def is_owned_by_me(self):
"""Returns True if we own the lock or False otherwise"""
try:
if self.use_flock:
raise Exception(u._('Invalid use of is_owned_by_me while'
'using flock'))
if not os.path.exists(self.lockpath):
# lock doesn't exist, just return
return False
fd = os.open(self.lockpath, os.O_RDWR)
fd = os.open(self.lockpath, os.O_RDONLY)
with os.fdopen(fd, 'r') as f:
contents = f.read(2048).strip().split('\n')
if len(contents) > 0:
@ -394,22 +431,31 @@ class Lock(object):
return False
def release(self):
if self.is_owned_by_me():
try:
os.remove(self.lockpath)
return True
except Exception:
# this really shouldn't happen unless for some reason
# two areas in the same process try to release the lock
# at the same time and if that happens you want to see
# an error about it
LOG.error('Error releasing lock', exc_info=True)
return False
else:
try:
if self.use_flock:
self._release_flock()
else:
self._release_pidfile()
except Exception:
# this really shouldn't happen unless for some reason
# two areas in the same process try to release the lock
# at the same time and if that happens you want to see
# an error about it
LOG.error('Error releasing lock', exc_info=True)
return False
# Pidfile flavour of release: deletes the lock file, but only when
# is_owned_by_me() reports that this object owns it. An error from
# os.remove is not handled in this method.
# NOTE(review): indentation is not visible in this view; confirm whether
# `return True` sits inside the `if` (None returned when not the owner) or
# after it.
def _release_pidfile(self):
if self.is_owned_by_me():
os.remove(self.lockpath)
return True
class PidManager():
def _release_flock(self):
    """Unlock and close the descriptor held in ``self.fd``.

    :return: True when a descriptor was unlocked and closed, False when
        this object holds no descriptor (never acquired, or already
        released)
    :raises OSError: if unlocking or closing the descriptor fails
    """
    fd = self.fd
    if fd is None:
        # nothing was opened, so there is nothing to unlock or close
        return False
    # forget the descriptor before touching it: a repeated release must
    # never act on a descriptor number the OS may have reused elsewhere
    self.fd = None
    try:
        fcntl.flock(fd, fcntl.LOCK_UN)
    finally:
        # always close, even if LOCK_UN failed, so nothing is leaked
        os.close(fd)
    return True
class PidManager(object):
@staticmethod
def get_child_pids(pid, child_pids=[]):
"""get child pids of parent pid"""