Added support for sync on ansible playbook operations. Writes cannot occur
when ansible operations are happening. Jira-Issue: OSTACKDEV-22
This commit is contained in:
parent
2a9497fd09
commit
b31c030ad1
|
@ -133,6 +133,13 @@ fi
|
||||||
%post
|
%post
|
||||||
setfacl -m d:g:%{kolla_group}:rw %{_var}/log/kolla
|
setfacl -m d:g:%{kolla_group}:rw %{_var}/log/kolla
|
||||||
|
|
||||||
|
if ! test -f %{_sysconfdir}/kolla/kollacli/ansible.lock
|
||||||
|
then
|
||||||
|
touch %{_sysconfdir}/kolla/kollacli/ansible.lock
|
||||||
|
chown %{kolla_user}:%{kolla_group} %{_sysconfdir}/kolla/kollacli/ansible.lock
|
||||||
|
chmod 0660 %{_sysconfdir}/kolla/kollacli/ansible.lock
|
||||||
|
fi
|
||||||
|
|
||||||
if ! test -f ~%{kolla_user}/.ssh/id_rsa
|
if ! test -f ~%{kolla_user}/.ssh/id_rsa
|
||||||
then
|
then
|
||||||
runuser -m -s /bin/bash -c \
|
runuser -m -s /bin/bash -c \
|
||||||
|
@ -188,7 +195,10 @@ esac
|
||||||
|
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
* Tue Apr 07 2016 - Steve Noyes <steve.noyes@oracle.com>
|
* Thu Apr 07 2016 - Borne Mace <borne.mace@oracle.com>
|
||||||
|
- added ansible.lock file to coordinate ansible synchronization
|
||||||
|
|
||||||
|
* Thu Apr 07 2016 - Steve Noyes <steve.noyes@oracle.com>
|
||||||
- rename passwd_editor.py to kolla_actions.py
|
- rename passwd_editor.py to kolla_actions.py
|
||||||
|
|
||||||
* Tue Apr 05 2016 - Steve Noyes <steve.noyes@oracle.com>
|
* Tue Apr 05 2016 - Steve Noyes <steve.noyes@oracle.com>
|
||||||
|
|
|
@ -88,11 +88,10 @@ class ClientApi(
|
||||||
backupCount=4)
|
backupCount=4)
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
# most likely the caller is not part of the kolla group
|
# most likely the caller is not part of the kolla group
|
||||||
raise IOError(u._
|
raise IOError(u._('Permission denied to run the kolla client.'
|
||||||
(str(e) +
|
'\nPlease add user to the kolla group and '
|
||||||
'\nPermission denied to run the kolla client.'
|
'then log out and back in. {error}')
|
||||||
'\nPlease add user to the kolla group and '
|
.format(error=str(e)))
|
||||||
'then log out and back in.'))
|
|
||||||
|
|
||||||
formatter = logging.Formatter(LOG_FILE_MESSAGE_FORMAT)
|
formatter = logging.Formatter(LOG_FILE_MESSAGE_FORMAT)
|
||||||
rotate_handler.setFormatter(formatter)
|
rotate_handler.setFormatter(formatter)
|
||||||
|
|
|
@ -27,6 +27,8 @@ from kollacli.common.utils import PidManager
|
||||||
from kollacli.common.utils import get_kolla_actions_path
|
from kollacli.common.utils import get_kolla_actions_path
|
||||||
from kollacli.common.utils import get_admin_uids
|
from kollacli.common.utils import get_admin_uids
|
||||||
from kollacli.common.utils import get_admin_user
|
from kollacli.common.utils import get_admin_user
|
||||||
|
from kollacli.common.utils import get_ansible_lock_path
|
||||||
|
from kollacli.common.utils import Lock
|
||||||
from kollacli.common.utils import run_cmd
|
from kollacli.common.utils import run_cmd
|
||||||
from kollacli.common.utils import safe_decode
|
from kollacli.common.utils import safe_decode
|
||||||
|
|
||||||
|
@ -63,9 +65,19 @@ class AnsibleJob(object):
|
||||||
self._errors = []
|
self._errors = []
|
||||||
self._cmd_output = ''
|
self._cmd_output = ''
|
||||||
self._kill_uname = None
|
self._kill_uname = None
|
||||||
|
self._ansible_lock = Lock(get_ansible_lock_path(), 'ansible_job')
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
try:
|
try:
|
||||||
|
locked = self._ansible_lock.wait_acquire()
|
||||||
|
if not locked:
|
||||||
|
raise Exception(
|
||||||
|
u._('unable to run ansible job {cmd} '
|
||||||
|
'as we couldn\'t get lock held by {owner}:{pid}.')
|
||||||
|
.format(cmd=self._command,
|
||||||
|
owner=self._ansible_lock.current_owner,
|
||||||
|
pid=self._ansible_lock.current_pid))
|
||||||
|
|
||||||
# create and open named pipe, must be owned by kolla group
|
# create and open named pipe, must be owned by kolla group
|
||||||
os.mkfifo(self._fifo_path, 0o660)
|
os.mkfifo(self._fifo_path, 0o660)
|
||||||
_, grp_id = get_admin_uids()
|
_, grp_id = get_admin_uids()
|
||||||
|
@ -196,13 +208,18 @@ class AnsibleJob(object):
|
||||||
- close stdout and stderr
|
- close stdout and stderr
|
||||||
- close and delete named pipe (fifo)
|
- close and delete named pipe (fifo)
|
||||||
"""
|
"""
|
||||||
|
# try to clear the ansible lock
|
||||||
|
self._ansible_lock.release()
|
||||||
|
|
||||||
# delete temp inventory file
|
# delete temp inventory file
|
||||||
remove_temp_inventory(self._temp_inv_path)
|
remove_temp_inventory(self._temp_inv_path)
|
||||||
|
|
||||||
# close the process's stdout and stderr streams
|
# close the process's stdout and stderr streams
|
||||||
if self._process.stdout and not self._process.stdout.closed:
|
if (self._process and self._process.stdout and not
|
||||||
|
self._process.stdout.closed):
|
||||||
self._process.stdout.close()
|
self._process.stdout.close()
|
||||||
if self._process.stderr and not self._process.stderr.closed:
|
if (self._process and self._process.stderr and not
|
||||||
|
self._process.stderr.closed):
|
||||||
self._process.stderr.close()
|
self._process.stderr.close()
|
||||||
|
|
||||||
# close and delete the named pipe (fifo)
|
# close and delete the named pipe (fifo)
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
# License for the specific language governing permissions and limitations
|
# License for the specific language governing permissions and limitations
|
||||||
# under the License.
|
# under the License.
|
||||||
|
import fcntl
|
||||||
import grp
|
import grp
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
@ -56,6 +57,10 @@ def get_kolla_log_dir():
|
||||||
return '/var/log/kolla/'
|
return '/var/log/kolla/'
|
||||||
|
|
||||||
|
|
||||||
|
def get_ansible_lock_path():
    """Return the location of the lock file used to serialize ansible work.

    The path is built from the kollacli etc directory plus the relative
    component 'ansible/ansible.lock'.

    NOTE(review): the rpm %post script creates
    %{_sysconfdir}/kolla/kollacli/ansible.lock (no 'ansible/' sub-directory).
    Confirm that the path returned here is the file the package actually
    installs.
    """
    etc_dir = get_kollacli_etc()
    return os.path.join(etc_dir, 'ansible/ansible.lock')
|
||||||
|
|
||||||
|
|
||||||
def get_kolla_actions_path():
|
def get_kolla_actions_path():
|
||||||
return os.path.join(get_kollacli_home(), 'tools', 'kolla_actions.py')
|
return os.path.join(get_kollacli_home(), 'tools', 'kolla_actions.py')
|
||||||
|
|
||||||
|
@ -238,14 +243,13 @@ def sync_read_file(path, mode='r'):
|
||||||
"""
|
"""
|
||||||
lock = None
|
lock = None
|
||||||
try:
|
try:
|
||||||
lock = Lock(path + '.lock', 'sync_read')
|
lock = Lock(path, 'sync_read')
|
||||||
locked = lock.wait_acquire(10)
|
locked = lock.wait_acquire()
|
||||||
if not locked:
|
if not locked:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
u._('unable to read file {path} '
|
u._('unable to read file {path} '
|
||||||
'as it was locked by {owner}:{pid}.')
|
'as it was locked.')
|
||||||
.format(path=path, owner=lock.current_owner,
|
.format(path=path))
|
||||||
pid=lock.current_pid))
|
|
||||||
with open(path, mode) as data_file:
|
with open(path, mode) as data_file:
|
||||||
data = data_file.read()
|
data = data_file.read()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -258,21 +262,31 @@ def sync_read_file(path, mode='r'):
|
||||||
|
|
||||||
def sync_write_file(path, data, mode='w'):
|
def sync_write_file(path, data, mode='w'):
|
||||||
"""synchronously write file"""
|
"""synchronously write file"""
|
||||||
|
ansible_lock = None
|
||||||
lock = None
|
lock = None
|
||||||
try:
|
try:
|
||||||
lock = Lock(path + '.lock', 'sync_write')
|
ansible_lock = Lock(get_ansible_lock_path(), 'sync_write')
|
||||||
locked = lock.wait_acquire(10)
|
locked = ansible_lock.wait_acquire()
|
||||||
|
if not locked:
|
||||||
|
raise Exception(
|
||||||
|
u._('unable to get ansible lock while writing to {path} '
|
||||||
|
'as it was locked.')
|
||||||
|
.format(path=path))
|
||||||
|
|
||||||
|
lock = Lock(path, 'sync_write')
|
||||||
|
locked = lock.wait_acquire()
|
||||||
if not locked:
|
if not locked:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
u._('unable to write file {path} '
|
u._('unable to write file {path} '
|
||||||
'as it was locked by {owner}:{pid}.')
|
'as it was locked.')
|
||||||
.format(path=path, owner=lock.current_owner,
|
.format(path=path))
|
||||||
pid=lock.current_pid))
|
|
||||||
with open(path, mode) as data_file:
|
with open(path, mode) as data_file:
|
||||||
data_file.write(data)
|
data_file.write(data)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise e
|
raise e
|
||||||
finally:
|
finally:
|
||||||
|
if ansible_lock:
|
||||||
|
ansible_lock.release()
|
||||||
if lock:
|
if lock:
|
||||||
lock.release()
|
lock.release()
|
||||||
|
|
||||||
|
@ -336,29 +350,48 @@ def check_arg(param, param_name, expected_type, none_ok=False, empty_ok=False):
|
||||||
|
|
||||||
|
|
||||||
class Lock(object):
|
class Lock(object):
|
||||||
|
""" Object which represents an exclusive resource lock
|
||||||
|
|
||||||
def __init__(self, lockpath, owner='unknown owner'):
|
flock usage is the default behavior but a separate pidfile mechanism
|
||||||
|
is also available. flock doesn't have the same orphaned lock issue
|
||||||
|
that pidfile usage does. both need to be tested on NFS. if flock
|
||||||
|
works then it seems better / less complicated for our needs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, lockpath, owner='unknown owner', use_flock=True):
|
||||||
self.lockpath = lockpath
|
self.lockpath = lockpath
|
||||||
self.pid = str(os.getpid())
|
self.pid = str(os.getpid())
|
||||||
|
self.fd = None
|
||||||
self.owner = owner
|
self.owner = owner
|
||||||
self.current_pid = -1
|
self.current_pid = -1
|
||||||
self.current_owner = ''
|
self.current_owner = ''
|
||||||
|
self.use_flock = use_flock
|
||||||
|
|
||||||
def acquire(self):
|
def acquire(self):
|
||||||
if not self.is_owned_by_me():
|
|
||||||
try:
|
try:
|
||||||
fd = os.open(self.lockpath, os.O_CREAT | os.O_EXCL | os.O_RDWR)
|
if self.use_flock:
|
||||||
with os.fdopen(fd, 'a') as f:
|
return self._acquire_flock()
|
||||||
f.write(self.pid + '\n' + self.owner)
|
else:
|
||||||
return self.is_owned_by_me()
|
return self._acquire_pidfile()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# it is ok to fail to acquire, we just return that we failed
|
# it is ok to fail to acquire, we just return that we failed
|
||||||
LOG.debug('Exception in acquire lock. '
|
LOG.debug('Exception in acquire lock. '
|
||||||
'path: %s pid: %s owner: %s error: %s' %
|
'path: %s pid: %s owner: %s error: %s' %
|
||||||
(self.lockpath, self.pid, self.owner, str(e)))
|
(self.lockpath, self.pid, self.owner, str(e)))
|
||||||
return False
|
|
||||||
|
|
||||||
def wait_acquire(self, wait_duration, interval=0.1):
|
def _acquire_pidfile(self):
|
||||||
|
if not self.is_owned_by_me():
|
||||||
|
fd = os.open(self.lockpath, os.O_CREAT | os.O_EXCL | os.O_RDWR)
|
||||||
|
with os.fdopen(fd, 'a') as f:
|
||||||
|
f.write(self.pid + '\n' + self.owner)
|
||||||
|
return self.is_owned_by_me()
|
||||||
|
|
||||||
|
def _acquire_flock(self):
    """Take an exclusive, non-blocking flock on the lock file.

    Opens self.lockpath read/write (the file is not created here, so it
    must already exist) and requests LOCK_EX | LOCK_NB on the descriptor.

    Returns True once the lock is held; the descriptor is then stored in
    self.fd so that it can be unlocked and closed on release.

    Raises whatever os.open / fcntl.flock raise (for example when the file
    is missing or the lock is held elsewhere).  In that case the freshly
    opened descriptor is closed and self.fd is left untouched.
    """
    fd = os.open(self.lockpath, os.O_RDWR)
    try:
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except Exception:
        # Callers retry acquisition in a loop; without this close every
        # failed attempt would leak one open descriptor.
        os.close(fd)
        raise
    # Only publish the descriptor once it really carries the lock.
    self.fd = fd
    return True
|
||||||
|
|
||||||
|
def wait_acquire(self, wait_duration=3, interval=0.1):
|
||||||
wait_time = 0
|
wait_time = 0
|
||||||
while (wait_time < wait_duration):
|
while (wait_time < wait_duration):
|
||||||
if not self.acquire():
|
if not self.acquire():
|
||||||
|
@ -371,10 +404,14 @@ class Lock(object):
|
||||||
def is_owned_by_me(self):
|
def is_owned_by_me(self):
|
||||||
"""Returns True if we own the lock or False otherwise"""
|
"""Returns True if we own the lock or False otherwise"""
|
||||||
try:
|
try:
|
||||||
|
if self.use_flock:
|
||||||
|
raise Exception(u._('Invalid use of is_owned_by_me while'
|
||||||
|
'using flock'))
|
||||||
|
|
||||||
if not os.path.exists(self.lockpath):
|
if not os.path.exists(self.lockpath):
|
||||||
# lock doesn't exist, just return
|
# lock doesn't exist, just return
|
||||||
return False
|
return False
|
||||||
fd = os.open(self.lockpath, os.O_RDWR)
|
fd = os.open(self.lockpath, os.O_RDONLY)
|
||||||
with os.fdopen(fd, 'r') as f:
|
with os.fdopen(fd, 'r') as f:
|
||||||
contents = f.read(2048).strip().split('\n')
|
contents = f.read(2048).strip().split('\n')
|
||||||
if len(contents) > 0:
|
if len(contents) > 0:
|
||||||
|
@ -394,22 +431,31 @@ class Lock(object):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def release(self):
|
def release(self):
|
||||||
if self.is_owned_by_me():
|
try:
|
||||||
try:
|
if self.use_flock:
|
||||||
os.remove(self.lockpath)
|
self._release_flock()
|
||||||
return True
|
else:
|
||||||
except Exception:
|
self._release_pidfile()
|
||||||
# this really shouldn't happen unless for some reason
|
except Exception:
|
||||||
# two areas in the same process try to release the lock
|
# this really shouldn't happen unless for some reason
|
||||||
# at the same time and if that happens you want to see
|
# two areas in the same process try to release the lock
|
||||||
# an error about it
|
# at the same time and if that happens you want to see
|
||||||
LOG.error('Error releasing lock', exc_info=True)
|
# an error about it
|
||||||
return False
|
LOG.error('Error releasing lock', exc_info=True)
|
||||||
else:
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _release_pidfile(self):
|
||||||
|
if self.is_owned_by_me():
|
||||||
|
os.remove(self.lockpath)
|
||||||
|
return True
|
||||||
|
|
||||||
class PidManager():
|
def _release_flock(self):
    """Drop the flock held through self.fd and close the descriptor.

    Returns True when a held descriptor was unlocked and closed, and False
    when there is nothing to release (self.fd is None), which happens when
    release is requested for a lock that was never acquired.

    Errors from fcntl.flock / os.close are propagated to the caller.
    """
    if self.fd is None:
        # Nothing was acquired; passing None to fcntl.flock would raise
        # a TypeError.
        return False
    # Forget the descriptor first so a repeated release can never act on a
    # stale (possibly reused) descriptor number.
    fd, self.fd = self.fd, None
    try:
        fcntl.flock(fd, fcntl.LOCK_UN)
    finally:
        os.close(fd)
    return True
|
||||||
|
|
||||||
|
|
||||||
|
class PidManager(object):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_child_pids(pid, child_pids=[]):
|
def get_child_pids(pid, child_pids=[]):
|
||||||
"""get child pids of parent pid"""
|
"""get child pids of parent pid"""
|
||||||
|
|
Loading…
Reference in New Issue