add timeout option to deploy

- add timeout option (in minutes) to deploy
- add a test for the deploy timeout option
- add logic to ensure ansible.lock is always closed, even if releasing the lock raises
- always run job cleanup after kill, even if the kill itself fails

Jira-Issue: OPENSTACK-465
This commit is contained in:
Steve Noyes 2016-06-02 16:43:31 -04:00
parent 0d1776114d
commit 05e0161ca2
5 changed files with 52 additions and 18 deletions

View File

@ -12,6 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import logging
import time
import traceback
import kollacli.i18n as u
@ -34,22 +35,42 @@ class Deploy(Command):
help=u._('Deployment host list'))
parser.add_argument('--serial', action='store_true',
help=u._('Deploy serially'))
parser.add_argument('--timeout', nargs=1,
metavar='<timeout>',
help=u._('timeout (in minutes)'))
return parser
def take_action(self, parsed_args):
hosts = None
serial_flag = False
verbose_level = self.app.options.verbose_level
timeout_target = 0
try:
if parsed_args.hosts:
host_list = parsed_args.hosts.strip()
hosts = host_list.split(',')
if parsed_args.serial:
serial_flag = True
if parsed_args.timeout:
try:
timeout = float(parsed_args.timeout[0])
except Exception:
raise CommandError(u._('Timeout value is not a number.'))
timeout_target = time.time() + (60 * timeout)
job = CLIENT.async_deploy(hosts, serial_flag,
verbose_level)
status = job.wait()
# wait for job to complete
status = None
while status is None:
if timeout_target and time.time() > timeout_target:
job.kill()
raise CommandError(u._('Job timed out and was killed.'))
time.sleep(1)
status = job.get_status()
# job is done
if verbose_level > 2:
LOG.info('\n\n' + 80 * '=')
LOG.info(u._('DEBUG command output:\n{out}')

View File

@ -174,22 +174,27 @@ class AnsibleJob(object):
"""
# the kill must be run as the kolla user so the
# kolla_actions program must be used.
actions_path = get_kolla_actions_path()
kolla_user = get_admin_user()
cmd_prefix = ('/usr/bin/sudo -u %s %s job -t -p '
% (kolla_user, actions_path))
try:
actions_path = get_kolla_actions_path()
kolla_user = get_admin_user()
cmd_prefix = ('/usr/bin/sudo -u %s %s job -t -p '
% (kolla_user, actions_path))
# kill the children from largest to smallest pids.
child_pids = PidManager.get_child_pids(self._process.pid)
for child_pid in sorted(child_pids, reverse=True):
cmd = ''.join([cmd_prefix, child_pid])
err_msg, output = run_cmd(cmd, print_output=False)
if err_msg:
LOG.debug('kill failed: %s %s' % (err_msg, output))
# kill the children from largest to smallest pids.
child_pids = PidManager.get_child_pids(self._process.pid)
for child_pid in sorted(child_pids, reverse=True):
cmd = ''.join([cmd_prefix, child_pid])
err_msg, output = run_cmd(cmd, print_output=False)
if err_msg:
LOG.debug('kill failed: %s %s' % (err_msg, output))
else:
LOG.debug('kill succeeded: %s' % child_pid)
# record the name of user who killed the job
cur_uid = os.getuid()
self._kill_uname = pwd.getpwuid(cur_uid)[0]
# record the name of user who killed the job
cur_uid = os.getuid()
self._kill_uname = pwd.getpwuid(cur_uid)[0]
finally:
self._cleanup()
def _get_msg_from_cmdout(self, msg):
"""get message from command output

View File

@ -483,8 +483,12 @@ class Lock(object):
return True
def _release_flock(self):
fcntl.flock(self.fd, fcntl.LOCK_UN)
os.close(self.fd)
try:
fcntl.flock(self.fd, fcntl.LOCK_UN)
except Exception as e:
LOG.debug('Exception while releasing lock: %s' % str(e))
finally:
os.close(self.fd)
return True

View File

@ -142,6 +142,10 @@ class TestFunctional(KollaCliTest):
self.run_cli_cmd('deploy')
self.run_cli_cmd('deploy --serial -v')
# test deploy with timeout
msg = self.run_cli_cmd('deploy --timeout .001', expect_error=True)
self.assertIn('timed out', msg)
# run compute host deploy to invalid host
err_msg = 'Status: unreachable'
msg = ''

View File

@ -115,7 +115,7 @@ class TestFunctional(KollaCliTest):
# test killing a deploy
self.log.info('Kill a deployment')
job = CLIENT.async_deploy()
time.sleep(random.randint(1, 5))
time.sleep(random.randint(5, 8))
job.kill()
self._process_job(job, 'deploy-kill',
is_physical_host, expect_kill=True)