Fix misused deployment vs connection timeouts

Fix misused ansible connection timeout and deployment timeout passed in
config download and ansible runner utility.

Allow ansible runner utility to be given a job_timeout as well.

Also fix the misuse of timeout parameters in related worklows. Add
--overcloud-ssh-port-timeout and use it to configure ansible connection
timeout for the DeleteNode interace of the involved
workflows. Then use the timeout parameter as real timeout instead of
mistakingly passing it as a connection timeout.

Add new unit test for ansible timeout in config_download. Add missing
coverage for the existing timeout-related params in other unit tests.

Closes-Bug: #1868063
Co-authored-by: Kevin Carter <kevin@cloudnull.com>
Change-Id: I2a4d151bcb83074af5bcf7d1b8c68d81d3c0400d
Signed-off-by: Bogdan Dobrelya <bdobreli@redhat.com>
changes/07/713807/13
Bogdan Dobrelya 3 years ago
parent d9174e83fd
commit 9c602da452
  1. 11
      releasenotes/notes/ansible_connection_timeout-78e45f20ff91f672.yaml
  2. 7
      tripleoclient/tests/fixture_data/deployment.py
  3. 20
      tripleoclient/tests/test_utils.py
  4. 81
      tripleoclient/tests/v1/overcloud_deploy/test_overcloud_deploy.py
  5. 9
      tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py
  6. 2
      tripleoclient/tests/v1/test_overcloud_plan.py
  7. 30
      tripleoclient/utils.py
  8. 14
      tripleoclient/v1/overcloud_deploy.py
  9. 7
      tripleoclient/v1/overcloud_node.py
  10. 26
      tripleoclient/workflows/deployment.py
  11. 13
      tripleoclient/workflows/scale.py

@ -0,0 +1,11 @@
---
features:
- |
`openstack overcloud node delete` now can take
`--overcloud-ssh-port-timeout` to configure connection timeout
for Ansible. While `--timeout` configures the command timeout as expected.
fixes:
- |
Ansible connection timeout used for config download and the deployment
timeout now will be given proper values. It fixes `bug 1868063
<https://bugs.launchpad.net/tripleo/+bug/1868063>`__.

@ -54,6 +54,10 @@ class UtilsOvercloudFixture(fixtures.Fixture):
self.mock_create_ocrc = self.useFixture(fixtures.MockPatch(
'tripleoclient.utils.write_overcloudrc')
).mock
self.mock_update_deployment_status = self.useFixture(
fixtures.MockPatch(
'tripleoclient.utils.update_deployment_status')
).mock
class UtilsFixture(fixtures.Fixture):
@ -70,3 +74,6 @@ class UtilsFixture(fixtures.Fixture):
self.mock_write_overcloudrc = self.useFixture(fixtures.MockPatch(
'tripleoclient.utils.write_overcloudrc')
).mock
self.mock_run_ansible_playbook = self.useFixture(fixtures.MockPatch(
'tripleoclient.utils.run_ansible_playbook')
).mock

@ -14,6 +14,7 @@
#
import ansible_runner
import argparse
import datetime
import logging
@ -195,6 +196,25 @@ class TestRunAnsiblePlaybook(TestCase):
)
self.assertEqual(retcode, 0)
@mock.patch('six.moves.builtins.open')
@mock.patch('tripleoclient.utils.makedirs')
@mock.patch('os.path.exists', side_effect=(False, True, True))
def test_run_with_timeout(self, mock_exists, mock_mkdir, mock_open):
ansible_runner.ArtifactLoader = mock.MagicMock()
ansible_runner.Runner.run = mock.MagicMock(return_value=('', 0))
ansible_runner.runner_config = mock.MagicMock()
retcode, output = utils.run_ansible_playbook(
playbook='existing.yaml',
inventory='localhost,',
workdir='/tmp',
timeout=42
)
self.assertIn(mock.call('/tmp/env/settings', 'w'),
mock_open.mock_calls)
self.assertIn(
mock.call().__enter__().write('job_timeout: 2520\n'), # 42m * 60
mock_open.mock_calls)
class TestRunCommandAndLog(TestCase):
def setUp(self):

@ -1543,9 +1543,13 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud):
@mock.patch('tripleoclient.workflows.deployment.create_overcloudrc',
autospec=True)
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'_deploy_tripleo_heat_templates_tmpdir', autospec=True)
'_heat_deploy', autospec=True)
@mock.patch('tripleoclient.utils.check_stack_network_matches_env_files')
@mock.patch('heatclient.common.template_utils.deep_update', autospec=True)
@mock.patch('tripleoclient.workflows.plan_management.'
'create_plan_from_templates', autospec=True)
def test_config_download_timeout(
self, mock_deploy_tmpdir,
self, mock_plan_man, mock_hc, mock_stack_network_check, mock_hd,
mock_overcloudrc, mock_get_undercloud_host_entry, mock_copy):
fixture = deployment.DeploymentWorkflowFixture()
self.useFixture(fixture)
@ -1555,17 +1559,84 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud):
orchestration_client = clients.orchestration
orchestration_client.stacks.get.return_value = fakes.create_tht_stack()
arglist = ['--templates', '--config-download-timeout', '240']
arglist = ['--templates', '--overcloud-ssh-port-timeout', '42',
'--timeout', '451']
verifylist = [
('templates', '/usr/share/openstack-tripleo-heat-templates/'),
('config_download_timeout', 240),
('overcloud_ssh_port_timeout', 42), ('timeout', 451)
]
parsed_args = self.check_parser(self.cmd, arglist, verifylist)
self.cmd.take_action(parsed_args)
# assuming heat deploy consumed a 3m out of total 451m timeout
with mock.patch('time.time', side_effect=[0, 1585820346, 1585820526]):
self.cmd.take_action(parsed_args)
self.assertIn(
[mock.call(mock.ANY, mock.ANY, 'overcloud', mock.ANY,
{'DeployIdentifier': '', 'UpdateIdentifier': '',
'StackAction': 'UPDATE', 'UndercloudHostsEntries':
['192.168.0.1 uc.ctlplane.localhost uc.ctlplane']}, {},
451, mock.ANY, {}, False, False, False, None,
deployment_options={})],
mock_hd.mock_calls)
self.assertIn(
[mock.call(mock.ANY, mock.ANY, mock.ANY, 'ctlplane', None, None,
deployment_options={},
deployment_timeout=448, # 451 - 3, total time left
in_flight_validations=False, timeout=42, verbosity=3)],
fixture.mock_config_download.mock_calls)
fixture.mock_config_download.assert_called()
mock_copy.assert_called_once()
@mock.patch('tripleoclient.utils.copy_clouds_yaml')
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'_update_parameters')
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'_get_undercloud_host_entry', autospec=True,
return_value='192.168.0.1 uc.ctlplane.localhost uc.ctlplane')
@mock.patch('tripleoclient.workflows.deployment.create_overcloudrc',
autospec=True)
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'_deploy_tripleo_heat_templates_tmpdir', autospec=True)
def test_config_download_only_timeout(
self, mock_deploy_tmpdir,
mock_overcloudrc, mock_get_undercloud_host_entry, mock_update,
mock_copy):
utils_fixture = deployment.UtilsOvercloudFixture()
self.useFixture(utils_fixture)
utils_fixture2 = deployment.UtilsFixture()
self.useFixture(utils_fixture2)
clients = self.app.client_manager
stack = mock.Mock()
stack.stack_name = 'overcloud'
stack.output_show.return_value = {'output': {'output_value': []}}
orchestration_client = clients.orchestration
orchestration_client.stacks.get.return_value = stack
arglist = ['--templates', '--config-download-only',
'--overcloud-ssh-port-timeout', '42',
'--config-download-timeout', '240']
verifylist = [
('templates', '/usr/share/openstack-tripleo-heat-templates/'),
('config_download_only', True),
('config_download_timeout', 240),
('overcloud_ssh_port_timeout', 42)
]
parsed_args = self.check_parser(self.cmd, arglist, verifylist)
self.cmd.take_action(parsed_args)
playbook = '/var/lib/mistral/overcloud/deploy_steps_playbook.yaml'
self.assertIn(
[mock.call(
ansible_cfg=None, ansible_timeout=42,
extra_env_variables={'ANSIBLE_BECOME': True}, extra_vars=None,
inventory=mock.ANY, key=mock.ANY, limit_hosts=None,
playbook=playbook, playbook_dir=mock.ANY,
reproduce_command=True, skip_tags='opendev-validation',
ssh_user='tripleo-admin', tags=None,
timeout=240,
verbosity=3, workdir=mock.ANY)],
utils_fixture2.mock_run_ansible_playbook.mock_calls)
def test_download_missing_files_from_plan(self):
# Restore the real function so we don't accidentally call the mock
self.cmd._download_missing_files_from_plan = self.real_download_missing

@ -185,9 +185,11 @@ class TestDeleteNode(fakes.TestDeleteNode):
inp.flush()
argslist = ['--baremetal-deployment', inp.name, '--templates',
'--stack', 'overcast', '--timeout', '90', '--yes']
'--stack', 'overcast', '--overcloud-ssh-port-timeout',
'42', '--timeout', '90', '--yes']
verifylist = [
('stack', 'overcast'),
('overcloud_ssh_port_timeout', 42),
('baremetal_deployment', inp.name)
]
parsed_args = self.check_parser(self.cmd, argslist, verifylist)
@ -245,11 +247,12 @@ class TestDeleteNode(fakes.TestDeleteNode):
ssh_user='tripleo-admin',
key=mock.ANY,
limit_hosts='overcast-controller-1:overcast-compute-0',
ansible_timeout=90,
ansible_timeout=42,
reproduce_command=True,
extra_env_variables={'ANSIBLE_BECOME': True},
extra_vars=None,
tags=None
tags=None,
timeout=90
),
mock.call(
inventory='localhost,',

@ -287,11 +287,11 @@ class TestOvercloudDeployPlan(utils.TestCommand):
'undercloud,',
mock.ANY,
constants.ANSIBLE_TRIPLEO_PLAYBOOKS,
timeout=240,
extra_vars={
"container": "overcast",
"run_validations": True,
"skip_deploy_identifier": False,
"ansible_timeout": 240
},
verbosity=3,
)

@ -231,7 +231,7 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
extra_env_variables=None, parallel_run=False,
callback_whitelist=None, ansible_cfg=None,
ansible_timeout=30, reproduce_command=False,
fail_on_rc=True):
fail_on_rc=True, timeout=None):
"""Simple wrapper for ansible-playbook.
:param playbook: Playbook filename.
@ -320,6 +320,9 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
return code from the playbook execution results in a
non 0 exit code. The default is True.
:type fail_on_rc: Boolean
:param timeout: Timeout for ansible to finish playbook execution (minutes).
:type timeout: int
"""
def _playbook_check(play):
@ -347,9 +350,30 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
'hosts'
)
def _running_ansible_msg(playbook, timeout=None):
if timeout and timeout > 0:
return ('Running Ansible playbook with timeout %sm: %s,' %
(timeout, playbook))
else:
return ('Running Ansible playbook: %s,' % playbook)
if not playbook_dir:
playbook_dir = workdir
if timeout and timeout > 0:
makedirs(os.path.join(workdir, 'env'))
settings_file = os.path.join(workdir, 'env/settings')
timeout_value = timeout * 60
if os.path.exists(settings_file):
with open(os.path.join(workdir, 'env/settings'), 'r') as f:
settings_object = yaml.safe_load(f.read())
settings_object['job_timeout'] = timeout_value
else:
settings_object = {'job_timeout': timeout_value}
with open(os.path.join(workdir, 'env/settings'), 'w') as f:
f.write(yaml.safe_dump(settings_object, default_flow_style=False))
if isinstance(playbook, (list, set)):
verified_playbooks = [_playbook_check(play=i) for i in playbook]
playbook = os.path.join(workdir, 'tripleo-multi-playbook.yaml')
@ -362,7 +386,7 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
)
LOG.info(
'Running Ansible playbook: {},'
_running_ansible_msg(playbook, timeout) +
' multi-playbook execution: {}'
' Working directory: {},'
' Playbook directory: {}'.format(
@ -375,7 +399,7 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
else:
playbook = _playbook_check(play=playbook)
LOG.info(
'Running Ansible playbook: {},'
_running_ansible_msg(playbook, timeout) +
' Working directory: {},'
' Playbook directory: {}'.format(
playbook,

@ -1023,10 +1023,13 @@ class DeployOvercloud(command.Command):
)
if parsed_args.config_download_timeout:
timeout = parsed_args.config_download_timeout * 60
timeout = parsed_args.config_download_timeout
else:
used = int(time.time() - start)
timeout = (parsed_args.timeout * 60) - used
used = int((time.time() - start) // 60)
timeout = parsed_args.timeout - used
if timeout <= 0:
raise exceptions.DeploymentError(
'Deployment timed out after %sm' % used)
deployment_options = {}
if parsed_args.deployment_python_interpreter:
@ -1040,10 +1043,11 @@ class DeployOvercloud(command.Command):
parsed_args.overcloud_ssh_network,
parsed_args.output_dir,
parsed_args.override_ansible_cfg,
timeout,
timeout=parsed_args.overcloud_ssh_port_timeout,
verbosity=utils.playbook_verbosity(self=self),
deployment_options=deployment_options,
in_flight_validations=parsed_args.inflight
in_flight_validations=parsed_args.inflight,
deployment_timeout=timeout
)
except Exception:
deployment.set_deployment_status(

@ -81,6 +81,12 @@ class DeleteNode(command.Command):
"Keep in mind that due to keystone session duration "
"that timeout has an upper bound of 4 hours ")
)
parser.add_argument(
'--overcloud-ssh-port-timeout',
help=_('Timeout for the ssh port to become active.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
)
parser.add_argument('-y', '--yes',
help=_('Skip yes/no prompt (assume yes).'),
default=False,
@ -169,6 +175,7 @@ class DeleteNode(command.Command):
clients=clients,
stack=stack,
nodes=nodes,
connection_timeout=parsed_args.overcloud_ssh_port_timeout,
timeout=parsed_args.timeout,
verbosity=oooutils.playbook_verbosity(self=self)
)

@ -46,7 +46,7 @@ def deploy(container, run_validations, skip_deploy_identifier,
:param skip_deploy_identifier: Enable or disable validations
:type skip_deploy_identifier: Boolean
:param timeout: Timeout
:param timeout: Deployment timeout (minutes).
:type timeout: Integer
:param verbosity: Verbosity level
@ -59,11 +59,11 @@ def deploy(container, run_validations, skip_deploy_identifier,
workdir=tmp,
playbook_dir=ANSIBLE_TRIPLEO_PLAYBOOKS,
verbosity=verbosity,
timeout=timeout,
extra_vars={
"container": container,
"run_validations": run_validations,
"skip_deploy_identifier": skip_deploy_identifier,
"ansible_timeout": timeout
}
)
@ -171,7 +171,7 @@ def get_hosts_and_enable_ssh_admin(stack, overcloud_ssh_network,
:param overcloud_ssh_key: SSH access key.
:type overcloud_ssh_key: String
:param overcloud_ssh_port_timeout: Ansible connection timeout
:param overcloud_ssh_port_timeout: Ansible connection timeout in seconds
:type overcloud_ssh_port_timeout: Int
:param verbosity: Verbosity level
@ -213,7 +213,7 @@ def enable_ssh_admin(stack, hosts, ssh_user, ssh_key, timeout,
:param ssh_key: SSH access key.
:type ssh_key: String
:param timeout: Ansible connection timeout
:param timeout: Ansible connection timeout in seconds
:type timeout: int
:param verbosity: Verbosity level
@ -251,11 +251,12 @@ def enable_ssh_admin(stack, hosts, ssh_user, ssh_key, timeout,
def config_download(log, clients, stack, ssh_network=None,
output_dir=None, override_ansible_cfg=None,
timeout=None, verbosity=0, deployment_options=None,
timeout=600, verbosity=0, deployment_options=None,
in_flight_validations=False,
ansible_playbook_name='deploy_steps_playbook.yaml',
limit_list=None, extra_vars=None, inventory_path=None,
ssh_user='tripleo-admin', tags=None, skip_tags=None):
ssh_user='tripleo-admin', tags=None, skip_tags=None,
deployment_timeout=None):
"""Run config download.
:param log: Logging object
@ -276,8 +277,7 @@ def config_download(log, clients, stack, ssh_network=None,
:param override_ansible_cfg: Ansible configuration file location.
:type override_ansible_cfg: String
:param timeout: Ansible connection timeout. If None, the effective
default will be set to 30 at playbook runtime.
:param timeout: Ansible connection timeout in seconds.
:type timeout: Integer
:param verbosity: Ansible verbosity level.
@ -311,6 +311,10 @@ def config_download(log, clients, stack, ssh_network=None,
:param skip_tags: Ansible exclusion tags.
:type skip_tags: String
:param deployment_timeout: Deployment timeout in minutes.
:type deployment_timeout: Integer
"""
def _log_and_print(message, logger, level='info', print_msg=True):
@ -347,9 +351,6 @@ def config_download(log, clients, stack, ssh_network=None,
else:
skip_tags = 'opendev-validation'
if not timeout:
timeout = 30
# NOTE(cloudnull): List of hosts to limit the current playbook execution
# The list is later converted into an ansible compatible
# string. Storing hosts in list format will ensure all
@ -480,7 +481,8 @@ def config_download(log, clients, stack, ssh_network=None,
'ANSIBLE_BECOME': True,
},
extra_vars=extra_vars,
tags=tags
tags=tags,
timeout=deployment_timeout,
)
_log_and_print(

@ -20,7 +20,8 @@ from tripleoclient import utils
from tripleoclient.workflows import deployment
def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0):
def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0,
connection_timeout=None):
"""Unprovision and deletes overcloud nodes from a heat stack.
:param log: Logging object
@ -38,11 +39,14 @@ def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0):
:type nodes: List
:param timeout: Timeout to use when deleting nodes. If timeout is None
it will be set to 240.
it will be set to 240 minutes.
:type timeout: Integer
:param verbosity: Verbosity level
:type verbosity: Integer
:param connection_timeout: Ansible connection timeout in seconds.
:type connection_timeout: Integer
"""
if not timeout:
@ -60,10 +64,11 @@ def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0):
log=log,
clients=clients,
stack=stack,
timeout=timeout,
timeout=connection_timeout,
ansible_playbook_name='scale_playbook.yaml',
limit_list=limit_list,
verbosity=verbosity
verbosity=verbosity,
deployment_timeout=timeout
)
print('Running scale down')

Loading…
Cancel
Save