Merge "Fix misused deployment vs connection timeouts"

This commit is contained in:
Zuul
2020-04-07 17:32:19 +00:00
committed by Gerrit Code Review
11 changed files with 187 additions and 33 deletions

View File

@@ -0,0 +1,11 @@
---
features:
- |
`openstack overcloud node delete` now can take
`--overcloud-ssh-port-timeout` to configure connection timeout
for Ansible. While `--timeout` configures the command timeout as expected.
fixes:
- |
Ansible connection timeout used for config download and the deployment
timeout now will be given proper values. It fixes `bug 1868063
<https://bugs.launchpad.net/tripleo/+bug/1868063>`__.

View File

@@ -54,6 +54,10 @@ class UtilsOvercloudFixture(fixtures.Fixture):
self.mock_create_ocrc = self.useFixture(fixtures.MockPatch(
'tripleoclient.utils.write_overcloudrc')
).mock
self.mock_update_deployment_status = self.useFixture(
fixtures.MockPatch(
'tripleoclient.utils.update_deployment_status')
).mock
class UtilsFixture(fixtures.Fixture):
@@ -70,3 +74,6 @@ class UtilsFixture(fixtures.Fixture):
self.mock_write_overcloudrc = self.useFixture(fixtures.MockPatch(
'tripleoclient.utils.write_overcloudrc')
).mock
self.mock_run_ansible_playbook = self.useFixture(fixtures.MockPatch(
'tripleoclient.utils.run_ansible_playbook')
).mock

View File

@@ -14,6 +14,7 @@
#
import ansible_runner
import argparse
import datetime
import logging
@@ -195,6 +196,25 @@ class TestRunAnsiblePlaybook(TestCase):
)
self.assertEqual(retcode, 0)
@mock.patch('six.moves.builtins.open')
@mock.patch('tripleoclient.utils.makedirs')
@mock.patch('os.path.exists', side_effect=(False, True, True))
def test_run_with_timeout(self, mock_exists, mock_mkdir, mock_open):
ansible_runner.ArtifactLoader = mock.MagicMock()
ansible_runner.Runner.run = mock.MagicMock(return_value=('', 0))
ansible_runner.runner_config = mock.MagicMock()
retcode, output = utils.run_ansible_playbook(
playbook='existing.yaml',
inventory='localhost,',
workdir='/tmp',
timeout=42
)
self.assertIn(mock.call('/tmp/env/settings', 'w'),
mock_open.mock_calls)
self.assertIn(
mock.call().__enter__().write('job_timeout: 2520\n'), # 42m * 60
mock_open.mock_calls)
class TestRunCommandAndLog(TestCase):
def setUp(self):

View File

@@ -1543,9 +1543,13 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud):
@mock.patch('tripleoclient.workflows.deployment.create_overcloudrc',
autospec=True)
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'_deploy_tripleo_heat_templates_tmpdir', autospec=True)
'_heat_deploy', autospec=True)
@mock.patch('tripleoclient.utils.check_stack_network_matches_env_files')
@mock.patch('heatclient.common.template_utils.deep_update', autospec=True)
@mock.patch('tripleoclient.workflows.plan_management.'
'create_plan_from_templates', autospec=True)
def test_config_download_timeout(
self, mock_deploy_tmpdir,
self, mock_plan_man, mock_hc, mock_stack_network_check, mock_hd,
mock_overcloudrc, mock_get_undercloud_host_entry, mock_copy):
fixture = deployment.DeploymentWorkflowFixture()
self.useFixture(fixture)
@@ -1555,16 +1559,83 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud):
orchestration_client = clients.orchestration
orchestration_client.stacks.get.return_value = fakes.create_tht_stack()
arglist = ['--templates', '--config-download-timeout', '240']
arglist = ['--templates', '--overcloud-ssh-port-timeout', '42',
'--timeout', '451']
verifylist = [
('templates', '/usr/share/openstack-tripleo-heat-templates/'),
('overcloud_ssh_port_timeout', 42), ('timeout', 451)
]
parsed_args = self.check_parser(self.cmd, arglist, verifylist)
# assuming heat deploy consumed a 3m out of total 451m timeout
with mock.patch('time.time', side_effect=[0, 1585820346, 1585820526]):
self.cmd.take_action(parsed_args)
self.assertIn(
[mock.call(mock.ANY, mock.ANY, 'overcloud', mock.ANY,
{'DeployIdentifier': '', 'UpdateIdentifier': '',
'StackAction': 'UPDATE', 'UndercloudHostsEntries':
['192.168.0.1 uc.ctlplane.localhost uc.ctlplane']}, {},
451, mock.ANY, {}, False, False, False, None,
deployment_options={})],
mock_hd.mock_calls)
self.assertIn(
[mock.call(mock.ANY, mock.ANY, mock.ANY, 'ctlplane', None, None,
deployment_options={},
deployment_timeout=448, # 451 - 3, total time left
in_flight_validations=False, timeout=42, verbosity=3)],
fixture.mock_config_download.mock_calls)
fixture.mock_config_download.assert_called()
mock_copy.assert_called_once()
@mock.patch('tripleoclient.utils.copy_clouds_yaml')
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'_update_parameters')
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'_get_undercloud_host_entry', autospec=True,
return_value='192.168.0.1 uc.ctlplane.localhost uc.ctlplane')
@mock.patch('tripleoclient.workflows.deployment.create_overcloudrc',
autospec=True)
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'_deploy_tripleo_heat_templates_tmpdir', autospec=True)
def test_config_download_only_timeout(
self, mock_deploy_tmpdir,
mock_overcloudrc, mock_get_undercloud_host_entry, mock_update,
mock_copy):
utils_fixture = deployment.UtilsOvercloudFixture()
self.useFixture(utils_fixture)
utils_fixture2 = deployment.UtilsFixture()
self.useFixture(utils_fixture2)
clients = self.app.client_manager
stack = mock.Mock()
stack.stack_name = 'overcloud'
stack.output_show.return_value = {'output': {'output_value': []}}
orchestration_client = clients.orchestration
orchestration_client.stacks.get.return_value = stack
arglist = ['--templates', '--config-download-only',
'--overcloud-ssh-port-timeout', '42',
'--config-download-timeout', '240']
verifylist = [
('templates', '/usr/share/openstack-tripleo-heat-templates/'),
('config_download_only', True),
('config_download_timeout', 240),
('overcloud_ssh_port_timeout', 42)
]
parsed_args = self.check_parser(self.cmd, arglist, verifylist)
self.cmd.take_action(parsed_args)
fixture.mock_config_download.assert_called()
mock_copy.assert_called_once()
playbook = '/var/lib/mistral/overcloud/deploy_steps_playbook.yaml'
self.assertIn(
[mock.call(
ansible_cfg=None, ansible_timeout=42,
extra_env_variables={'ANSIBLE_BECOME': True}, extra_vars=None,
inventory=mock.ANY, key=mock.ANY, limit_hosts=None,
playbook=playbook, playbook_dir=mock.ANY,
reproduce_command=True, skip_tags='opendev-validation',
ssh_user='tripleo-admin', tags=None,
timeout=240,
verbosity=3, workdir=mock.ANY)],
utils_fixture2.mock_run_ansible_playbook.mock_calls)
def test_download_missing_files_from_plan(self):
# Restore the real function so we don't accidentally call the mock

View File

@@ -185,9 +185,11 @@ class TestDeleteNode(fakes.TestDeleteNode):
inp.flush()
argslist = ['--baremetal-deployment', inp.name, '--templates',
'--stack', 'overcast', '--timeout', '90', '--yes']
'--stack', 'overcast', '--overcloud-ssh-port-timeout',
'42', '--timeout', '90', '--yes']
verifylist = [
('stack', 'overcast'),
('overcloud_ssh_port_timeout', 42),
('baremetal_deployment', inp.name)
]
parsed_args = self.check_parser(self.cmd, argslist, verifylist)
@@ -245,11 +247,12 @@ class TestDeleteNode(fakes.TestDeleteNode):
ssh_user='tripleo-admin',
key=mock.ANY,
limit_hosts='overcast-controller-1:overcast-compute-0',
ansible_timeout=90,
ansible_timeout=42,
reproduce_command=True,
extra_env_variables={'ANSIBLE_BECOME': True},
extra_vars=None,
tags=None
tags=None,
timeout=90
),
mock.call(
inventory='localhost,',

View File

@@ -287,11 +287,11 @@ class TestOvercloudDeployPlan(utils.TestCommand):
'undercloud,',
mock.ANY,
constants.ANSIBLE_TRIPLEO_PLAYBOOKS,
timeout=240,
extra_vars={
"container": "overcast",
"run_validations": True,
"skip_deploy_identifier": False,
"ansible_timeout": 240
},
verbosity=3,
)

View File

@@ -231,7 +231,7 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
extra_env_variables=None, parallel_run=False,
callback_whitelist=None, ansible_cfg=None,
ansible_timeout=30, reproduce_command=False,
fail_on_rc=True):
fail_on_rc=True, timeout=None):
"""Simple wrapper for ansible-playbook.
:param playbook: Playbook filename.
@@ -320,6 +320,9 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
return code from the playbook execution results in a
non 0 exit code. The default is True.
:type fail_on_rc: Boolean
:param timeout: Timeout for ansible to finish playbook execution (minutes).
:type timeout: int
"""
def _playbook_check(play):
@@ -347,9 +350,30 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
'hosts'
)
def _running_ansible_msg(playbook, timeout=None):
if timeout and timeout > 0:
return ('Running Ansible playbook with timeout %sm: %s,' %
(timeout, playbook))
else:
return ('Running Ansible playbook: %s,' % playbook)
if not playbook_dir:
playbook_dir = workdir
if timeout and timeout > 0:
makedirs(os.path.join(workdir, 'env'))
settings_file = os.path.join(workdir, 'env/settings')
timeout_value = timeout * 60
if os.path.exists(settings_file):
with open(os.path.join(workdir, 'env/settings'), 'r') as f:
settings_object = yaml.safe_load(f.read())
settings_object['job_timeout'] = timeout_value
else:
settings_object = {'job_timeout': timeout_value}
with open(os.path.join(workdir, 'env/settings'), 'w') as f:
f.write(yaml.safe_dump(settings_object, default_flow_style=False))
if isinstance(playbook, (list, set)):
verified_playbooks = [_playbook_check(play=i) for i in playbook]
playbook = os.path.join(workdir, 'tripleo-multi-playbook.yaml')
@@ -362,7 +386,7 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
)
LOG.info(
'Running Ansible playbook: {},'
_running_ansible_msg(playbook, timeout) +
' multi-playbook execution: {}'
' Working directory: {},'
' Playbook directory: {}'.format(
@@ -375,7 +399,7 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
else:
playbook = _playbook_check(play=playbook)
LOG.info(
'Running Ansible playbook: {},'
_running_ansible_msg(playbook, timeout) +
' Working directory: {},'
' Playbook directory: {}'.format(
playbook,

View File

@@ -1023,10 +1023,13 @@ class DeployOvercloud(command.Command):
)
if parsed_args.config_download_timeout:
timeout = parsed_args.config_download_timeout * 60
timeout = parsed_args.config_download_timeout
else:
used = int(time.time() - start)
timeout = (parsed_args.timeout * 60) - used
used = int((time.time() - start) // 60)
timeout = parsed_args.timeout - used
if timeout <= 0:
raise exceptions.DeploymentError(
'Deployment timed out after %sm' % used)
deployment_options = {}
if parsed_args.deployment_python_interpreter:
@@ -1040,10 +1043,11 @@ class DeployOvercloud(command.Command):
parsed_args.overcloud_ssh_network,
parsed_args.output_dir,
parsed_args.override_ansible_cfg,
timeout,
timeout=parsed_args.overcloud_ssh_port_timeout,
verbosity=utils.playbook_verbosity(self=self),
deployment_options=deployment_options,
in_flight_validations=parsed_args.inflight
in_flight_validations=parsed_args.inflight,
deployment_timeout=timeout
)
except Exception:
deployment.set_deployment_status(

View File

@@ -81,6 +81,12 @@ class DeleteNode(command.Command):
"Keep in mind that due to keystone session duration "
"that timeout has an upper bound of 4 hours ")
)
parser.add_argument(
'--overcloud-ssh-port-timeout',
help=_('Timeout for the ssh port to become active.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
)
parser.add_argument('-y', '--yes',
help=_('Skip yes/no prompt (assume yes).'),
default=False,
@@ -169,6 +175,7 @@ class DeleteNode(command.Command):
clients=clients,
stack=stack,
nodes=nodes,
connection_timeout=parsed_args.overcloud_ssh_port_timeout,
timeout=parsed_args.timeout,
verbosity=oooutils.playbook_verbosity(self=self)
)

View File

@@ -46,7 +46,7 @@ def deploy(container, run_validations, skip_deploy_identifier,
:param skip_deploy_identifier: Enable or disable validations
:type skip_deploy_identifier: Boolean
:param timeout: Timeout
:param timeout: Deployment timeout (minutes).
:type timeout: Integer
:param verbosity: Verbosity level
@@ -59,11 +59,11 @@ def deploy(container, run_validations, skip_deploy_identifier,
workdir=tmp,
playbook_dir=ANSIBLE_TRIPLEO_PLAYBOOKS,
verbosity=verbosity,
timeout=timeout,
extra_vars={
"container": container,
"run_validations": run_validations,
"skip_deploy_identifier": skip_deploy_identifier,
"ansible_timeout": timeout
}
)
@@ -171,7 +171,7 @@ def get_hosts_and_enable_ssh_admin(stack, overcloud_ssh_network,
:param overcloud_ssh_key: SSH access key.
:type overcloud_ssh_key: String
:param overcloud_ssh_port_timeout: Ansible connection timeout
:param overcloud_ssh_port_timeout: Ansible connection timeout in seconds
:type overcloud_ssh_port_timeout: Int
:param verbosity: Verbosity level
@@ -213,7 +213,7 @@ def enable_ssh_admin(stack, hosts, ssh_user, ssh_key, timeout,
:param ssh_key: SSH access key.
:type ssh_key: String
:param timeout: Ansible connection timeout
:param timeout: Ansible connection timeout in seconds
:type timeout: int
:param verbosity: Verbosity level
@@ -251,11 +251,12 @@ def enable_ssh_admin(stack, hosts, ssh_user, ssh_key, timeout,
def config_download(log, clients, stack, ssh_network=None,
output_dir=None, override_ansible_cfg=None,
timeout=None, verbosity=0, deployment_options=None,
timeout=600, verbosity=0, deployment_options=None,
in_flight_validations=False,
ansible_playbook_name='deploy_steps_playbook.yaml',
limit_list=None, extra_vars=None, inventory_path=None,
ssh_user='tripleo-admin', tags=None, skip_tags=None):
ssh_user='tripleo-admin', tags=None, skip_tags=None,
deployment_timeout=None):
"""Run config download.
:param log: Logging object
@@ -276,8 +277,7 @@ def config_download(log, clients, stack, ssh_network=None,
:param override_ansible_cfg: Ansible configuration file location.
:type override_ansible_cfg: String
:param timeout: Ansible connection timeout. If None, the effective
default will be set to 30 at playbook runtime.
:param timeout: Ansible connection timeout in seconds.
:type timeout: Integer
:param verbosity: Ansible verbosity level.
@@ -311,6 +311,10 @@ def config_download(log, clients, stack, ssh_network=None,
:param skip_tags: Ansible exclusion tags.
:type skip_tags: String
:param deployment_timeout: Deployment timeout in minutes.
:type deployment_timeout: Integer
"""
def _log_and_print(message, logger, level='info', print_msg=True):
@@ -347,9 +351,6 @@ def config_download(log, clients, stack, ssh_network=None,
else:
skip_tags = 'opendev-validation'
if not timeout:
timeout = 30
# NOTE(cloudnull): List of hosts to limit the current playbook execution
# The list is later converted into an ansible compatible
# string. Storing hosts in list format will ensure all
@@ -480,7 +481,8 @@ def config_download(log, clients, stack, ssh_network=None,
'ANSIBLE_BECOME': True,
},
extra_vars=extra_vars,
tags=tags
tags=tags,
timeout=deployment_timeout,
)
_log_and_print(

View File

@@ -20,7 +20,8 @@ from tripleoclient import utils
from tripleoclient.workflows import deployment
def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0):
def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0,
connection_timeout=None):
"""Unprovision and deletes overcloud nodes from a heat stack.
:param log: Logging object
@@ -38,11 +39,14 @@ def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0):
:type nodes: List
:param timeout: Timeout to use when deleting nodes. If timeout is None
it will be set to 240.
it will be set to 240 minutes.
:type timeout: Integer
:param verbosity: Verbosity level
:type verbosity: Integer
:param connection_timeout: Ansible connection timeout in seconds.
:type connection_timeout: Integer
"""
if not timeout:
@@ -60,10 +64,11 @@ def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0):
log=log,
clients=clients,
stack=stack,
timeout=timeout,
timeout=connection_timeout,
ansible_playbook_name='scale_playbook.yaml',
limit_list=limit_list,
verbosity=verbosity
verbosity=verbosity,
deployment_timeout=timeout
)
print('Running scale down')