From 1f52be581bf079f7a2a16d24cc25b26774e47e26 Mon Sep 17 00:00:00 2001 From: Alex Schultz Date: Fri, 3 Jan 2020 11:46:21 -0700 Subject: [PATCH] Add additional ssh timeout options Added `--overcloud-ssh-enable-timeout` and `--overcloud-ssh-port-timeout` to the openstack overcloud deploy command to give users the ability to tune these values. The ssh port timeout might need to be tuned if the hardware being deployed is slow to POST and takes longer than 5 minutes to become ready. These options have been added to: * openstack overcloud deploy * openstack overcloud admin * openstack overcloud ffu upgrade * openstack overcloud upgrade Change-Id: I56f6001b5efeed7ccb3918a5c2622c3eca613c07 Closes-Bug: #1858222 (cherry picked from commit 188acfbc40448278084bc05ee8d4ad7ecc61905a) --- ...imeouts-configurable-326124c81ce56fca.yaml | 11 ++++++++ .../test_overcloud_ffwd_upgrade.py | 3 +- .../test_overcloud_upgrade.py | 9 ++++-- .../tests/v1/test_overcloud_admin.py | 4 ++- tripleoclient/v1/overcloud_admin.py | 17 ++++++++++- tripleoclient/v1/overcloud_deploy.py | 16 ++++++++++- tripleoclient/v1/overcloud_ffwd_upgrade.py | 4 ++- tripleoclient/v1/overcloud_upgrade.py | 4 ++- tripleoclient/workflows/deployment.py | 28 ++++++++++++------- 9 files changed, 78 insertions(+), 18 deletions(-) create mode 100644 releasenotes/notes/make-ssh-enablement-timeouts-configurable-326124c81ce56fca.yaml diff --git a/releasenotes/notes/make-ssh-enablement-timeouts-configurable-326124c81ce56fca.yaml b/releasenotes/notes/make-ssh-enablement-timeouts-configurable-326124c81ce56fca.yaml new file mode 100644 index 000000000..af5d8db0d --- /dev/null +++ b/releasenotes/notes/make-ssh-enablement-timeouts-configurable-326124c81ce56fca.yaml @@ -0,0 +1,11 @@ +--- +features: + - | + Added `--overcloud-ssh-enable-timeout` to allow end users to increase the + time we wait for the ssh enablement process to finish during the deploy, + ffu, upgrade and admin actions. By default this is 600 seconds. 
+ - | + Added `--overcloud-ssh-port-timeout` to allow end users to increase the + time we wait for ssh to become ready on the hosts during the deploy, ffu, + upgrade and admin actions. On older or slow-booting hardware, the + 300 seconds we wait by default for the port to come up may not be sufficient. diff --git a/tripleoclient/tests/v1/overcloud_ffwd_upgrade/test_overcloud_ffwd_upgrade.py b/tripleoclient/tests/v1/overcloud_ffwd_upgrade/test_overcloud_ffwd_upgrade.py index 6c82aa5f1..b4ceab963 100644 --- a/tripleoclient/tests/v1/overcloud_ffwd_upgrade/test_overcloud_ffwd_upgrade.py +++ b/tripleoclient/tests/v1/overcloud_ffwd_upgrade/test_overcloud_ffwd_upgrade.py @@ -99,7 +99,8 @@ class TestFFWDUpgradePrepare(fakes.TestFFWDUpgradePrepare): mock_enable_ssh_admin.assert_called_once_with( self.cmd.log, self.app.client_manager, mock_stack, parsed_args.overcloud_ssh_network, - parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key, + 600, 300) mock_overcloud_deploy.assert_called_once_with(parsed_args) @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' 
diff --git a/tripleoclient/tests/v1/overcloud_upgrade/test_overcloud_upgrade.py b/tripleoclient/tests/v1/overcloud_upgrade/test_overcloud_upgrade.py index b1f9eb59c..2c8c78b81 100644 --- a/tripleoclient/tests/v1/overcloud_upgrade/test_overcloud_upgrade.py +++ b/tripleoclient/tests/v1/overcloud_upgrade/test_overcloud_upgrade.py @@ -78,10 +78,14 @@ class TestOvercloudUpgradePrepare(fakes.TestOvercloudUpgradePrepare): mock_yaml.return_value = {'fake_container': 'fake_value'} add_env = mock.Mock() add_env.return_value = True - argslist = ['--stack', 'overcloud', '--templates', ] + argslist = ['--stack', 'overcloud', '--templates', + '--overcloud-ssh-enable-timeout', '10', + '--overcloud-ssh-port-timeout', '10'] verifylist = [ ('stack', 'overcloud'), ('templates', constants.TRIPLEO_HEAT_TEMPLATES), + ('overcloud_ssh_enable_timeout', 10), + ('overcloud_ssh_port_timeout', 10), ] parsed_args = self.check_parser(self.cmd, argslist, verifylist) @@ -99,7 +103,8 @@ class TestOvercloudUpgradePrepare(fakes.TestOvercloudUpgradePrepare): mock_enable_ssh_admin.assert_called_once_with( self.cmd.log, self.app.client_manager, mock_stack, parsed_args.overcloud_ssh_network, - parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key, + 10, 10) @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' 
'take_action') diff --git a/tripleoclient/tests/v1/test_overcloud_admin.py b/tripleoclient/tests/v1/test_overcloud_admin.py index a42b5cec8..825ce0ad4 100644 --- a/tripleoclient/tests/v1/test_overcloud_admin.py +++ b/tripleoclient/tests/v1/test_overcloud_admin.py @@ -39,4 +39,6 @@ class TestAdminAuthorize(test_plugin.TestPluginV1): mock_stack, parsed_args.overcloud_ssh_network, parsed_args.overcloud_ssh_user, - parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_key, + 600, + 300) diff --git a/tripleoclient/v1/overcloud_admin.py b/tripleoclient/v1/overcloud_admin.py index 6bf59a782..2402eb2f2 100644 --- a/tripleoclient/v1/overcloud_admin.py +++ b/tripleoclient/v1/overcloud_admin.py @@ -21,6 +21,7 @@ from osc_lib.i18n import _ from osc_lib import utils from tripleoclient import command +from tripleoclient import constants from tripleoclient import utils as oooutils from tripleoclient.workflows import deployment @@ -59,6 +60,18 @@ class Authorize(command.Command): help=_('Network name to use for ssh access to overcloud nodes.'), default='ctlplane' ) + parser.add_argument( + '--overcloud-ssh-enable-timeout', + help=_('Timeout for the ssh enable process to finish.'), + type=int, + default=constants.ENABLE_SSH_ADMIN_TIMEOUT + ) + parser.add_argument( + '--overcloud-ssh-port-timeout', + help=_('Timeout for to wait for the ssh port to become active.'), + type=int, + default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT + ) return parser @@ -70,4 +83,6 @@ class Authorize(command.Command): self.log, clients, stack, parsed_args.overcloud_ssh_network, parsed_args.overcloud_ssh_user, - parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_key, + parsed_args.overcloud_ssh_enable_timeout, + parsed_args.overcloud_ssh_port_timeout) diff --git a/tripleoclient/v1/overcloud_deploy.py b/tripleoclient/v1/overcloud_deploy.py index 7d3bc2976..e97d71b96 100644 --- a/tripleoclient/v1/overcloud_deploy.py +++ b/tripleoclient/v1/overcloud_deploy.py @@ -680,6 +680,18 @@ class 
DeployOvercloud(command.Command): help=_('Network name to use for ssh access to overcloud nodes.'), default='ctlplane' ) + parser.add_argument( + '--overcloud-ssh-enable-timeout', + help=_('Timeout for the ssh enable process to finish.'), + type=int, + default=constants.ENABLE_SSH_ADMIN_TIMEOUT + ) + parser.add_argument( + '--overcloud-ssh-port-timeout', + help=_('Timeout for to wait for the ssh port to become active.'), + type=int, + default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT + ) parser.add_argument( '--environment-file', '-e', metavar='', action='append', dest='environment_files', @@ -944,7 +956,9 @@ class DeployOvercloud(command.Command): self.log, self.clients, stack, parsed_args.overcloud_ssh_network, parsed_args.overcloud_ssh_user, - parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_key, + parsed_args.overcloud_ssh_enable_timeout, + parsed_args.overcloud_ssh_port_timeout) if parsed_args.config_download_timeout: timeout = parsed_args.config_download_timeout * 60 diff --git a/tripleoclient/v1/overcloud_ffwd_upgrade.py b/tripleoclient/v1/overcloud_ffwd_upgrade.py index 69bb0ce79..78a250155 100644 --- a/tripleoclient/v1/overcloud_ffwd_upgrade.py +++ b/tripleoclient/v1/overcloud_ffwd_upgrade.py @@ -93,7 +93,9 @@ class FFWDUpgradePrepare(DeployOvercloud): stack = oooutils.get_stack(clients.orchestration, parsed_args.stack) deployment.get_hosts_and_enable_ssh_admin( self.log, clients, stack, parsed_args.overcloud_ssh_network, - parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key, + parsed_args.overcloud_ssh_enable_timeout, + parsed_args.overcloud_ssh_port_timeout) self.log.info("FFWD Upgrade Prepare on stack {0} complete.".format( parsed_args.stack)) diff --git a/tripleoclient/v1/overcloud_upgrade.py b/tripleoclient/v1/overcloud_upgrade.py index 452f6e3a0..8b2637cc2 100644 --- a/tripleoclient/v1/overcloud_upgrade.py +++ b/tripleoclient/v1/overcloud_upgrade.py @@ -83,7 +83,9 
@@ class UpgradePrepare(DeployOvercloud): stack = oooutils.get_stack(clients.orchestration, parsed_args.stack) deployment.get_hosts_and_enable_ssh_admin( self.log, clients, stack, parsed_args.overcloud_ssh_network, - parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key, + parsed_args.overcloud_ssh_enable_timeout, + parsed_args.overcloud_ssh_port_timeout) self.log.info("Completed Overcloud Upgrade Prepare for stack " "{0}".format(stack_name)) diff --git a/tripleoclient/workflows/deployment.py b/tripleoclient/workflows/deployment.py index 9ce243211..5a331780b 100644 --- a/tripleoclient/workflows/deployment.py +++ b/tripleoclient/workflows/deployment.py @@ -23,7 +23,9 @@ import time from heatclient.common import event_utils from openstackclient import shell -from tripleoclient import constants +from tripleoclient.constants import ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT +from tripleoclient.constants import ENABLE_SSH_ADMIN_STATUS_INTERVAL +from tripleoclient.constants import ENABLE_SSH_ADMIN_TIMEOUT from tripleoclient import exceptions from tripleoclient import utils @@ -180,11 +182,11 @@ def get_overcloud_hosts(stack, ssh_network): return ips -def wait_for_ssh_port(host): +def wait_for_ssh_port(host, timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT): start = int(time.time()) while True: now = int(time.time()) - if (now - start) > constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT: + if (now - start) > timeout: raise exceptions.DeploymentError( "Timed out waiting for port 22 from %s" % host) # first check ipv4 then check ipv6 @@ -205,14 +207,17 @@ def wait_for_ssh_port(host): time.sleep(1) -def get_hosts_and_enable_ssh_admin(log, clients, stack, overcloud_ssh_network, - overcloud_ssh_user, overcloud_ssh_key): +def get_hosts_and_enable_ssh_admin( + log, clients, stack, overcloud_ssh_network, overcloud_ssh_user, + overcloud_ssh_key, enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT, + 
enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT): hosts = get_overcloud_hosts(stack, overcloud_ssh_network) if [host for host in hosts if host]: try: enable_ssh_admin(log, clients, stack.stack_name, hosts, - overcloud_ssh_user, overcloud_ssh_key) + overcloud_ssh_user, overcloud_ssh_key, + enable_ssh_timeout, enable_ssh_port_timeout) except subprocess.CalledProcessError as e: if e.returncode == 255: log.error("Couldn't not import keys to one of {}. " @@ -228,7 +233,10 @@ def get_hosts_and_enable_ssh_admin(log, clients, stack, overcloud_ssh_network, overcloud_ssh_network)) -def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key): +def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key, + enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT, + enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT + ): print("Enabling ssh admin (tripleo-admin) for hosts:") print(" ".join(hosts)) print("Using ssh user %s for initial connection." % ssh_user) @@ -264,7 +272,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key): tmp_key_private_contents = privkey.read() for host in hosts: - wait_for_ssh_port(host) + wait_for_ssh_port(host, enable_ssh_port_timeout) copy_tmp_key_command = ["ssh"] + ssh_options.split() copy_tmp_key_command += \ ["-o", "StrictHostKeyChecking=no", @@ -295,7 +303,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key): start = int(time.time()) while True: now = int(time.time()) - if (now - start) > constants.ENABLE_SSH_ADMIN_TIMEOUT: + if (now - start) > enable_ssh_timeout: raise exceptions.DeploymentError( "ssh admin enablement workflow - TIMED OUT.") @@ -304,7 +312,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key): state = execution.state if state == 'RUNNING': - if (now - start) % constants.ENABLE_SSH_ADMIN_STATUS_INTERVAL\ + if (now - start) % ENABLE_SSH_ADMIN_STATUS_INTERVAL\ == 0: print("ssh admin enablement workflow - RUNNING.") continue