From 188acfbc40448278084bc05ee8d4ad7ecc61905a Mon Sep 17 00:00:00 2001 From: Alex Schultz Date: Fri, 3 Jan 2020 11:46:21 -0700 Subject: [PATCH] Add additional ssh timeout options Added `--overcloud-ssh-enable-timeout` and `--overcloud-ssh-port-timeout` to the openstack overcloud deploy command so that users can tune these values. The ssh port timeout might need to be tuned if the hardware being deployed is slow to POST and takes longer than 5 minutes to become ready. These options have been added to: * openstack overcloud deploy * openstack overcloud admin * openstack overcloud ffu upgrade * openstack overcloud upgrade Change-Id: I56f6001b5efeed7ccb3918a5c2622c3eca613c07 Closes-Bug: #1858222 --- ...imeouts-configurable-326124c81ce56fca.yaml | 11 ++++++++ .../test_overcloud_ffwd_upgrade.py | 3 +- .../test_overcloud_upgrade.py | 9 ++++-- .../tests/v1/test_overcloud_admin.py | 4 ++- tripleoclient/v1/overcloud_admin.py | 17 ++++++++++- tripleoclient/v1/overcloud_deploy.py | 16 ++++++++++- tripleoclient/v1/overcloud_ffwd_upgrade.py | 4 ++- tripleoclient/v1/overcloud_upgrade.py | 4 ++- tripleoclient/workflows/deployment.py | 28 ++++++++++++------- 9 files changed, 78 insertions(+), 18 deletions(-) create mode 100644 releasenotes/notes/make-ssh-enablement-timeouts-configurable-326124c81ce56fca.yaml diff --git a/releasenotes/notes/make-ssh-enablement-timeouts-configurable-326124c81ce56fca.yaml b/releasenotes/notes/make-ssh-enablement-timeouts-configurable-326124c81ce56fca.yaml new file mode 100644 index 000000000..af5d8db0d --- /dev/null +++ b/releasenotes/notes/make-ssh-enablement-timeouts-configurable-326124c81ce56fca.yaml @@ -0,0 +1,11 @@ +--- +features: + - | + Added `--overcloud-ssh-enable-timeout` to allow end users to increase the + wait time during the deploy, ffu, upgrade and admin actions. By default + this is 600 seconds.
+ - | + Added `--overcloud-ssh-port-timeout` to allow end users to increase the + time we wait for ssh to become ready on the hosts during the deploy, ffu, + upgrade and admin actions. On older hardware or slow booting hardware, the + 300 seconds we wait by default for the port to come up may not be sufficient. diff --git a/tripleoclient/tests/v1/overcloud_ffwd_upgrade/test_overcloud_ffwd_upgrade.py b/tripleoclient/tests/v1/overcloud_ffwd_upgrade/test_overcloud_ffwd_upgrade.py index 6c82aa5f1..b4ceab963 100644 --- a/tripleoclient/tests/v1/overcloud_ffwd_upgrade/test_overcloud_ffwd_upgrade.py +++ b/tripleoclient/tests/v1/overcloud_ffwd_upgrade/test_overcloud_ffwd_upgrade.py @@ -99,7 +99,8 @@ class TestFFWDUpgradePrepare(fakes.TestFFWDUpgradePrepare): mock_enable_ssh_admin.assert_called_once_with( self.cmd.log, self.app.client_manager, mock_stack, parsed_args.overcloud_ssh_network, - parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key, + 600, 300) mock_overcloud_deploy.assert_called_once_with(parsed_args) @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' 
diff --git a/tripleoclient/tests/v1/overcloud_upgrade/test_overcloud_upgrade.py b/tripleoclient/tests/v1/overcloud_upgrade/test_overcloud_upgrade.py index b1f9eb59c..2c8c78b81 100644 --- a/tripleoclient/tests/v1/overcloud_upgrade/test_overcloud_upgrade.py +++ b/tripleoclient/tests/v1/overcloud_upgrade/test_overcloud_upgrade.py @@ -78,10 +78,14 @@ class TestOvercloudUpgradePrepare(fakes.TestOvercloudUpgradePrepare): mock_yaml.return_value = {'fake_container': 'fake_value'} add_env = mock.Mock() add_env.return_value = True - argslist = ['--stack', 'overcloud', '--templates', ] + argslist = ['--stack', 'overcloud', '--templates', + '--overcloud-ssh-enable-timeout', '10', + '--overcloud-ssh-port-timeout', '10'] verifylist = [ ('stack', 'overcloud'), ('templates', constants.TRIPLEO_HEAT_TEMPLATES), + ('overcloud_ssh_enable_timeout', 10), + ('overcloud_ssh_port_timeout', 10), ] parsed_args = self.check_parser(self.cmd, argslist, verifylist) @@ -99,7 +103,8 @@ class TestOvercloudUpgradePrepare(fakes.TestOvercloudUpgradePrepare): mock_enable_ssh_admin.assert_called_once_with( self.cmd.log, self.app.client_manager, mock_stack, parsed_args.overcloud_ssh_network, - parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key, + 10, 10) @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' 
'take_action') diff --git a/tripleoclient/tests/v1/test_overcloud_admin.py b/tripleoclient/tests/v1/test_overcloud_admin.py index a42b5cec8..825ce0ad4 100644 --- a/tripleoclient/tests/v1/test_overcloud_admin.py +++ b/tripleoclient/tests/v1/test_overcloud_admin.py @@ -39,4 +39,6 @@ class TestAdminAuthorize(test_plugin.TestPluginV1): mock_stack, parsed_args.overcloud_ssh_network, parsed_args.overcloud_ssh_user, - parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_key, + 600, + 300) diff --git a/tripleoclient/v1/overcloud_admin.py b/tripleoclient/v1/overcloud_admin.py index 6bf59a782..2402eb2f2 100644 --- a/tripleoclient/v1/overcloud_admin.py +++ b/tripleoclient/v1/overcloud_admin.py @@ -21,6 +21,7 @@ from osc_lib.i18n import _ from osc_lib import utils from tripleoclient import command +from tripleoclient import constants from tripleoclient import utils as oooutils from tripleoclient.workflows import deployment @@ -59,6 +60,18 @@ class Authorize(command.Command): help=_('Network name to use for ssh access to overcloud nodes.'), default='ctlplane' ) + parser.add_argument( + '--overcloud-ssh-enable-timeout', + help=_('Timeout for the ssh enable process to finish.'), + type=int, + default=constants.ENABLE_SSH_ADMIN_TIMEOUT + ) + parser.add_argument( + '--overcloud-ssh-port-timeout', + help=_('Timeout for to wait for the ssh port to become active.'), + type=int, + default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT + ) return parser @@ -70,4 +83,6 @@ class Authorize(command.Command): self.log, clients, stack, parsed_args.overcloud_ssh_network, parsed_args.overcloud_ssh_user, - parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_key, + parsed_args.overcloud_ssh_enable_timeout, + parsed_args.overcloud_ssh_port_timeout) diff --git a/tripleoclient/v1/overcloud_deploy.py b/tripleoclient/v1/overcloud_deploy.py index c20f4f43c..83bcc32d9 100644 --- a/tripleoclient/v1/overcloud_deploy.py +++ b/tripleoclient/v1/overcloud_deploy.py @@ -683,6 +683,18 @@ class 
DeployOvercloud(command.Command): help=_('Network name to use for ssh access to overcloud nodes.'), default='ctlplane' ) + parser.add_argument( + '--overcloud-ssh-enable-timeout', + help=_('Timeout for the ssh enable process to finish.'), + type=int, + default=constants.ENABLE_SSH_ADMIN_TIMEOUT + ) + parser.add_argument( + '--overcloud-ssh-port-timeout', + help=_('Timeout for to wait for the ssh port to become active.'), + type=int, + default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT + ) parser.add_argument( '--environment-file', '-e', metavar='', action='append', dest='environment_files', @@ -942,7 +954,9 @@ class DeployOvercloud(command.Command): self.log, self.clients, stack, parsed_args.overcloud_ssh_network, parsed_args.overcloud_ssh_user, - parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_key, + parsed_args.overcloud_ssh_enable_timeout, + parsed_args.overcloud_ssh_port_timeout) if parsed_args.config_download_timeout: timeout = parsed_args.config_download_timeout * 60 diff --git a/tripleoclient/v1/overcloud_ffwd_upgrade.py b/tripleoclient/v1/overcloud_ffwd_upgrade.py index 69bb0ce79..78a250155 100644 --- a/tripleoclient/v1/overcloud_ffwd_upgrade.py +++ b/tripleoclient/v1/overcloud_ffwd_upgrade.py @@ -93,7 +93,9 @@ class FFWDUpgradePrepare(DeployOvercloud): stack = oooutils.get_stack(clients.orchestration, parsed_args.stack) deployment.get_hosts_and_enable_ssh_admin( self.log, clients, stack, parsed_args.overcloud_ssh_network, - parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key, + parsed_args.overcloud_ssh_enable_timeout, + parsed_args.overcloud_ssh_port_timeout) self.log.info("FFWD Upgrade Prepare on stack {0} complete.".format( parsed_args.stack)) diff --git a/tripleoclient/v1/overcloud_upgrade.py b/tripleoclient/v1/overcloud_upgrade.py index 452f6e3a0..8b2637cc2 100644 --- a/tripleoclient/v1/overcloud_upgrade.py +++ b/tripleoclient/v1/overcloud_upgrade.py @@ -83,7 +83,9 
@@ class UpgradePrepare(DeployOvercloud): stack = oooutils.get_stack(clients.orchestration, parsed_args.stack) deployment.get_hosts_and_enable_ssh_admin( self.log, clients, stack, parsed_args.overcloud_ssh_network, - parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) + parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key, + parsed_args.overcloud_ssh_enable_timeout, + parsed_args.overcloud_ssh_port_timeout) self.log.info("Completed Overcloud Upgrade Prepare for stack " "{0}".format(stack_name)) diff --git a/tripleoclient/workflows/deployment.py b/tripleoclient/workflows/deployment.py index 083f81fcf..96657077e 100644 --- a/tripleoclient/workflows/deployment.py +++ b/tripleoclient/workflows/deployment.py @@ -22,7 +22,9 @@ import yaml from heatclient.common import event_utils from openstackclient import shell -from tripleoclient import constants +from tripleoclient.constants import ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT +from tripleoclient.constants import ENABLE_SSH_ADMIN_STATUS_INTERVAL +from tripleoclient.constants import ENABLE_SSH_ADMIN_TIMEOUT from tripleoclient import exceptions from tripleoclient import utils @@ -157,11 +159,11 @@ def get_overcloud_hosts(stack, ssh_network): return ips -def wait_for_ssh_port(host): +def wait_for_ssh_port(host, timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT): start = int(time.time()) while True: now = int(time.time()) - if (now - start) > constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT: + if (now - start) > timeout: raise exceptions.DeploymentError( "Timed out waiting for port 22 from %s" % host) # first check ipv4 then check ipv6 @@ -182,14 +184,17 @@ def wait_for_ssh_port(host): time.sleep(1) -def get_hosts_and_enable_ssh_admin(log, clients, stack, overcloud_ssh_network, - overcloud_ssh_user, overcloud_ssh_key): +def get_hosts_and_enable_ssh_admin( + log, clients, stack, overcloud_ssh_network, overcloud_ssh_user, + overcloud_ssh_key, enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT, + 
enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT): hosts = get_overcloud_hosts(stack, overcloud_ssh_network) if [host for host in hosts if host]: try: enable_ssh_admin(log, clients, stack.stack_name, hosts, - overcloud_ssh_user, overcloud_ssh_key) + overcloud_ssh_user, overcloud_ssh_key, + enable_ssh_timeout, enable_ssh_port_timeout) except subprocess.CalledProcessError as e: if e.returncode == 255: log.error("Couldn't not import keys to one of {}. " @@ -205,7 +210,10 @@ def get_hosts_and_enable_ssh_admin(log, clients, stack, overcloud_ssh_network, overcloud_ssh_network)) -def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key): +def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key, + enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT, + enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT + ): print("Enabling ssh admin (tripleo-admin) for hosts:") print(" ".join(hosts)) print("Using ssh user %s for initial connection." % ssh_user) @@ -240,7 +248,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key): tmp_key_private_contents = privkey.read() for host in hosts: - wait_for_ssh_port(host) + wait_for_ssh_port(host, enable_ssh_port_timeout) copy_tmp_key_command = ["ssh"] + ssh_options.split() copy_tmp_key_command += \ ["-o", "StrictHostKeyChecking=no", @@ -271,7 +279,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key): start = int(time.time()) while True: now = int(time.time()) - if (now - start) > constants.ENABLE_SSH_ADMIN_TIMEOUT: + if (now - start) > enable_ssh_timeout: raise exceptions.DeploymentError( "ssh admin enablement workflow - TIMED OUT.") @@ -280,7 +288,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key): state = execution.state if state == 'RUNNING': - if (now - start) % constants.ENABLE_SSH_ADMIN_STATUS_INTERVAL\ + if (now - start) % ENABLE_SSH_ADMIN_STATUS_INTERVAL\ == 0: print("ssh admin enablement workflow - RUNNING.") continue