Add additional ssh timeout options

Added `--overcloud-ssh-enable-timeout` and
`--overcloud-ssh-port-timeout` to the openstack overcloud deploy command
so that users can tune these values.  The ssh port timeout might need to
be raised if the hardware being deployed is slow to POST and takes longer
than the default 300 seconds (5 minutes) to become ready.

These options have been added to:

 * openstack overcloud deploy
 * openstack overcloud admin
 * openstack overcloud ffu upgrade
 * openstack overcloud upgrade

Change-Id: I56f6001b5efeed7ccb3918a5c2622c3eca613c07
Closes-Bug: #1858222
This commit is contained in:
Alex Schultz 2020-01-03 11:46:21 -07:00
parent 8293e5eec5
commit 188acfbc40
9 changed files with 78 additions and 18 deletions

View File

@ -0,0 +1,11 @@
---
features:
- |
Added `--overcloud-ssh-enable-timeout` to allow end users to increase the
time we wait for the ssh enable process to finish during the deploy, ffu,
upgrade and admin actions. By default this is 600 seconds.
- |
Added `--overcloud-ssh-port-timeout` to allow end users to increase the
time we wait for ssh to become ready on the hosts during the deploy, ffu,
upgrade and admin actions. On older hardware or slow booting hardware, the
300 seconds we wait by default for the port to come up may not be sufficient.

View File

@ -99,7 +99,8 @@ class TestFFWDUpgradePrepare(fakes.TestFFWDUpgradePrepare):
mock_enable_ssh_admin.assert_called_once_with( mock_enable_ssh_admin.assert_called_once_with(
self.cmd.log, self.app.client_manager, mock_stack, self.cmd.log, self.app.client_manager, mock_stack,
parsed_args.overcloud_ssh_network, parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
600, 300)
mock_overcloud_deploy.assert_called_once_with(parsed_args) mock_overcloud_deploy.assert_called_once_with(parsed_args)
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'

View File

@ -78,10 +78,14 @@ class TestOvercloudUpgradePrepare(fakes.TestOvercloudUpgradePrepare):
mock_yaml.return_value = {'fake_container': 'fake_value'} mock_yaml.return_value = {'fake_container': 'fake_value'}
add_env = mock.Mock() add_env = mock.Mock()
add_env.return_value = True add_env.return_value = True
argslist = ['--stack', 'overcloud', '--templates', ] argslist = ['--stack', 'overcloud', '--templates',
'--overcloud-ssh-enable-timeout', '10',
'--overcloud-ssh-port-timeout', '10']
verifylist = [ verifylist = [
('stack', 'overcloud'), ('stack', 'overcloud'),
('templates', constants.TRIPLEO_HEAT_TEMPLATES), ('templates', constants.TRIPLEO_HEAT_TEMPLATES),
('overcloud_ssh_enable_timeout', 10),
('overcloud_ssh_port_timeout', 10),
] ]
parsed_args = self.check_parser(self.cmd, argslist, verifylist) parsed_args = self.check_parser(self.cmd, argslist, verifylist)
@ -99,7 +103,8 @@ class TestOvercloudUpgradePrepare(fakes.TestOvercloudUpgradePrepare):
mock_enable_ssh_admin.assert_called_once_with( mock_enable_ssh_admin.assert_called_once_with(
self.cmd.log, self.app.client_manager, mock_stack, self.cmd.log, self.app.client_manager, mock_stack,
parsed_args.overcloud_ssh_network, parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
10, 10)
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'take_action') 'take_action')

View File

@ -39,4 +39,6 @@ class TestAdminAuthorize(test_plugin.TestPluginV1):
mock_stack, mock_stack,
parsed_args.overcloud_ssh_network, parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key) parsed_args.overcloud_ssh_key,
600,
300)

View File

@ -21,6 +21,7 @@ from osc_lib.i18n import _
from osc_lib import utils from osc_lib import utils
from tripleoclient import command from tripleoclient import command
from tripleoclient import constants
from tripleoclient import utils as oooutils from tripleoclient import utils as oooutils
from tripleoclient.workflows import deployment from tripleoclient.workflows import deployment
@ -59,6 +60,18 @@ class Authorize(command.Command):
help=_('Network name to use for ssh access to overcloud nodes.'), help=_('Network name to use for ssh access to overcloud nodes.'),
default='ctlplane' default='ctlplane'
) )
parser.add_argument(
'--overcloud-ssh-enable-timeout',
help=_('Timeout for the ssh enable process to finish.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_TIMEOUT
)
parser.add_argument(
'--overcloud-ssh-port-timeout',
help=_('Timeout for to wait for the ssh port to become active.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
)
return parser return parser
@ -70,4 +83,6 @@ class Authorize(command.Command):
self.log, clients, stack, self.log, clients, stack,
parsed_args.overcloud_ssh_network, parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key) parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)

View File

@ -683,6 +683,18 @@ class DeployOvercloud(command.Command):
help=_('Network name to use for ssh access to overcloud nodes.'), help=_('Network name to use for ssh access to overcloud nodes.'),
default='ctlplane' default='ctlplane'
) )
parser.add_argument(
'--overcloud-ssh-enable-timeout',
help=_('Timeout for the ssh enable process to finish.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_TIMEOUT
)
parser.add_argument(
'--overcloud-ssh-port-timeout',
help=_('Timeout for to wait for the ssh port to become active.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
)
parser.add_argument( parser.add_argument(
'--environment-file', '-e', metavar='<HEAT ENVIRONMENT FILE>', '--environment-file', '-e', metavar='<HEAT ENVIRONMENT FILE>',
action='append', dest='environment_files', action='append', dest='environment_files',
@ -942,7 +954,9 @@ class DeployOvercloud(command.Command):
self.log, self.clients, stack, self.log, self.clients, stack,
parsed_args.overcloud_ssh_network, parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key) parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)
if parsed_args.config_download_timeout: if parsed_args.config_download_timeout:
timeout = parsed_args.config_download_timeout * 60 timeout = parsed_args.config_download_timeout * 60

View File

@ -93,7 +93,9 @@ class FFWDUpgradePrepare(DeployOvercloud):
stack = oooutils.get_stack(clients.orchestration, parsed_args.stack) stack = oooutils.get_stack(clients.orchestration, parsed_args.stack)
deployment.get_hosts_and_enable_ssh_admin( deployment.get_hosts_and_enable_ssh_admin(
self.log, clients, stack, parsed_args.overcloud_ssh_network, self.log, clients, stack, parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)
self.log.info("FFWD Upgrade Prepare on stack {0} complete.".format( self.log.info("FFWD Upgrade Prepare on stack {0} complete.".format(
parsed_args.stack)) parsed_args.stack))

View File

@ -83,7 +83,9 @@ class UpgradePrepare(DeployOvercloud):
stack = oooutils.get_stack(clients.orchestration, parsed_args.stack) stack = oooutils.get_stack(clients.orchestration, parsed_args.stack)
deployment.get_hosts_and_enable_ssh_admin( deployment.get_hosts_and_enable_ssh_admin(
self.log, clients, stack, parsed_args.overcloud_ssh_network, self.log, clients, stack, parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key) parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)
self.log.info("Completed Overcloud Upgrade Prepare for stack " self.log.info("Completed Overcloud Upgrade Prepare for stack "
"{0}".format(stack_name)) "{0}".format(stack_name))

View File

@ -22,7 +22,9 @@ import yaml
from heatclient.common import event_utils from heatclient.common import event_utils
from openstackclient import shell from openstackclient import shell
from tripleoclient import constants from tripleoclient.constants import ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
from tripleoclient.constants import ENABLE_SSH_ADMIN_STATUS_INTERVAL
from tripleoclient.constants import ENABLE_SSH_ADMIN_TIMEOUT
from tripleoclient import exceptions from tripleoclient import exceptions
from tripleoclient import utils from tripleoclient import utils
@ -157,11 +159,11 @@ def get_overcloud_hosts(stack, ssh_network):
return ips return ips
def wait_for_ssh_port(host): def wait_for_ssh_port(host, timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT):
start = int(time.time()) start = int(time.time())
while True: while True:
now = int(time.time()) now = int(time.time())
if (now - start) > constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT: if (now - start) > timeout:
raise exceptions.DeploymentError( raise exceptions.DeploymentError(
"Timed out waiting for port 22 from %s" % host) "Timed out waiting for port 22 from %s" % host)
# first check ipv4 then check ipv6 # first check ipv4 then check ipv6
@ -182,14 +184,17 @@ def wait_for_ssh_port(host):
time.sleep(1) time.sleep(1)
def get_hosts_and_enable_ssh_admin(log, clients, stack, overcloud_ssh_network, def get_hosts_and_enable_ssh_admin(
overcloud_ssh_user, overcloud_ssh_key): log, clients, stack, overcloud_ssh_network, overcloud_ssh_user,
overcloud_ssh_key, enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT,
enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT):
hosts = get_overcloud_hosts(stack, overcloud_ssh_network) hosts = get_overcloud_hosts(stack, overcloud_ssh_network)
if [host for host in hosts if host]: if [host for host in hosts if host]:
try: try:
enable_ssh_admin(log, clients, stack.stack_name, hosts, enable_ssh_admin(log, clients, stack.stack_name, hosts,
overcloud_ssh_user, overcloud_ssh_key) overcloud_ssh_user, overcloud_ssh_key,
enable_ssh_timeout, enable_ssh_port_timeout)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
if e.returncode == 255: if e.returncode == 255:
log.error("Couldn't not import keys to one of {}. " log.error("Couldn't not import keys to one of {}. "
@ -205,7 +210,10 @@ def get_hosts_and_enable_ssh_admin(log, clients, stack, overcloud_ssh_network,
overcloud_ssh_network)) overcloud_ssh_network))
def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key): def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key,
enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT,
enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
):
print("Enabling ssh admin (tripleo-admin) for hosts:") print("Enabling ssh admin (tripleo-admin) for hosts:")
print(" ".join(hosts)) print(" ".join(hosts))
print("Using ssh user %s for initial connection." % ssh_user) print("Using ssh user %s for initial connection." % ssh_user)
@ -240,7 +248,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key):
tmp_key_private_contents = privkey.read() tmp_key_private_contents = privkey.read()
for host in hosts: for host in hosts:
wait_for_ssh_port(host) wait_for_ssh_port(host, enable_ssh_port_timeout)
copy_tmp_key_command = ["ssh"] + ssh_options.split() copy_tmp_key_command = ["ssh"] + ssh_options.split()
copy_tmp_key_command += \ copy_tmp_key_command += \
["-o", "StrictHostKeyChecking=no", ["-o", "StrictHostKeyChecking=no",
@ -271,7 +279,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key):
start = int(time.time()) start = int(time.time())
while True: while True:
now = int(time.time()) now = int(time.time())
if (now - start) > constants.ENABLE_SSH_ADMIN_TIMEOUT: if (now - start) > enable_ssh_timeout:
raise exceptions.DeploymentError( raise exceptions.DeploymentError(
"ssh admin enablement workflow - TIMED OUT.") "ssh admin enablement workflow - TIMED OUT.")
@ -280,7 +288,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key):
state = execution.state state = execution.state
if state == 'RUNNING': if state == 'RUNNING':
if (now - start) % constants.ENABLE_SSH_ADMIN_STATUS_INTERVAL\ if (now - start) % ENABLE_SSH_ADMIN_STATUS_INTERVAL\
== 0: == 0:
print("ssh admin enablement workflow - RUNNING.") print("ssh admin enablement workflow - RUNNING.")
continue continue