Add additional ssh timeout options

Added `--overcloud-ssh-enable-timeout` and
`--overcloud-ssh-port-timeout` to the openstack overcloud deploy command
to allow users to tune these values. The ssh port timeout might need
to be increased if the hardware being deployed is slow to POST and
takes longer than 5 minutes to become ready.

These options have been added to:

 * openstack overcloud deploy
 * openstack overcloud admin
 * openstack overcloud ffu upgrade
 * openstack overcloud upgrade

Change-Id: I56f6001b5efeed7ccb3918a5c2622c3eca613c07
Closes-Bug: #1858222
This commit is contained in:
Alex Schultz 2020-01-03 11:46:21 -07:00
parent 8293e5eec5
commit 188acfbc40
9 changed files with 78 additions and 18 deletions

View File

@ -0,0 +1,11 @@
---
features:
- |
Added `--overcloud-ssh-enable-timeout` to allow end users to increase how
long we wait for the ssh enable process to finish during the deploy, ffu,
upgrade and admin actions. By default this is 600 seconds.
- |
Added `--overcloud-ssh-port-timeout` to allow end users to increase the
time we wait for ssh to become ready on the hosts during the deploy, ffu,
upgrade and admin actions. On older or slow-booting hardware, the default
of 300 seconds for the port to come up may not be sufficient.

View File

@ -99,7 +99,8 @@ class TestFFWDUpgradePrepare(fakes.TestFFWDUpgradePrepare):
mock_enable_ssh_admin.assert_called_once_with(
self.cmd.log, self.app.client_manager, mock_stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key)
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
600, 300)
mock_overcloud_deploy.assert_called_once_with(parsed_args)
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'

View File

@ -78,10 +78,14 @@ class TestOvercloudUpgradePrepare(fakes.TestOvercloudUpgradePrepare):
mock_yaml.return_value = {'fake_container': 'fake_value'}
add_env = mock.Mock()
add_env.return_value = True
argslist = ['--stack', 'overcloud', '--templates', ]
argslist = ['--stack', 'overcloud', '--templates',
'--overcloud-ssh-enable-timeout', '10',
'--overcloud-ssh-port-timeout', '10']
verifylist = [
('stack', 'overcloud'),
('templates', constants.TRIPLEO_HEAT_TEMPLATES),
('overcloud_ssh_enable_timeout', 10),
('overcloud_ssh_port_timeout', 10),
]
parsed_args = self.check_parser(self.cmd, argslist, verifylist)
@ -99,7 +103,8 @@ class TestOvercloudUpgradePrepare(fakes.TestOvercloudUpgradePrepare):
mock_enable_ssh_admin.assert_called_once_with(
self.cmd.log, self.app.client_manager, mock_stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key)
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
10, 10)
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'take_action')

View File

@ -39,4 +39,6 @@ class TestAdminAuthorize(test_plugin.TestPluginV1):
mock_stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key)
parsed_args.overcloud_ssh_key,
600,
300)

View File

@ -21,6 +21,7 @@ from osc_lib.i18n import _
from osc_lib import utils
from tripleoclient import command
from tripleoclient import constants
from tripleoclient import utils as oooutils
from tripleoclient.workflows import deployment
@ -59,6 +60,18 @@ class Authorize(command.Command):
help=_('Network name to use for ssh access to overcloud nodes.'),
default='ctlplane'
)
parser.add_argument(
'--overcloud-ssh-enable-timeout',
help=_('Timeout for the ssh enable process to finish.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_TIMEOUT
)
# Upper bound on how long to wait for a host's ssh port to come up.
parser.add_argument(
    '--overcloud-ssh-port-timeout',
    help=_('Timeout to wait for the ssh port to become active.'),
    type=int,
    default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
)
return parser
@ -70,4 +83,6 @@ class Authorize(command.Command):
self.log, clients, stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key)
parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)

View File

@ -683,6 +683,18 @@ class DeployOvercloud(command.Command):
help=_('Network name to use for ssh access to overcloud nodes.'),
default='ctlplane'
)
parser.add_argument(
'--overcloud-ssh-enable-timeout',
help=_('Timeout for the ssh enable process to finish.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_TIMEOUT
)
# Upper bound on how long to wait for a host's ssh port to come up.
parser.add_argument(
    '--overcloud-ssh-port-timeout',
    help=_('Timeout to wait for the ssh port to become active.'),
    type=int,
    default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
)
parser.add_argument(
'--environment-file', '-e', metavar='<HEAT ENVIRONMENT FILE>',
action='append', dest='environment_files',
@ -942,7 +954,9 @@ class DeployOvercloud(command.Command):
self.log, self.clients, stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key)
parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)
if parsed_args.config_download_timeout:
timeout = parsed_args.config_download_timeout * 60

View File

@ -93,7 +93,9 @@ class FFWDUpgradePrepare(DeployOvercloud):
stack = oooutils.get_stack(clients.orchestration, parsed_args.stack)
deployment.get_hosts_and_enable_ssh_admin(
self.log, clients, stack, parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key)
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)
self.log.info("FFWD Upgrade Prepare on stack {0} complete.".format(
parsed_args.stack))

View File

@ -83,7 +83,9 @@ class UpgradePrepare(DeployOvercloud):
stack = oooutils.get_stack(clients.orchestration, parsed_args.stack)
deployment.get_hosts_and_enable_ssh_admin(
self.log, clients, stack, parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key)
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)
self.log.info("Completed Overcloud Upgrade Prepare for stack "
"{0}".format(stack_name))

View File

@ -22,7 +22,9 @@ import yaml
from heatclient.common import event_utils
from openstackclient import shell
from tripleoclient import constants
from tripleoclient.constants import ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
from tripleoclient.constants import ENABLE_SSH_ADMIN_STATUS_INTERVAL
from tripleoclient.constants import ENABLE_SSH_ADMIN_TIMEOUT
from tripleoclient import exceptions
from tripleoclient import utils
@ -157,11 +159,11 @@ def get_overcloud_hosts(stack, ssh_network):
return ips
def wait_for_ssh_port(host):
def wait_for_ssh_port(host, timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT):
start = int(time.time())
while True:
now = int(time.time())
if (now - start) > constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT:
if (now - start) > timeout:
raise exceptions.DeploymentError(
"Timed out waiting for port 22 from %s" % host)
# first check ipv4 then check ipv6
@ -182,14 +184,17 @@ def wait_for_ssh_port(host):
time.sleep(1)
def get_hosts_and_enable_ssh_admin(log, clients, stack, overcloud_ssh_network,
overcloud_ssh_user, overcloud_ssh_key):
def get_hosts_and_enable_ssh_admin(
log, clients, stack, overcloud_ssh_network, overcloud_ssh_user,
overcloud_ssh_key, enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT,
enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT):
hosts = get_overcloud_hosts(stack, overcloud_ssh_network)
if [host for host in hosts if host]:
try:
enable_ssh_admin(log, clients, stack.stack_name, hosts,
overcloud_ssh_user, overcloud_ssh_key)
overcloud_ssh_user, overcloud_ssh_key,
enable_ssh_timeout, enable_ssh_port_timeout)
except subprocess.CalledProcessError as e:
if e.returncode == 255:
log.error("Couldn't not import keys to one of {}. "
@ -205,7 +210,10 @@ def get_hosts_and_enable_ssh_admin(log, clients, stack, overcloud_ssh_network,
overcloud_ssh_network))
def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key):
def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key,
enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT,
enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
):
print("Enabling ssh admin (tripleo-admin) for hosts:")
print(" ".join(hosts))
print("Using ssh user %s for initial connection." % ssh_user)
@ -240,7 +248,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key):
tmp_key_private_contents = privkey.read()
for host in hosts:
wait_for_ssh_port(host)
wait_for_ssh_port(host, enable_ssh_port_timeout)
copy_tmp_key_command = ["ssh"] + ssh_options.split()
copy_tmp_key_command += \
["-o", "StrictHostKeyChecking=no",
@ -271,7 +279,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key):
start = int(time.time())
while True:
now = int(time.time())
if (now - start) > constants.ENABLE_SSH_ADMIN_TIMEOUT:
if (now - start) > enable_ssh_timeout:
raise exceptions.DeploymentError(
"ssh admin enablement workflow - TIMED OUT.")
@ -280,7 +288,7 @@ def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key):
state = execution.state
if state == 'RUNNING':
if (now - start) % constants.ENABLE_SSH_ADMIN_STATUS_INTERVAL\
if (now - start) % ENABLE_SSH_ADMIN_STATUS_INTERVAL\
== 0:
print("ssh admin enablement workflow - RUNNING.")
continue