Files
nodepool/nodepool/nodeutils.py
Paul Belanger aebd030a32 Retry SSHExceptions in nodepool
Today, when SSHExceptions are raise, nodepool will abort communication
with the node. Now, nodepool will properly trap them and try again
until the SSH timeout has been raised.

This help with potential race conditions with openssh-server and
nodepool, where nodes would restart sshd after nodepool has
established a connection.

Change-Id: I40bfa1b1af6e4e75f8f14c597c28407ed08023de
Signed-off-by: Paul Belanger <pabelanger@redhat.com>
2016-10-05 12:11:47 -04:00

76 lines
2.5 KiB
Python

#!/usr/bin/env python
# Copyright (C) 2011-2013 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import errno
import time
import socket
import logging
from sshclient import SSHClient
import fakeprovider
import paramiko
import exceptions
log = logging.getLogger("nodepool.utils")
ITERATE_INTERVAL = 2 # How long to sleep while waiting for something
# in a loop
def iterate_timeout(max_seconds, exc, purpose):
start = time.time()
count = 0
while (time.time() < start + max_seconds):
count += 1
yield count
time.sleep(ITERATE_INTERVAL)
raise exc("Timeout waiting for %s" % purpose)
def ssh_connect(ip, username, connect_kwargs={}, timeout=60):
if 'fake' in ip:
return fakeprovider.FakeSSHClient()
# HPcloud may return ECONNREFUSED or EHOSTUNREACH
# for about 30 seconds after adding the IP
for count in iterate_timeout(
timeout, exceptions.SSHTimeoutException, "ssh access"):
try:
client = SSHClient(ip, username, **connect_kwargs)
break
except paramiko.SSHException as e:
# NOTE(pabelanger): Currently paramiko only returns a string with
# error code. If we want finer granularity we'll need to regex the
# string.
log.exception('Failed to negotiate SSH: %s' % (e))
except paramiko.AuthenticationException as e:
# This covers the case where the cloud user is created
# after sshd is up (Fedora for example)
log.info('Auth exception for %s@%s. Try number %i...' %
(username, ip, count))
except socket.error as e:
if e[0] not in [errno.ECONNREFUSED, errno.EHOSTUNREACH, None]:
log.exception(
'Exception while testing ssh access to %s:' % ip)
out = client.ssh("test ssh access", "echo access okay", output=True)
if "access okay" in out:
return client
return None