From aebd030a329261246d96102f2e7a75f47e8d1812 Mon Sep 17 00:00:00 2001 From: Paul Belanger Date: Wed, 5 Oct 2016 12:11:47 -0400 Subject: [PATCH] Retry SSHExceptions in nodepool Today, when SSHExceptions are raised, nodepool will abort communication with the node. Now, nodepool will properly trap them and try again until the SSH timeout has been reached. This helps with potential race conditions with openssh-server and nodepool, where nodes would restart sshd after nodepool has established a connection. Change-Id: I40bfa1b1af6e4e75f8f14c597c28407ed08023de Signed-off-by: Paul Belanger --- nodepool/nodeutils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nodepool/nodeutils.py b/nodepool/nodeutils.py index 921a1f505..13a8384b5 100644 --- a/nodepool/nodeutils.py +++ b/nodepool/nodeutils.py @@ -54,6 +54,11 @@ def ssh_connect(ip, username, connect_kwargs={}, timeout=60): try: client = SSHClient(ip, username, **connect_kwargs) break + except paramiko.SSHException as e: + # NOTE(pabelanger): Currently paramiko only returns a string with + # error code. If we want finer granularity we'll need to regex the + # string. + log.exception('Failed to negotiate SSH: %s' % (e)) except paramiko.AuthenticationException as e: # This covers the case where the cloud user is created # after sshd is up (Fedora for example)