Enable TCP keepalives in zuul_stream

Some remote commands can run for some time without producing output.
Some network devices/firewalls/pnats/etc may decide to drop the
connection since it looks idle.  Try to avoid that by sending TCP
keepalives; if that doesn't keep the connection alive, they should at
least cause us to be notified that it is dead.  In that case, the
log streamer will receive a timeout error on the socket recv call.
We will log it and stop streaming.

Change-Id: Ib8e2aae0de07ff2dcc9de8e244f09e0cf9796e20
This commit is contained in:
James E. Blair 2024-02-14 11:03:02 -08:00
parent d91efe232d
commit 556b9917a0
1 changed files with 20 additions and 1 deletions

View File

@ -141,6 +141,18 @@ class Streamer:
# logs continuously. Without this we can easily trip the 5
# second timeout.
s.settimeout(None)
# Some remote commands can run for some time without
# producing output. In case there are network
# components that might drop an idle TCP connection,
# enable keepalives so that we can hopefully maintain
# the connection, or at the least, be notified if it
# is terminated. Ping every 30 seconds after 30
# seconds of idleness. Allow 9 lost pings (about 4.5
# minutes at 30-second intervals) before we fail.
s.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30)
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 30)
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 9)
return s
except socket.timeout:
self.callback._log_streamline(
@ -229,7 +241,14 @@ class Streamer:
pass
return
else:
more = s.recv(4096)
try:
more = s.recv(4096)
except TimeoutError:
self.callback._log_streamline(
self.host,
"[Zuul] Lost log stream connection to [%s:%s]"
% (self.ip, self.port))
raise
if not more:
buffering = False
else: