Set 5 second socket read timeout

FM messaging socket reads that are triggered by FM API calls from
client services have occasionally been seen to block/stall
the fmManager process. This fmManager stall can then lead to other
client service process stalls, which in the case of mtcAgent has been
seen to lead to an uncontrolled switch of activity, aka Swact.

This update adds a 5 second socket read timeout to FM's client services
socket setup to avoid the prolonged blocking cases that lead to Swact
or adversely affect (block) other client service process execution.

Setting a read timeout on Linux sockets is a good programming practice.
Doing so helps ensure that applications, here FM and its client services,
do not hang indefinitely if a network operation like a socket read
becomes unresponsive.

Configuring a timeout helps manage network communication reliability
and efficiency, especially in applications where responsiveness is
critical, such as server-client applications like FM.

Test Plan:

PASS: Verify AIO DX system install.
PASS: Verify blocked socket timeout and error log after 5 seconds.
PASS: Verify unblocked socket reads complete successfully.
PASS: Verify alarm assert/clear functions operate normally.
PASS: Verify set socket timeout failure handling.
PASS: Verify fmManager is not leaking files or memory.
PASS: Verify rook-ceph apply remove 100 loop soak
      - no stall or swact
      - AIO DX
      - with 2 OSDs on each controller

Closes-Bug: 2088025
Change-Id: I1d947bccf9faeedcc2b96c7bc398fbab77b7ae09
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2024-11-12 20:21:27 +00:00
parent 30b311381a
commit 54f9fed7c3
2 changed files with 26 additions and 8 deletions

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2017,2023 Wind River Systems, Inc.
// Copyright (c) 2017,2023-2024 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
@ -78,6 +78,15 @@ bool CFmSocket::create_socket() {
}
//FM_DEBUG_LOG("SO_KEEPALIVE set on socket\n");
struct timeval timeout;
timeout.tv_sec = SOCKET_TIMEOUT_DEFAULT ;
timeout.tv_usec = 0 ;
if(setsockopt(m_fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout) < 0) {
FM_ERROR_LOG("Failed to setsockopt SO_RCVTIMEO timeout, error: (%d) (%s)", errno, strerror(errno));
close();
return false;
}
return true;
}
@ -390,13 +399,23 @@ bool FmSocketServer::server_reset() {
FM_INFO_LOG("Failed to create socket for port:(%d)\n", server_port);
return false;
}
// Set socket reusable
FM_INFO_LOG("Setting socket fd:%d as re-useable", m_fd);
int optval = 1;
setsockopt(m_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof optval);
// Set socket read timeout
struct timeval timeout;
timeout.tv_sec = SOCKET_TIMEOUT_DEFAULT ;
timeout.tv_usec = 0 ;
FM_INFO_LOG("Setting socket fd:%d with %d second read timeout", m_fd, timeout.tv_sec );
setsockopt(m_fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout);
switch (address_family) {
//When address is IPv4
case AF_INET:
{
int optval = 1;
setsockopt(m_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof optval);
struct sockaddr_in addr;
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = inet_addr(server_addr.c_str());
@ -421,9 +440,6 @@ bool FmSocketServer::server_reset() {
//When address is IPv6
case AF_INET6:
{
int optval = 1;
setsockopt(m_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof optval);
struct sockaddr_in6 addr;
addr.sin6_family = AF_INET6;
inet_pton(AF_INET6,server_addr.c_str(),&(addr.sin6_addr));

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2014,2023 Wind River Systems, Inc.
// Copyright (c) 2014,2023-2024 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
@ -19,6 +19,8 @@
#include <sys/un.h>
#include <map>
#define SOCKET_TIMEOUT_DEFAULT (5)
typedef struct CFmSockAddr_s {
int type;
union {