Set 5 second socket read timeout
FM messaging socket reads that are triggered by FM API calls from client services have occasionally, though rarely, been seen to block/stall the fmManager process. This fmManager stall can then lead to other client service process stalls which, in the case of mtcAgent, have been seen to lead to an uncontrolled switch of activity, aka Swact. This update adds a 5 second socket read timeout to FM's client services socket setup to avoid the prolonged blocking cases that lead to Swact or adversely affect (block) other client service process execution. Setting a read timeout on Linux sockets is a good programming practice. Doing so helps ensure that an application, in this case FM and its client services, does not hang indefinitely if a network operation like a socket read becomes unresponsive. Configuring a timeout helps manage network communication reliability and efficiency, especially in applications where responsiveness is critical, such as a server-client application like FM. Test Plan: PASS: Verify AIO DX system install. PASS: Verify blocked socket timeout and error log after 5 seconds. PASS: Verify unblocked socket reads complete successfully. PASS: Verify alarm assert/clear functions operate normally. PASS: Verify set socket timeout failure handling. PASS: Verify fmManager is not leaking files or memory. PASS: Verify rook-ceph apply remove 100 loop soak - no stall or swact - AIO DX - with 2 OSDs on each controller Closes-Bug: 2088025 Change-Id: I1d947bccf9faeedcc2b96c7bc398fbab77b7ae09 Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
30b311381a
commit
54f9fed7c3
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2017,2023 Wind River Systems, Inc.
|
||||
// Copyright (c) 2017,2023-2024 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
@ -78,6 +78,15 @@ bool CFmSocket::create_socket() {
|
||||
}
|
||||
//FM_DEBUG_LOG("SO_KEEPALIVE set on socket\n");
|
||||
|
||||
struct timeval timeout;
|
||||
timeout.tv_sec = SOCKET_TIMEOUT_DEFAULT ;
|
||||
timeout.tv_usec = 0 ;
|
||||
if(setsockopt(m_fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout) < 0) {
|
||||
FM_ERROR_LOG("Failed to setsockopt SO_RCVTIMEO timeout, error: (%d) (%s)", errno, strerror(errno));
|
||||
close();
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -390,13 +399,23 @@ bool FmSocketServer::server_reset() {
|
||||
FM_INFO_LOG("Failed to create socket for port:(%d)\n", server_port);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set socket reusable
|
||||
FM_INFO_LOG("Setting socket fd:%d as re-useable", m_fd);
|
||||
int optval = 1;
|
||||
setsockopt(m_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof optval);
|
||||
|
||||
// Set socket read timeout
|
||||
struct timeval timeout;
|
||||
timeout.tv_sec = SOCKET_TIMEOUT_DEFAULT ;
|
||||
timeout.tv_usec = 0 ;
|
||||
FM_INFO_LOG("Setting socket fd:%d with %d second read timeout", m_fd, timeout.tv_sec );
|
||||
setsockopt(m_fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout);
|
||||
|
||||
switch (address_family) {
|
||||
//When address is IPv4
|
||||
case AF_INET:
|
||||
{
|
||||
int optval = 1;
|
||||
setsockopt(m_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof optval);
|
||||
|
||||
struct sockaddr_in addr;
|
||||
addr.sin_family = AF_INET;
|
||||
addr.sin_addr.s_addr = inet_addr(server_addr.c_str());
|
||||
@ -421,9 +440,6 @@ bool FmSocketServer::server_reset() {
|
||||
//When address is IPv6
|
||||
case AF_INET6:
|
||||
{
|
||||
int optval = 1;
|
||||
setsockopt(m_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof optval);
|
||||
|
||||
struct sockaddr_in6 addr;
|
||||
addr.sin6_family = AF_INET6;
|
||||
inet_pton(AF_INET6,server_addr.c_str(),&(addr.sin6_addr));
|
||||
|
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2014,2023 Wind River Systems, Inc.
|
||||
// Copyright (c) 2014,2023-2024 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
@ -19,6 +19,8 @@
|
||||
#include <sys/un.h>
|
||||
#include <map>
|
||||
|
||||
// Default socket read timeout, in seconds (assigned to timeval.tv_sec for SO_RCVTIMEO).
#define SOCKET_TIMEOUT_DEFAULT (5)
|
||||
|
||||
typedef struct CFmSockAddr_s {
|
||||
int type;
|
||||
union {
|
||||
|
Loading…
Reference in New Issue
Block a user