From 3ee7272a780375240a090629bad54982579b0b85 Mon Sep 17 00:00:00 2001 From: Fabiano Correa Mercer Date: Wed, 19 Nov 2025 17:04:46 -0300 Subject: [PATCH] Delay fm-api start until DNS is available The fm-api must start as early as possible so it can process alarms from other services. During a fresh install the fm-api host is a valid IP address, but after unlock it receives a host FQDN (for example: controller-0.internal). The fm-api service is started by systemctl, while dnsmasq is managed by SM. Even when dnsmasq is running, it may take some time before it can answer DNS queries. To handle this, the fm-api now checks whether the host FQDN can be resolved before starting the wsgi.Server. Tests performed: - AIO-DX IPv4 fresh install - AIO-DX IPv6 fresh install - AIO-DX swact - AIO-SX subcloud factory install - AIO-SX backup and restore Partial-Bug: 2116270 Change-Id: Ia41f948a5baf3aedff514dd74eeea95a068c044c Signed-off-by: Fabiano Correa Mercer --- fm-rest-api/fm/fm/cmd/api.py | 68 +++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/fm-rest-api/fm/fm/cmd/api.py b/fm-rest-api/fm/fm/cmd/api.py index c61992fc..9e10c453 100644 --- a/fm-rest-api/fm/fm/cmd/api.py +++ b/fm-rest-api/fm/fm/cmd/api.py @@ -1,11 +1,13 @@ # -# Copyright (c) 2018 Wind River Systems, Inc. +# Copyright (c) 2018-2025 Wind River Systems, Inc. 
def _resolve_host_once(host, record_type, timeout=5):
    """Run one ``dig +short`` query and return the first IP address found.

    :param host: host name to look up (for example
        ``controller-0.internal``).
    :param record_type: DNS record type handed to dig, ``"A"`` or
        ``"AAAA"``.
    :param timeout: seconds to wait for the dig process before giving up.
    :returns: the first output line that parses as an IPv4/IPv6 address,
        or ``None`` when dig could not be run, timed out, exited with a
        non-zero status or printed no address.
    """
    try:
        result = subprocess.run(  # nosec B603
            ["/usr/bin/dig", "+short", record_type, host],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=timeout,
            check=False,
        )
    except subprocess.TimeoutExpired as e:
        # Expected while the DNS server is not answering yet. The caller
        # already reports every failed attempt, so keep this quiet.
        LOG.debug("Timeout resolving (%s) %s: %s", record_type, host, e)
        return None
    except Exception as e:
        # With check=False the remaining failures are about launching dig
        # itself (binary missing, not executable, invalid arguments).
        # Make them visible, but stay best-effort so the service can
        # still start.
        LOG.warning("Unexpected error resolving (%s) %s: %s",
                    record_type, host, e)
        return None

    if result.returncode != 0:
        LOG.warning(
            "dig error for (%s) %s (code=%d): %s",
            record_type,
            host,
            result.returncode,
            (result.stderr or "").strip(),
        )
        return None

    # dig may print lines that are not addresses (for instance a CNAME
    # target), so only accept a line that parses as an IP address.
    for line in result.stdout.splitlines():
        candidate = line.strip()
        if not candidate:
            continue
        try:
            ipaddress.ip_address(candidate)
        except ValueError:
            continue
        return candidate
    return None


def _wait_for_host_dns_resolution(host, interval=1, retries=90):
    """Wait until *host* resolves through DNS or the retries run out.

    Only names ending in ``.internal`` (for example
    ``controller-0.internal``) are checked; any other value, including an
    IP address or an unset/empty host, returns immediately. Each attempt
    queries the ``A`` record first and falls back to ``AAAA``.

    The function never raises on resolution failure: once the retries are
    exhausted it logs a warning and returns so the caller can carry on.

    :param host: host name to wait for.
    :param interval: seconds to sleep between two attempts.
    :param retries: maximum number of attempts.
    :returns: ``None`` in every case.
    """
    # Nothing to wait for unless the host is an internal FQDN.
    if not host or not host.endswith(".internal"):
        return

    LOG.info("Waiting for DNS resolution of %s (%d retries)...", host, retries)

    for attempt in range(1, retries + 1):
        ip = (_resolve_host_once(host, "A")
              or _resolve_host_once(host, "AAAA"))
        if ip:
            LOG.info("DNS resolved %s -> %s", host, ip)
            return

        LOG.info("Attempt %d/%d failed to resolve %s", attempt, retries, host)

        # No point in sleeping once the last attempt has failed.
        if attempt < retries:
            eventlet.sleep(interval)

    LOG.warning("DNS did not resolve %s after %d retries", host, retries)