Delay fm-api start until DNS is available

The fm-api must start as early as possible so it can process alarms from
other services. During a fresh install the fm-api host is a valid IP
address, but after unlock it receives a host FQDN (for example:
controller-0.internal).

The fm-api service is started by systemctl, while dnsmasq is managed by
SM. Even when dnsmasq is running, it may take some time before it can
answer DNS queries. To handle this, the fm-api now checks whether the
host FQDN can be resolved before starting the wsgi.Server.

Tests performed:
- AIO-DX IPv4 fresh install
- AIO-DX IPv6 fresh install
- AIO-DX swact
- AIO-SX subcloud factory install
- AIO-SX backup and restore

Partial-Bug: 2116270

Change-Id: Ia41f948a5baf3aedff514dd74eeea95a068c044c
Signed-off-by: Fabiano Correa Mercer <fabiano.correamercer@windriver.com>
This commit is contained in:
Fabiano Correa Mercer
2025-11-19 17:04:46 -03:00
parent d7b1288976
commit 3ee7272a78

View File

@@ -1,11 +1,13 @@
#
# Copyright (c) 2018 Wind River Systems, Inc.
# Copyright (c) 2018-2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import ipaddress
import sys
import subprocess # nosec B404
import eventlet
from oslo_config import cfg
@@ -42,6 +44,69 @@ LOG = logging.getLogger(__name__)
eventlet.monkey_patch(os=False)
def _resolve_host_once(host, record_type):
    """Run one ``dig +short`` query and return the first IP address printed.

    :param host: name to look up.
    :param record_type: DNS record type passed to dig (the caller in this
                        file uses "A" and "AAAA").
    :returns: the first stripped output line that parses as an IP address,
              or None when dig could not be run, exited with a non-zero
              code, or printed no valid address.
    """
    command = ["/usr/bin/dig", "+short", record_type, host]
    try:
        completed = subprocess.run(  # nosec B603
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=5,
            check=False
        )
    except Exception as e:
        # Best effort: any failure to run dig (including hitting the 5s
        # timeout) is treated as "not resolved" rather than propagated.
        LOG.debug("Unexpected error resolving (%s) %s: %s", record_type, host, e)
        return None

    exit_code = completed.returncode
    if exit_code != 0:
        error_text = completed.stderr.strip() if completed.stderr else ""
        LOG.warning(
            "dig error for (%s) %s (code=%d): %s",
            record_type,
            host,
            exit_code,
            error_text
        )
        return None

    # dig may print lines that are not addresses; skip those and return
    # the first line that is a well-formed IP.
    for candidate in (raw.strip() for raw in completed.stdout.splitlines()):
        if not candidate:
            continue
        try:
            ipaddress.ip_address(candidate)
        except ValueError:
            continue
        return candidate

    return None
def _wait_for_host_dns_resolution(host, interval=1, retries=90):
# just execute the DNS resolution for FQDN
# i.e: ( controller-0.internal )
if not host.endswith(".internal"):
return
LOG.info("Waiting for DNS resolution of %s (%d retries)...", host, retries)
for attempt in range(1, retries + 1):
ip = _resolve_host_once(host, "A")
if not ip:
ip = _resolve_host_once(host, "AAAA")
if ip:
LOG.info("DNS resolved %s -> %s", host, ip)
return
LOG.info("Attempt %d/%d failed to resolve %s", attempt, retries, host)
if attempt < retries:
eventlet.sleep(interval)
LOG.warning("DNS did not resolve %s after %d retries", host, retries)
def main():
config.init(sys.argv[1:])
@@ -61,6 +126,7 @@ def main():
LOG.info("Server on http://%(host)s:%(port)s with %(workers)s",
{'host': host, 'port': port, 'workers': workers})
_wait_for_host_dns_resolution(host)
systemd.notify_once()
service = wsgi.Server(CONF, CONF.prog, application, host, port)