Delay fm-api start until DNS is available
The fm-api must start as early as possible so it can process alarms from other services. During a fresh install the fm-api host is a valid IP address, but after unlock it receives a host FQDN (for example: controller-0.internal).

The fm-api service is started by systemctl, while dnsmasq is managed by SM. Even when dnsmasq is running, it may take some time before it can answer DNS queries.

To handle this, the fm-api now checks whether the host FQDN can be resolved before starting the wsgi.Server.

Tests performed:
- AIO-DX IPv4 fresh install
- AIO-DX IPv6 fresh install
- AIO-DX swact
- AIO-SX subcloud factory install
- AIO-SX backup and restore

Partial-Bug: 2116270
Change-Id: Ia41f948a5baf3aedff514dd74eeea95a068c044c
Signed-off-by: Fabiano Correa Mercer <fabiano.correamercer@windriver.com>
This commit is contained in:
@@ -1,11 +1,13 @@
|
||||
#
|
||||
# Copyright (c) 2018 Wind River Systems, Inc.
|
||||
# Copyright (c) 2018-2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
|
||||
import ipaddress
|
||||
import sys
|
||||
import subprocess # nosec B404
|
||||
|
||||
import eventlet
|
||||
from oslo_config import cfg
|
||||
@@ -42,6 +44,69 @@ LOG = logging.getLogger(__name__)
|
||||
eventlet.monkey_patch(os=False)
|
||||
|
||||
|
||||
def _resolve_host_once(host, record_type):
    """Run one ``dig +short`` query and return the first IP address found.

    :param host: name to look up; passed to dig unchanged.
    :param record_type: DNS record type handed to dig (the caller in this
        module uses "A" and "AAAA").
    :returns: the first whitespace-stripped line of dig's stdout that
        ``ipaddress.ip_address`` accepts, or None when dig could not be
        run, raised (including the 5 second timeout), exited with a
        non-zero status, or printed no IP address.
    """
    command = ["/usr/bin/dig", "+short", record_type, host]

    try:
        # Argument list (no shell); check=False so a non-zero exit status
        # is reported through returncode instead of an exception.
        completed = subprocess.run(  # nosec B603
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=5,
            check=False,
        )
    except Exception as err:
        # Best effort: whatever went wrong while running dig, treat the
        # name as "not resolved" and let the caller decide what to do.
        LOG.debug("Unexpected error resolving (%s) %s: %s",
                  record_type, host, err)
        return None

    if completed.returncode != 0:
        stderr_text = (completed.stderr or "").strip()
        LOG.warning("dig error for (%s) %s (code=%d): %s",
                    record_type, host, completed.returncode, stderr_text)
        return None

    # dig may print lines that are not addresses; skip anything that does
    # not parse as an IPv4/IPv6 address and return the first one that does.
    for raw_line in completed.stdout.splitlines():
        candidate = raw_line.strip()
        if not candidate:
            continue
        try:
            ipaddress.ip_address(candidate)
        except ValueError:
            continue
        return candidate

    return None
|
||||
|
||||
|
||||
def _wait_for_host_dns_resolution(host, interval=1, retries=90):
    """Block until ``host`` resolves through DNS, or the retries run out.

    Only names ending in ".internal" (for example controller-0.internal)
    are checked; for any other value the function returns immediately.

    Each attempt queries the "A" record first and falls back to "AAAA"
    when no address comes back. The function never raises on failure to
    resolve: once every attempt has failed it logs a warning and returns.

    :param host: host name (or IP address) the server is about to use.
    :param interval: value passed to ``eventlet.sleep`` between attempts.
    :param retries: maximum number of resolution attempts.
    :returns: None in every case.
    """
    # Anything that is not an ".internal" FQDN needs no DNS wait.
    if not host.endswith(".internal"):
        return

    LOG.info("Waiting for DNS resolution of %s (%d retries)...",
             host, retries)

    for attempt in range(1, retries + 1):
        # Try IPv4 first; only ask for AAAA when the A lookup gave nothing.
        address = (_resolve_host_once(host, "A") or
                   _resolve_host_once(host, "AAAA"))
        if address:
            LOG.info("DNS resolved %s -> %s", host, address)
            return

        LOG.info("Attempt %d/%d failed to resolve %s",
                 attempt, retries, host)

        # Skip the pause after the final attempt.
        if attempt < retries:
            eventlet.sleep(interval)

    LOG.warning("DNS did not resolve %s after %d retries", host, retries)
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
config.init(sys.argv[1:])
|
||||
@@ -61,6 +126,7 @@ def main():
|
||||
|
||||
LOG.info("Server on http://%(host)s:%(port)s with %(workers)s",
|
||||
{'host': host, 'port': port, 'workers': workers})
|
||||
_wait_for_host_dns_resolution(host)
|
||||
systemd.notify_once()
|
||||
service = wsgi.Server(CONF, CONF.prog, application, host, port)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user