Support several API and Inspector URLs

Allows nodes with a single IP stack to be deployed from a dual-stack
Ironic.

Detecting advertised address and usable Ironic URLs are done completely
independently which does open some space for a misconfiguration. I hope
it's not likely in the reality, especially since this feature is
targetting advanced standalone users.

Change-Id: Ifa506c58caebe00b37167d329b81c166cdb323f2
Closes-Bug: #2045548
This commit is contained in:
Dmitry Tantsur 2023-12-13 16:44:49 +01:00
parent 2bb74523ae
commit 0d4ae976c2
No known key found for this signature in database
GPG Key ID: 315B2AF9FD216C60
9 changed files with 208 additions and 75 deletions

View File

@ -154,10 +154,10 @@ class IronicPythonAgentHeartbeater(threading.Thread):
except Exception as exc:
if isinstance(exc, errors.HeartbeatConflictError):
LOG.warning('conflict error sending heartbeat to %s',
self.agent.api_url)
self.agent.api_urls)
else:
LOG.exception('error sending heartbeat to %s',
self.agent.api_url)
self.agent.api_urls)
self.interval = _with_jitter(self.min_heartbeat_interval,
self.min_error_jitter_multiplier,
self.max_error_jitter_multiplier)
@ -183,6 +183,23 @@ class IronicPythonAgentHeartbeater(threading.Thread):
class IronicPythonAgent(base.ExecuteCommandMixin):
"""Class for base agent functionality."""
@classmethod
def from_config(cls, conf):
return cls(conf.api_url,
Host(hostname=conf.advertise_host,
port=conf.advertise_port),
Host(hostname=conf.listen_host,
port=conf.listen_port),
conf.ip_lookup_attempts,
conf.ip_lookup_sleep,
conf.network_interface,
conf.lookup_timeout,
conf.lookup_interval,
False,
conf.agent_token,
conf.hardware_initialization_delay,
conf.advertise_protocol)
def __init__(self, api_url, advertise_address, listen_address,
ip_lookup_attempts, ip_lookup_sleep, network_interface,
lookup_timeout, lookup_interval, standalone, agent_token,
@ -192,12 +209,11 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
LOG.warning("Only one of 'keyfile' and 'certfile' options is "
"defined in config file. Its value will be ignored.")
self.ext_mgr = base.init_ext_manager(self)
self.api_url = api_url
if (not self.api_url or self.api_url == 'mdns') and not standalone:
if (not api_url or api_url == 'mdns') and not standalone:
try:
self.api_url, params = mdns.get_endpoint('baremetal')
api_url, params = mdns.get_endpoint('baremetal')
except lib_exc.ServiceLookupFailure:
if self.api_url:
if api_url:
# mDNS explicitly requested, report failure.
raise
else:
@ -207,9 +223,12 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
'will not heartbeat')
else:
config.override(params)
if self.api_url:
self.api_client = ironic_api_client.APIClient(self.api_url)
if api_url:
self.api_urls = list(filter(None, api_url.split(',')))
else:
self.api_urls = None
if self.api_urls:
self.api_client = ironic_api_client.APIClient(self.api_urls)
self.heartbeater = IronicPythonAgentHeartbeater(self)
self.listen_address = listen_address
self.advertise_address = advertise_address
@ -293,6 +312,25 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
return source
def _find_routable_addr(self):
ips = []
for api_url in self.api_urls:
ironic_host = urlparse.urlparse(api_url).hostname
# Try resolving it in case it's not an IP address
try:
ironic_host = socket.gethostbyname(ironic_host)
except socket.gaierror:
LOG.debug('Could not resolve %s, maybe no DNS', ironic_host)
ips.append(ironic_host)
for attempt in range(self.ip_lookup_attempts):
for ironic_host in ips:
found_ip = self._get_route_source(ironic_host)
if found_ip:
return found_ip
time.sleep(self.ip_lookup_sleep)
def set_agent_advertise_addr(self):
"""Set advertised IP address for the agent, if not already set.
@ -311,20 +349,7 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
found_ip = hardware.dispatch_to_managers('get_ipv4_addr',
self.network_interface)
else:
url = urlparse.urlparse(self.api_url)
ironic_host = url.hostname
# Try resolving it in case it's not an IP address
try:
ironic_host = socket.gethostbyname(ironic_host)
except socket.gaierror:
LOG.debug('Count not resolve %s, maybe no DNS', ironic_host)
for attempt in range(self.ip_lookup_attempts):
found_ip = self._get_route_source(ironic_host)
if found_ip:
break
time.sleep(self.ip_lookup_sleep)
found_ip = self._find_routable_addr()
if found_ip:
self.advertise_address = Host(hostname=found_ip,
@ -397,7 +422,7 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
LOG.debug('Automated TLS is disabled')
return None, None
if not self.api_url or not self.api_client.supports_auto_tls():
if not self.api_urls or not self.api_client.supports_auto_tls():
LOG.warning('Ironic does not support automated TLS')
return None, None
@ -415,7 +440,7 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
"""Serve the API until an extension terminates it."""
cert_file, key_file = self._start_auto_tls()
self.api.start(cert_file, key_file)
if not self.standalone and self.api_url:
if not self.standalone and self.api_urls:
# Don't start heartbeating until the server is listening
self.heartbeater.start()
try:
@ -509,7 +534,7 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
except errors.InspectionError as e:
LOG.error('Failed to perform inspection: %s', e)
if self.api_url:
if self.api_urls:
content = self.api_client.lookup_node(
hardware_info=hardware.list_hardware_info(use_cache=True),
timeout=self.lookup_timeout,
@ -534,5 +559,5 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
self.serve_ipa_api()
if not self.standalone and self.api_url:
if not self.standalone and self.api_urls:
self.heartbeater.stop()

View File

@ -47,17 +47,4 @@ def run():
logger.debug("Configuration:")
CONF.log_opt_values(logger, log.DEBUG)
utils.log_early_log_to_logger()
agent.IronicPythonAgent(CONF.api_url,
agent.Host(hostname=CONF.advertise_host,
port=CONF.advertise_port),
agent.Host(hostname=CONF.listen_host,
port=CONF.listen_port),
CONF.ip_lookup_attempts,
CONF.ip_lookup_sleep,
CONF.network_interface,
CONF.lookup_timeout,
CONF.lookup_interval,
False,
CONF.agent_token,
CONF.hardware_initialization_delay,
CONF.advertise_protocol).run()
agent.IronicPythonAgent.from_config(CONF).run()

View File

@ -30,11 +30,13 @@ cli_opts = [
cfg.StrOpt('api_url',
default=APARAMS.get('ipa-api-url'),
regex='^(mdns|http(s?):\\/\\/.+)',
help='URL of the Ironic API. '
help='URL(s) of the Ironic API. '
'Can be supplied as "ipa-api-url" kernel parameter.'
'The value must start with either http:// or https://. '
'The value(s) must start with either http:// or https://. '
'A special value "mdns" can be specified to fetch the '
'URL using multicast DNS service discovery.'),
'URL using multicast DNS service discovery. If several '
'URLs are provided, all of them are tried until one '
'does not return a connection error.'),
cfg.StrOpt('global_request_id',
default=APARAMS.get('ipa-global-request-id'),
@ -155,9 +157,8 @@ cli_opts = [
cfg.StrOpt('inspection_callback_url',
default=APARAMS.get('ipa-inspection-callback-url'),
help='Endpoint of ironic-inspector. If set, hardware inventory '
'will be collected and sent to ironic-inspector '
'on start up. '
help='Endpoint(s) to send inspection data to. If set, hardware '
'inventory will be collected and sent there on start up. '
'A special value "mdns" can be specified to fetch the '
'URL using multicast DNS service discovery. '
'Can be supplied as "ipa-inspection-callback-url" '

View File

@ -125,7 +125,6 @@ def call_inspector(data, failures):
"""Post data to inspector."""
data['error'] = failures.get_error()
LOG.info('posting collected data to %s', CONF.inspection_callback_url)
LOG.debug('collected data: %s',
{k: v for k, v in data.items() if k not in _NO_LOGGING_FIELDS})
@ -140,6 +139,8 @@ def call_inspector(data, failures):
if CONF.global_request_id:
headers["X-OpenStack-Request-ID"] = CONF.global_request_id
urls = list(filter(None, CONF.inspection_callback_url.split(',')))
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
(requests.exceptions.ConnectionError,
@ -149,9 +150,21 @@ def call_inspector(data, failures):
min=_RETRY_WAIT, max=_RETRY_WAIT_MAX),
reraise=True)
def _post_to_inspector():
inspector_resp = requests.post(
CONF.inspection_callback_url, data=data, headers=headers,
verify=verify, cert=cert, timeout=CONF.http_request_timeout)
for url in urls:
LOG.info('Posting collected data to %s', url)
try:
inspector_resp = requests.post(
url, data=data, headers=headers,
verify=verify, cert=cert,
timeout=CONF.http_request_timeout)
except requests.exceptions.ConnectionError as exc:
if url == urls[-1]:
raise
LOG.warning("Connection error when accessing %s, trying the "
"next URL. Error: %s", url, exc)
else:
break
if inspector_resp.status_code >= 500:
raise requests.exceptions.HTTPError(response=inspector_resp)

View File

@ -39,6 +39,12 @@ AGENT_VERIFY_CA_IRONIC_VERSION = (1, 68)
# versions to ensure that we send the highest version we know about.
MAX_KNOWN_VERSION = AGENT_VERIFY_CA_IRONIC_VERSION
CONNECT_EXCEPTIONS = (requests.exceptions.Timeout,
requests.exceptions.ConnectTimeout,
requests.exceptions.ConnectionError,
requests.exceptions.ReadTimeout,
requests.exceptions.HTTPError)
class APIClient(object):
api_version = 'v1'
@ -48,8 +54,10 @@ class APIClient(object):
agent_token = None
lookup_lock_pause = 0
def __init__(self, api_url):
self.api_url = api_url.rstrip('/')
def __init__(self, api_urls):
if isinstance(api_urls, str):
api_urls = [api_urls]
self.api_urls = [url.rstrip('/') for url in api_urls]
# Only keep alive a maximum of 2 connections to the API. More will be
# opened if they are needed, but they will be closed immediately after
@ -57,12 +65,12 @@ class APIClient(object):
adapter = requests.adapters.HTTPAdapter(pool_connections=2,
pool_maxsize=2)
self.session = requests.Session()
self.session.mount(self.api_url, adapter)
self.session.mount('https://', adapter)
self.session.mount('http://', adapter)
self.encoder = encoding.RESTJSONEncoder()
def _request(self, method, path, data=None, headers=None, **kwargs):
request_url = '{api_url}{path}'.format(api_url=self.api_url, path=path)
if data is not None:
data = self.encoder.encode(data)
@ -76,14 +84,27 @@ class APIClient(object):
headers["X-OpenStack-Request-ID"] = CONF.global_request_id
verify, cert = utils.get_ssl_client_options(CONF)
return self.session.request(method,
request_url,
headers=headers,
data=data,
verify=verify,
cert=cert,
timeout=CONF.http_request_timeout,
**kwargs)
for idx, api_url in enumerate(self.api_urls):
request_url = f'{api_url}{path}'
try:
resp = self.session.request(method,
request_url,
headers=headers,
data=data,
verify=verify,
cert=cert,
timeout=CONF.http_request_timeout,
**kwargs)
# Make sure the working URL is on the top, so that the next
# time we start from it. Also allows us to log self.api_urls[0]
# as the currently used URL.
self.api_urls = self.api_urls[idx:] + self.api_urls[:idx]
return resp
except CONNECT_EXCEPTIONS as exc:
if idx == len(self.api_urls) - 1:
raise
LOG.warning("Connection error when accessing %s, trying the "
"next URL. Error: %s", request_url, exc)
def _get_ironic_api_version_header(self, version=None):
if version is None:
@ -204,21 +225,18 @@ class APIClient(object):
params['node_uuid'] = node_uuid
LOG.debug('Looking up node with addresses %r and UUID %s at %s',
params['addresses'], node_uuid, self.api_url)
params['addresses'], node_uuid, self.api_urls)
try:
response = self._request(
'GET', self.lookup_api,
headers=self._get_ironic_api_version_header(),
params=params)
except (requests.exceptions.Timeout,
requests.exceptions.ConnectTimeout,
requests.exceptions.ConnectionError,
requests.exceptions.ReadTimeout,
requests.exceptions.HTTPError) as err:
except CONNECT_EXCEPTIONS as err:
# Report the last URL, there are warnings for the rest already
LOG.warning(
'Error detected while attempting to perform lookup '
'with %s, retrying. Error: %s', self.api_url, err
'with %s, retrying. Error: %s', self.api_urls[-1], err
)
return False
except Exception as err:
@ -229,7 +247,7 @@ class APIClient(object):
# To be clear, we're going to try to provide as much detail as
# possible in the exit handling
msg = ('Unhandled error looking up node with addresses {} at '
'{}: {}'.format(params['addresses'], self.api_url, err))
'{}: {}'.format(params['addresses'], self.api_urls, err))
# No matter what we do at this point, IPA is going to exit.
# This is because we don't know why the exception occurred and
# we likely should not try to retry as such.
@ -272,7 +290,7 @@ class APIClient(object):
LOG.warning(
'Failed looking up node with addresses %r at %s. '
'Check if inspection has completed? %s',
params['addresses'], self.api_url,
params['addresses'], self.api_urls[0],
self._error_from_response(response)
)
return False
@ -288,7 +306,7 @@ class APIClient(object):
LOG.warning(
'Got invalid node data in response to query for node '
'with addresses %r from %s: %s',
params['addresses'], self.api_url, content,
params['addresses'], self.api_urls[0], content,
)
return False

View File

@ -1065,7 +1065,7 @@ class TestAdvertiseAddress(ironic_agent_base.IronicAgentTest):
self.assertFalse(mock_gethostbyname.called)
def test_route_with_ip(self, mock_exec, mock_gethostbyname):
self.agent.api_url = 'http://1.2.1.2:8081/v1'
self.agent.api_urls = ['http://1.2.1.2:8081/v1']
mock_gethostbyname.side_effect = socket.gaierror()
mock_exec.return_value = (
"""1.2.1.2 via 192.168.122.1 dev eth0 src 192.168.122.56
@ -1081,7 +1081,7 @@ class TestAdvertiseAddress(ironic_agent_base.IronicAgentTest):
mock_gethostbyname.assert_called_once_with('1.2.1.2')
def test_route_with_ipv6(self, mock_exec, mock_gethostbyname):
self.agent.api_url = 'http://[fc00:1111::1]:8081/v1'
self.agent.api_urls = ['http://[fc00:1111::1]:8081/v1']
mock_gethostbyname.side_effect = socket.gaierror()
mock_exec.return_value = (
"""fc00:101::1 dev br-ctlplane src fc00:101::4 metric 0
@ -1137,6 +1137,46 @@ class TestAdvertiseAddress(ironic_agent_base.IronicAgentTest):
self.assertEqual(3, mock_exec.call_count)
self.assertEqual(2, mock_sleep.call_count)
@mock.patch.object(time, 'sleep', autospec=True)
def test_route_several_urls_and_retries(self, mock_sleep, mock_exec,
mock_gethostbyname):
mock_gethostbyname.side_effect = lambda x: x
self.agent.api_urls = ['http://[fc00:1111::1]:8081/v1',
'http://1.2.1.2:8081/v1']
mock_exec.side_effect = [
processutils.ProcessExecutionError('boom'),
(
"Error: some error text",
""
),
processutils.ProcessExecutionError('boom'),
(
"""1.2.1.2 via 192.168.122.1 dev eth0 src 192.168.122.56
cache """,
""
)
]
self.agent.set_agent_advertise_addr()
self.assertEqual(('192.168.122.56', 9990),
self.agent.advertise_address)
mock_exec.assert_has_calls([
mock.call('ip', 'route', 'get', 'fc00:1111::1'),
mock.call('ip', 'route', 'get', '1.2.1.2'),
mock.call('ip', 'route', 'get', 'fc00:1111::1'),
mock.call('ip', 'route', 'get', '1.2.1.2'),
])
mock_gethostbyname.assert_has_calls([
mock.call('fc00:1111::1'),
mock.call('1.2.1.2'),
])
mock_sleep.assert_called_with(10)
self.assertEqual(4, mock_exec.call_count)
# Both URLs are handled in a single attempt, so only one sleep here
self.assertEqual(1, mock_sleep.call_count)
self.assertEqual(2, mock_gethostbyname.call_count)
@mock.patch.object(time, 'sleep', autospec=True)
def test_route_failed(self, mock_sleep, mock_exec, mock_gethostbyname):
mock_gethostbyname.return_value = '1.2.1.2'
@ -1225,3 +1265,12 @@ class TestBaseAgentVMediaToken(ironic_agent_base.IronicAgentTest):
self.agent.heartbeater.start.assert_called_once_with()
self.assertEqual('1' * 128, self.agent.agent_token)
self.assertEqual('1' * 128, self.agent.api_client.agent_token)
class TestFromConfig(ironic_agent_base.IronicAgentTest):
def test_override_urls(self):
urls = ['http://[fc00:1111::1]:8081/v1', 'http://1.2.1.2:8081/v1']
CONF.set_override('api_url', ','.join(urls))
ag = agent.IronicPythonAgent.from_config(CONF)
self.assertEqual(urls, ag.api_urls)

View File

@ -209,6 +209,28 @@ class TestCallInspector(base.IronicAgentTest):
data, failures)
self.assertEqual(5, mock_post.call_count)
@mock.patch.object(inspector, '_RETRY_WAIT', 0.01)
@mock.patch.object(inspector, '_RETRY_WAIT_MAX', 1)
def test_inspector_several_urls(self, mock_post):
CONF.set_override('inspection_callback_url', 'url1,url2')
mock_post.side_effect = [
requests.exceptions.ConnectionError,
requests.exceptions.ConnectionError,
mock.Mock(status_code=200),
]
failures = utils.AccumulatedFailures()
data = collections.OrderedDict(data=42)
inspector.call_inspector(data, failures)
self.assertEqual(3, mock_post.call_count)
mock_post.assert_has_calls([
mock.call('url1', cert=None, verify=True, headers=mock.ANY,
data='{"data": 42, "error": null}', timeout=30),
mock.call('url2', cert=None, verify=True, headers=mock.ANY,
data='{"data": 42, "error": null}', timeout=30),
mock.call('url1', cert=None, verify=True, headers=mock.ANY,
data='{"data": 42, "error": null}', timeout=30),
])
@mock.patch.object(inspector, '_RETRY_WAIT', 0.01)
@mock.patch.object(inspector, '_RETRY_WAIT_MAX', 1)
@mock.patch.object(inspector, '_RETRY_ATTEMPTS', 3)

View File

@ -340,6 +340,16 @@ class TestBaseIronicPythonAgent(base.IronicAgentTest):
uuid='meow',
advertise_address=('192.0.2.1', '9999'))
def test_heartbeat_requests_several_urls(self):
self.api_client.api_urls = ['2001:db8::1', '192.0.2.1']
self.api_client.session.request = mock.Mock()
self.api_client.session.request.side_effect = [
requests.exceptions.ConnectionError,
FakeResponse(status_code=202),
]
self.api_client.heartbeat(uuid='meow',
advertise_address=('192.0.2.1', '9999'))
@mock.patch('time.sleep', autospec=True)
@mock.patch('ironic_python_agent.ironic_api_client.APIClient._do_lookup',
autospec=True)

View File

@ -0,0 +1,8 @@
---
features:
- |
Supports several comma-separated URLs for ``ipa-api-url`` and
``ipa-inspection-callback-url``. The URLs are probed in the provided
order until one does not return a connection error. The primary use case
it to support deploying nodes with only one IP stack from an Ironic
installation that has both stacks.