From 0d4ae976c2ecf843ffc5fc9f76c98910a774708c Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Wed, 13 Dec 2023 16:44:49 +0100 Subject: [PATCH] Support several API and Inspector URLs Allows nodes with a single IP stack to be deployed from a dual-stack Ironic. Detecting advertised address and usable Ironic URLs are done completely independently which does open some space for a misconfiguration. I hope it's not likely in the reality, especially since this feature is targetting advanced standalone users. Change-Id: Ifa506c58caebe00b37167d329b81c166cdb323f2 Closes-Bug: #2045548 --- ironic_python_agent/agent.py | 79 ++++++++++++------- ironic_python_agent/cmd/agent.py | 15 +--- ironic_python_agent/config.py | 13 +-- ironic_python_agent/inspector.py | 21 ++++- ironic_python_agent/ironic_api_client.py | 62 +++++++++------ ironic_python_agent/tests/unit/test_agent.py | 53 ++++++++++++- .../tests/unit/test_inspector.py | 22 ++++++ .../tests/unit/test_ironic_api_client.py | 10 +++ .../notes/several-urls-9c3b8c14338b06ba.yaml | 8 ++ 9 files changed, 208 insertions(+), 75 deletions(-) create mode 100644 releasenotes/notes/several-urls-9c3b8c14338b06ba.yaml diff --git a/ironic_python_agent/agent.py b/ironic_python_agent/agent.py index a3032bd0f..d41a03b61 100644 --- a/ironic_python_agent/agent.py +++ b/ironic_python_agent/agent.py @@ -154,10 +154,10 @@ class IronicPythonAgentHeartbeater(threading.Thread): except Exception as exc: if isinstance(exc, errors.HeartbeatConflictError): LOG.warning('conflict error sending heartbeat to %s', - self.agent.api_url) + self.agent.api_urls) else: LOG.exception('error sending heartbeat to %s', - self.agent.api_url) + self.agent.api_urls) self.interval = _with_jitter(self.min_heartbeat_interval, self.min_error_jitter_multiplier, self.max_error_jitter_multiplier) @@ -183,6 +183,23 @@ class IronicPythonAgentHeartbeater(threading.Thread): class IronicPythonAgent(base.ExecuteCommandMixin): """Class for base agent functionality.""" + @classmethod + def from_config(cls, conf): + return cls(conf.api_url, + Host(hostname=conf.advertise_host, + port=conf.advertise_port), + Host(hostname=conf.listen_host, + port=conf.listen_port), + conf.ip_lookup_attempts, + conf.ip_lookup_sleep, + conf.network_interface, + conf.lookup_timeout, + conf.lookup_interval, + False, + conf.agent_token, + conf.hardware_initialization_delay, + conf.advertise_protocol) + def __init__(self, api_url, advertise_address, listen_address, ip_lookup_attempts, ip_lookup_sleep, network_interface, lookup_timeout, lookup_interval, standalone, agent_token, @@ -192,12 +209,11 @@ class IronicPythonAgent(base.ExecuteCommandMixin): LOG.warning("Only one of 'keyfile' and 'certfile' options is " "defined in config file. Its value will be ignored.") self.ext_mgr = base.init_ext_manager(self) - self.api_url = api_url - if (not self.api_url or self.api_url == 'mdns') and not standalone: + if (not api_url or api_url == 'mdns') and not standalone: try: - self.api_url, params = mdns.get_endpoint('baremetal') + api_url, params = mdns.get_endpoint('baremetal') except lib_exc.ServiceLookupFailure: - if self.api_url: + if api_url: # mDNS explicitly requested, report failure. raise else: @@ -207,9 +223,12 @@ class IronicPythonAgent(base.ExecuteCommandMixin): 'will not heartbeat') else: config.override(params) - - if self.api_url: - self.api_client = ironic_api_client.APIClient(self.api_url) + if api_url: + self.api_urls = list(filter(None, api_url.split(','))) + else: + self.api_urls = None + if self.api_urls: + self.api_client = ironic_api_client.APIClient(self.api_urls) self.heartbeater = IronicPythonAgentHeartbeater(self) self.listen_address = listen_address self.advertise_address = advertise_address @@ -293,6 +312,25 @@ class IronicPythonAgent(base.ExecuteCommandMixin): return source + def _find_routable_addr(self): + ips = [] + for api_url in self.api_urls: + ironic_host = urlparse.urlparse(api_url).hostname + # Try resolving it in case it's not an IP address + try: + ironic_host = socket.gethostbyname(ironic_host) + except socket.gaierror: + LOG.debug('Could not resolve %s, maybe no DNS', ironic_host) + ips.append(ironic_host) + + for attempt in range(self.ip_lookup_attempts): + for ironic_host in ips: + found_ip = self._get_route_source(ironic_host) + if found_ip: + return found_ip + + time.sleep(self.ip_lookup_sleep) + def set_agent_advertise_addr(self): """Set advertised IP address for the agent, if not already set. @@ -311,20 +349,7 @@ class IronicPythonAgent(base.ExecuteCommandMixin): found_ip = hardware.dispatch_to_managers('get_ipv4_addr', self.network_interface) else: - url = urlparse.urlparse(self.api_url) - ironic_host = url.hostname - # Try resolving it in case it's not an IP address - try: - ironic_host = socket.gethostbyname(ironic_host) - except socket.gaierror: - LOG.debug('Count not resolve %s, maybe no DNS', ironic_host) - - for attempt in range(self.ip_lookup_attempts): - found_ip = self._get_route_source(ironic_host) - if found_ip: - break - - time.sleep(self.ip_lookup_sleep) + found_ip = self._find_routable_addr() if found_ip: self.advertise_address = Host(hostname=found_ip, @@ -397,7 +422,7 @@ class IronicPythonAgent(base.ExecuteCommandMixin): LOG.debug('Automated TLS is disabled') return None, None - if not self.api_url or not self.api_client.supports_auto_tls(): + if not self.api_urls or not self.api_client.supports_auto_tls(): LOG.warning('Ironic does not support automated TLS') return None, None @@ -415,7 +440,7 @@ class IronicPythonAgent(base.ExecuteCommandMixin): """Serve the API until an extension terminates it.""" cert_file, key_file = self._start_auto_tls() self.api.start(cert_file, key_file) - if not self.standalone and self.api_url: + if not self.standalone and self.api_urls: # Don't start heartbeating until the server is listening self.heartbeater.start() try: @@ -509,7 +534,7 @@ class IronicPythonAgent(base.ExecuteCommandMixin): except errors.InspectionError as e: LOG.error('Failed to perform inspection: %s', e) - if self.api_url: + if self.api_urls: content = self.api_client.lookup_node( hardware_info=hardware.list_hardware_info(use_cache=True), timeout=self.lookup_timeout, @@ -534,5 +559,5 @@ class IronicPythonAgent(base.ExecuteCommandMixin): self.serve_ipa_api() - if not self.standalone and self.api_url: + if not self.standalone and self.api_urls: self.heartbeater.stop() diff --git a/ironic_python_agent/cmd/agent.py b/ironic_python_agent/cmd/agent.py index 9ebe30065..a3670becb 100644 --- a/ironic_python_agent/cmd/agent.py +++ b/ironic_python_agent/cmd/agent.py @@ -47,17 +47,4 @@ def run(): logger.debug("Configuration:") CONF.log_opt_values(logger, log.DEBUG) utils.log_early_log_to_logger() - agent.IronicPythonAgent(CONF.api_url, - agent.Host(hostname=CONF.advertise_host, - port=CONF.advertise_port), - agent.Host(hostname=CONF.listen_host, - port=CONF.listen_port), - CONF.ip_lookup_attempts, - CONF.ip_lookup_sleep, - CONF.network_interface, - CONF.lookup_timeout, - CONF.lookup_interval, - False, - CONF.agent_token, - CONF.hardware_initialization_delay, - CONF.advertise_protocol).run() + agent.IronicPythonAgent.from_config(CONF).run() diff --git a/ironic_python_agent/config.py b/ironic_python_agent/config.py index 383d290d1..35cde2729 100644 --- a/ironic_python_agent/config.py +++ b/ironic_python_agent/config.py @@ -30,11 +30,13 @@ cli_opts = [ cfg.StrOpt('api_url', default=APARAMS.get('ipa-api-url'), regex='^(mdns|http(s?):\\/\\/.+)', - help='URL of the Ironic API. ' + help='URL(s) of the Ironic API. ' 'Can be supplied as "ipa-api-url" kernel parameter.' - 'The value must start with either http:// or https://. ' + 'The value(s) must start with either http:// or https://. ' 'A special value "mdns" can be specified to fetch the ' - 'URL using multicast DNS service discovery.'), + 'URL using multicast DNS service discovery. If several ' + 'URLs are provided, all of them are tried until one ' + 'does not return a connection error.'), cfg.StrOpt('global_request_id', default=APARAMS.get('ipa-global-request-id'), @@ -155,9 +157,8 @@ cli_opts = [ cfg.StrOpt('inspection_callback_url', default=APARAMS.get('ipa-inspection-callback-url'), - help='Endpoint of ironic-inspector. If set, hardware inventory ' - 'will be collected and sent to ironic-inspector ' - 'on start up. ' + help='Endpoint(s) to send inspection data to. If set, hardware ' + 'inventory will be collected and sent there on start up. ' 'A special value "mdns" can be specified to fetch the ' 'URL using multicast DNS service discovery. ' 'Can be supplied as "ipa-inspection-callback-url" ' diff --git a/ironic_python_agent/inspector.py b/ironic_python_agent/inspector.py index e95d38211..7d9dc0733 100644 --- a/ironic_python_agent/inspector.py +++ b/ironic_python_agent/inspector.py @@ -125,7 +125,6 @@ def call_inspector(data, failures): """Post data to inspector.""" data['error'] = failures.get_error() - LOG.info('posting collected data to %s', CONF.inspection_callback_url) LOG.debug('collected data: %s', {k: v for k, v in data.items() if k not in _NO_LOGGING_FIELDS}) @@ -140,6 +139,8 @@ def call_inspector(data, failures): if CONF.global_request_id: headers["X-OpenStack-Request-ID"] = CONF.global_request_id + urls = list(filter(None, CONF.inspection_callback_url.split(','))) + @tenacity.retry( retry=tenacity.retry_if_exception_type( (requests.exceptions.ConnectionError, @@ -149,9 +150,21 @@ def call_inspector(data, failures): min=_RETRY_WAIT, max=_RETRY_WAIT_MAX), reraise=True) def _post_to_inspector(): - inspector_resp = requests.post( - CONF.inspection_callback_url, data=data, headers=headers, - verify=verify, cert=cert, timeout=CONF.http_request_timeout) + for url in urls: + LOG.info('Posting collected data to %s', url) + try: + inspector_resp = requests.post( + url, data=data, headers=headers, + verify=verify, cert=cert, + timeout=CONF.http_request_timeout) + except requests.exceptions.ConnectionError as exc: + if url == urls[-1]: + raise + LOG.warning("Connection error when accessing %s, trying the " + "next URL. Error: %s", url, exc) + else: + break + if inspector_resp.status_code >= 500: raise requests.exceptions.HTTPError(response=inspector_resp) diff --git a/ironic_python_agent/ironic_api_client.py b/ironic_python_agent/ironic_api_client.py index 614fa20c5..2959c0b09 100644 --- a/ironic_python_agent/ironic_api_client.py +++ b/ironic_python_agent/ironic_api_client.py @@ -39,6 +39,12 @@ AGENT_VERIFY_CA_IRONIC_VERSION = (1, 68) # versions to ensure that we send the highest version we know about. MAX_KNOWN_VERSION = AGENT_VERIFY_CA_IRONIC_VERSION +CONNECT_EXCEPTIONS = (requests.exceptions.Timeout, + requests.exceptions.ConnectTimeout, + requests.exceptions.ConnectionError, + requests.exceptions.ReadTimeout, + requests.exceptions.HTTPError) + class APIClient(object): api_version = 'v1' @@ -48,8 +54,10 @@ class APIClient(object): agent_token = None lookup_lock_pause = 0 - def __init__(self, api_url): - self.api_url = api_url.rstrip('/') + def __init__(self, api_urls): + if isinstance(api_urls, str): + api_urls = [api_urls] + self.api_urls = [url.rstrip('/') for url in api_urls] # Only keep alive a maximum of 2 connections to the API. More will be # opened if they are needed, but they will be closed immediately after @@ -57,12 +65,12 @@ class APIClient(object): adapter = requests.adapters.HTTPAdapter(pool_connections=2, pool_maxsize=2) self.session = requests.Session() - self.session.mount(self.api_url, adapter) + self.session.mount('https://', adapter) + self.session.mount('http://', adapter) self.encoder = encoding.RESTJSONEncoder() def _request(self, method, path, data=None, headers=None, **kwargs): - request_url = '{api_url}{path}'.format(api_url=self.api_url, path=path) if data is not None: data = self.encoder.encode(data) @@ -76,14 +84,27 @@ class APIClient(object): headers["X-OpenStack-Request-ID"] = CONF.global_request_id verify, cert = utils.get_ssl_client_options(CONF) - return self.session.request(method, - request_url, - headers=headers, - data=data, - verify=verify, - cert=cert, - timeout=CONF.http_request_timeout, - **kwargs) + for idx, api_url in enumerate(self.api_urls): + request_url = f'{api_url}{path}' + try: + resp = self.session.request(method, + request_url, + headers=headers, + data=data, + verify=verify, + cert=cert, + timeout=CONF.http_request_timeout, + **kwargs) + # Make sure the working URL is on the top, so that the next + # time we start from it. Also allows us to log self.api_urls[0] + # as the currently used URL. + self.api_urls = self.api_urls[idx:] + self.api_urls[:idx] + return resp + except CONNECT_EXCEPTIONS as exc: + if idx == len(self.api_urls) - 1: + raise + LOG.warning("Connection error when accessing %s, trying the " + "next URL. Error: %s", request_url, exc) def _get_ironic_api_version_header(self, version=None): if version is None: @@ -204,21 +225,18 @@ class APIClient(object): params['node_uuid'] = node_uuid LOG.debug('Looking up node with addresses %r and UUID %s at %s', - params['addresses'], node_uuid, self.api_url) + params['addresses'], node_uuid, self.api_urls) try: response = self._request( 'GET', self.lookup_api, headers=self._get_ironic_api_version_header(), params=params) - except (requests.exceptions.Timeout, - requests.exceptions.ConnectTimeout, - requests.exceptions.ConnectionError, - requests.exceptions.ReadTimeout, - requests.exceptions.HTTPError) as err: + except CONNECT_EXCEPTIONS as err: + # Report the last URL, there are warnings for the rest already LOG.warning( 'Error detected while attempting to perform lookup ' - 'with %s, retrying. Error: %s', self.api_url, err + 'with %s, retrying. Error: %s', self.api_urls[-1], err ) return False except Exception as err: @@ -229,7 +247,7 @@ class APIClient(object): # To be clear, we're going to try to provide as much detail as # possible in the exit handling msg = ('Unhandled error looking up node with addresses {} at ' - '{}: {}'.format(params['addresses'], self.api_url, err)) + '{}: {}'.format(params['addresses'], self.api_urls, err)) # No matter what we do at this point, IPA is going to exit. # This is because we don't know why the exception occurred and # we likely should not try to retry as such. @@ -272,7 +290,7 @@ class APIClient(object): LOG.warning( 'Failed looking up node with addresses %r at %s. ' 'Check if inspection has completed? %s', - params['addresses'], self.api_url, + params['addresses'], self.api_urls[0], self._error_from_response(response) ) return False @@ -288,7 +306,7 @@ class APIClient(object): LOG.warning( 'Got invalid node data in response to query for node ' 'with addresses %r from %s: %s', - params['addresses'], self.api_url, content, + params['addresses'], self.api_urls[0], content, ) return False diff --git a/ironic_python_agent/tests/unit/test_agent.py b/ironic_python_agent/tests/unit/test_agent.py index 400a36eb5..0a3daab31 100644 --- a/ironic_python_agent/tests/unit/test_agent.py +++ b/ironic_python_agent/tests/unit/test_agent.py @@ -1065,7 +1065,7 @@ class TestAdvertiseAddress(ironic_agent_base.IronicAgentTest): self.assertFalse(mock_gethostbyname.called) def test_route_with_ip(self, mock_exec, mock_gethostbyname): - self.agent.api_url = 'http://1.2.1.2:8081/v1' + self.agent.api_urls = ['http://1.2.1.2:8081/v1'] mock_gethostbyname.side_effect = socket.gaierror() mock_exec.return_value = ( """1.2.1.2 via 192.168.122.1 dev eth0 src 192.168.122.56 @@ -1081,7 +1081,7 @@ class TestAdvertiseAddress(ironic_agent_base.IronicAgentTest): mock_gethostbyname.assert_called_once_with('1.2.1.2') def test_route_with_ipv6(self, mock_exec, mock_gethostbyname): - self.agent.api_url = 'http://[fc00:1111::1]:8081/v1' + self.agent.api_urls = ['http://[fc00:1111::1]:8081/v1'] mock_gethostbyname.side_effect = socket.gaierror() mock_exec.return_value = ( """fc00:101::1 dev br-ctlplane src fc00:101::4 metric 0 @@ -1137,6 +1137,46 @@ class TestAdvertiseAddress(ironic_agent_base.IronicAgentTest): self.assertEqual(3, mock_exec.call_count) self.assertEqual(2, mock_sleep.call_count) + @mock.patch.object(time, 'sleep', autospec=True) + def test_route_several_urls_and_retries(self, mock_sleep, mock_exec, + mock_gethostbyname): + mock_gethostbyname.side_effect = lambda x: x + self.agent.api_urls = ['http://[fc00:1111::1]:8081/v1', + 'http://1.2.1.2:8081/v1'] + mock_exec.side_effect = [ + processutils.ProcessExecutionError('boom'), + ( + "Error: some error text", + "" + ), + processutils.ProcessExecutionError('boom'), + ( + """1.2.1.2 via 192.168.122.1 dev eth0 src 192.168.122.56 + cache """, + "" + ) + ] + + self.agent.set_agent_advertise_addr() + + self.assertEqual(('192.168.122.56', 9990), + self.agent.advertise_address) + mock_exec.assert_has_calls([ + mock.call('ip', 'route', 'get', 'fc00:1111::1'), + mock.call('ip', 'route', 'get', '1.2.1.2'), + mock.call('ip', 'route', 'get', 'fc00:1111::1'), + mock.call('ip', 'route', 'get', '1.2.1.2'), + ]) + mock_gethostbyname.assert_has_calls([ + mock.call('fc00:1111::1'), + mock.call('1.2.1.2'), + ]) + mock_sleep.assert_called_with(10) + self.assertEqual(4, mock_exec.call_count) + # Both URLs are handled in a single attempt, so only one sleep here + self.assertEqual(1, mock_sleep.call_count) + self.assertEqual(2, mock_gethostbyname.call_count) + @mock.patch.object(time, 'sleep', autospec=True) def test_route_failed(self, mock_sleep, mock_exec, mock_gethostbyname): mock_gethostbyname.return_value = '1.2.1.2' @@ -1225,3 +1265,12 @@ class TestBaseAgentVMediaToken(ironic_agent_base.IronicAgentTest): self.agent.heartbeater.start.assert_called_once_with() self.assertEqual('1' * 128, self.agent.agent_token) self.assertEqual('1' * 128, self.agent.api_client.agent_token) + + +class TestFromConfig(ironic_agent_base.IronicAgentTest): + + def test_override_urls(self): + urls = ['http://[fc00:1111::1]:8081/v1', 'http://1.2.1.2:8081/v1'] + CONF.set_override('api_url', ','.join(urls)) + ag = agent.IronicPythonAgent.from_config(CONF) + self.assertEqual(urls, ag.api_urls) diff --git a/ironic_python_agent/tests/unit/test_inspector.py b/ironic_python_agent/tests/unit/test_inspector.py index be98d432c..5f05fed79 100644 --- a/ironic_python_agent/tests/unit/test_inspector.py +++ b/ironic_python_agent/tests/unit/test_inspector.py @@ -209,6 +209,28 @@ class TestCallInspector(base.IronicAgentTest): data, failures) self.assertEqual(5, mock_post.call_count) + @mock.patch.object(inspector, '_RETRY_WAIT', 0.01) + @mock.patch.object(inspector, '_RETRY_WAIT_MAX', 1) + def test_inspector_several_urls(self, mock_post): + CONF.set_override('inspection_callback_url', 'url1,url2') + mock_post.side_effect = [ + requests.exceptions.ConnectionError, + requests.exceptions.ConnectionError, + mock.Mock(status_code=200), + ] + failures = utils.AccumulatedFailures() + data = collections.OrderedDict(data=42) + inspector.call_inspector(data, failures) + self.assertEqual(3, mock_post.call_count) + mock_post.assert_has_calls([ + mock.call('url1', cert=None, verify=True, headers=mock.ANY, + data='{"data": 42, "error": null}', timeout=30), + mock.call('url2', cert=None, verify=True, headers=mock.ANY, + data='{"data": 42, "error": null}', timeout=30), + mock.call('url1', cert=None, verify=True, headers=mock.ANY, + data='{"data": 42, "error": null}', timeout=30), + ]) + @mock.patch.object(inspector, '_RETRY_WAIT', 0.01) @mock.patch.object(inspector, '_RETRY_WAIT_MAX', 1) @mock.patch.object(inspector, '_RETRY_ATTEMPTS', 3) diff --git a/ironic_python_agent/tests/unit/test_ironic_api_client.py b/ironic_python_agent/tests/unit/test_ironic_api_client.py index 6c101acc7..bfe47dd15 100644 --- a/ironic_python_agent/tests/unit/test_ironic_api_client.py +++ b/ironic_python_agent/tests/unit/test_ironic_api_client.py @@ -340,6 +340,16 @@ class TestBaseIronicPythonAgent(base.IronicAgentTest): uuid='meow', advertise_address=('192.0.2.1', '9999')) + def test_heartbeat_requests_several_urls(self): + self.api_client.api_urls = ['2001:db8::1', '192.0.2.1'] + self.api_client.session.request = mock.Mock() + self.api_client.session.request.side_effect = [ + requests.exceptions.ConnectionError, + FakeResponse(status_code=202), + ] + self.api_client.heartbeat(uuid='meow', + advertise_address=('192.0.2.1', '9999')) + @mock.patch('time.sleep', autospec=True) @mock.patch('ironic_python_agent.ironic_api_client.APIClient._do_lookup', autospec=True) diff --git a/releasenotes/notes/several-urls-9c3b8c14338b06ba.yaml b/releasenotes/notes/several-urls-9c3b8c14338b06ba.yaml new file mode 100644 index 000000000..3dc69f869 --- /dev/null +++ b/releasenotes/notes/several-urls-9c3b8c14338b06ba.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + Supports several comma-separated URLs for ``ipa-api-url`` and + ``ipa-inspection-callback-url``. The URLs are probed in the provided + order until one does not return a connection error. The primary use case + it to support deploying nodes with only one IP stack from an Ironic + installation that has both stacks.