From 175f1e2d2e532dc0c4c76ee46695579fdc4eeb25 Mon Sep 17 00:00:00 2001 From: Alex Kavanagh Date: Tue, 26 Feb 2019 14:00:58 +0100 Subject: [PATCH] Add retry handling to MaaS OCF DNS API calls This patchset adds retries to the MaaS API calls with the the maas_dns.py script to allow Maas/PostGRES to 'settle' during failover situations. This should enable corosync to successfully update the DNS records by allowing extra time for MaaS to settle and to detect when the transient failures from the MaaS occur. Closes-Bug: #1817429 Change-Id: I5180508ba236c60b7b677e226914e7fcd13c3020 --- ocf/maas/maas_dns.py | 208 ++++++++++++++++++++++++++++++- ocf/maas/maasclient/__init__.py | 29 ++--- ocf/maas/maasclient/apidriver.py | 29 +++-- ocf/maas/maasclient/driver.py | 9 +- 4 files changed, 241 insertions(+), 34 deletions(-) diff --git a/ocf/maas/maas_dns.py b/ocf/maas/maas_dns.py index ca4d9a7..e7bc947 100755 --- a/ocf/maas/maas_dns.py +++ b/ocf/maas/maas_dns.py @@ -18,10 +18,156 @@ import argparse import requests_oauthlib import logging import sys +import time import maasclient +# Default MaaS API options +NUM_RETRIES = 5 +RETRY_BASE_DELAY = 10 +RETRY_CODES = [500] + +# the global options that is parsed from the arguments +options = None + + +class RetriesException(Exception): + pass + + +def retry_on_request_error(retries=3, base_delay=0, codes=None): + """Retry a function that retures a requests response. + + If the response from the target function has an error code in the + :param:`codes` list then retry the function up to :param:`retries`. The + :param:`base_delay`, if not zero, will progressively back off at + `base_delay`, `base_delay * 2`, `base_delay * 3` ... + + If the decorated function raises an exception, then the decorator DOESN'T + catch it, and this will bypass any retries. + + In order to enable the decorator to access command line arguments, each of + the arguments can optionally be a Callable that returns the value, which + will be evaluated when the function is called. + + :param retries: Number of attempts to run the decorated function. + :type retries: Option[int, Callable[..., int]] + :param base_delay: Back off time, which linearly increases by the number of + retries for each failed request. + :type base_delay: Option[int, Callable[..., int]] + :param codes: The codes to detect that force a retry that + response.status_code may contain. + :type codes: Option[List[int], Callable(..., List[int]] + :returns: decorated target function + :rtype: Callable + :raises: Exception, if the decorated function raises an exception + """ + if codes is None: + codes = [500] + + def inner1(f): + + def inner2(*args, **kwargs): + if callable(retries): + _retries = retries() + else: + _retries = retries + num_retries = _retries + if callable(base_delay): + _base_delay = base_delay() + else: + _base_delay = base_delay + if callable(codes): + _codes = codes() + else: + _codes = codes + multiplier = 1 + while True: + response = f(*args, **kwargs) + if response.status_code not in _codes: + return response + if _retries <= 0: + raise RetriesException( + "Command {} failed after {} retries" + .format(f.__name__, num_retries)) + delay = _base_delay * multiplier + multiplier += 1 + logging.debug( + "Retrying '{}' {} more times (delay={})" + .format(f.__name__, _retries, delay)) + _retries -= 1 + if delay: + time.sleep(delay) + + return inner2 + + return inner1 + + +def options_retries(): + """Returns options.maas_api_retries value + + It's used as a callable in the retry_on_request_error as follows: + + @retry_on_request_error(retries=options_retries, + base_delay=options_base_delay, + codes=options_codes) + def some_function_that_needs_retries_that_returns_Response(...): + pass + + :returns: options.maas_api_retries + :rtype: int + """ + global options + if options is not None: + return options.maas_api_retries + else: + return NUM_RETRIES + + +def options_base_delay(): + """Returns options.maas_base_delay + + It's used as a callable in the retry_on_request_error as follows: + + @retry_on_request_error(retries=options_retries, + base_delay=options_base_delay, + codes=options_codes) + def some_function_that_needs_retries_that_returns_Response(...): + pass + + :returns: options.maas_base_delay + :rtype: int + """ + global options + if options is not None: + return options.maas_base_delay + else: + return RETRY_BASE_DELAY + + +def options_codes(): + """Returns options.maas_retry_codes + + It's used as a callable in the retry_on_request_error as follows: + + @retry_on_request_error(retries=options_retries, + base_delay=options_base_delay, + codes=options_codes) + def some_function_that_needs_retries_that_returns_Response(...): + pass + + :returns: options.maas_retry_codes + :rtype: List[int] + """ + global options + if options is not None: + return options.maas_retry_codes + else: + return RETRY_CODES + + class MAASDNS(object): def __init__(self, options): self.maas = maasclient.MAASClient(options.maas_server, @@ -51,6 +197,9 @@ class MAASDNS(object): """ Get a dnsresource ID """ return self.dnsresource['id'] + @retry_on_request_error(retries=options_retries, + base_delay=options_base_delay, + codes=options_codes) def update_resource(self): """ Update a dnsresource record with an IP """ return self.maas.update_dnsresource(self.dnsresource['id'], @@ -82,7 +231,14 @@ class MAASDNS(object): 'address_ttl': self.ttl, 'ip_addresses': self.ip, } - return maas_session.post(dns_url, data=payload) + + @retry_on_request_error(retries=options_retries, + base_delay=options_base_delay, + codes=options_codes) + def inner_maas_session_post(session, dns_url, payload): + return session.post(dns_url, data=payload) + + return inner_maas_session_post(maas_session, dns_url, payload) class MAASIP(object): @@ -104,6 +260,9 @@ class MAASIP(object): self.ipaddress = ipaddress return self.ipaddress + @retry_on_request_error(retries=options_retries, + base_delay=options_base_delay, + codes=options_codes) def create_ipaddress(self, hostname=None): """ Create an ipaddresses object Due to https://bugs.launchpad.net/maas/+bug/1555393 @@ -159,6 +318,28 @@ def dns_ha(): ''.format(sys.argv[0] .split('/')[-1] .split('.')[0])) + parser.add_argument('--maas_api_retries', '-r', + help='The number of times to retry a MaaS API call', + type=int, + default=3) + parser.add_argument('--maas_base_delay', '-b', + help='The base delay after a failed MaaS API call', + type=int, + default=10) + + def read_int_list(s): + try: + return [int(x.strip()) for x in s.split(',')] + except TypeError: + msg = "Can't convert '{}' into a list of integers".format(s) + return argparse.ArgumentTypeError(msg) + + parser.add_argument('--maas_retry_codes', '-x', + help=('The codes to detect to auto-retry, as a ' + 'comma-separated list.'), + type=read_int_list, + default=[500]) + global options options = parser.parse_args() setup_logging(options.logfile) @@ -183,5 +364,28 @@ def dns_ha(): dns_obj.update_resource() +def main(): + """Entry point for the script. + + Runs dns_ha(), but wraps it with exception handling so that retries using + the MaaS API return 2 from the script, and all other errors return 1. + Otherwise the script returns 0 to indicate that it thinks it succeeded. + + :returns: return code for script + :rtype: int + """ + try: + dns_ha() + except RetriesException as e: + logging.error("'{}' failed retries: {}".format(sys.argv[0], str(e))) + return 1 + except Exception as e: + logging.error("'{}' failed due to: {}".format(sys.argv[0], str(e))) + import traceback + logging.error("Traceback:\n{}".format(traceback.format_exc())) + return 2 + return 0 + + if __name__ == '__main__': - dns_ha() + sys.exit(main()) diff --git a/ocf/maas/maasclient/__init__.py b/ocf/maas/maasclient/__init__.py index 70d017a..4eb0da1 100644 --- a/ocf/maas/maasclient/__init__.py +++ b/ocf/maas/maasclient/__init__.py @@ -57,7 +57,6 @@ class MAASClient(object): """ Get a listing of DNS resources which are currently defined. - :returns: a list of DNS objects DNS object is a dictionary of the form: {'fqdn': 'keystone.maas', 'resource_records': [], @@ -65,6 +64,9 @@ class MAASClient(object): 'resource_uri': '/MAAS/api/2.0/dnsresources/1/', 'ip_addresses': [], 'id': 1} + + :returns: a list of DNS objects + :rtype: List[Dict[str, Any]] """ resp = self.driver.get_dnsresources() if resp.ok: @@ -79,12 +81,10 @@ class MAASClient(object): /api/2.0/dnsresources/{dnsresource_id}/ :param fqdn: The fqdn address to update :param ip_address: The ip address to update the A record to point to - :returns: True if the DNS object was updated, False otherwise. + :returns: the response from the requests method + :rtype: maasclient.driver.Response """ - resp = self.driver.update_dnsresource(rid, fqdn, ip_address) - if resp.ok: - return True - return False + return self.driver.update_dnsresource(rid, fqdn, ip_address) def create_dnsresource(self, fqdn, ip_address, address_ttl=None): """ @@ -93,12 +93,10 @@ class MAASClient(object): :param fqdn: The fqdn address to update :param ip_address: The ip address to update the A record to point to :param adress_ttl: DNS time to live - :returns: True if the DNS object was updated, False otherwise. + :returns: the response from the requests method + :rtype: maasclient.driver.Response """ - resp = self.driver.create_dnsresource(fqdn, ip_address, address_ttl) - if resp.ok: - return True - return False + return self.driver.create_dnsresource(fqdn, ip_address, address_ttl) ########################################################################### # IP API - http://maas.ubuntu.com/docs2.0/api.html#ip-address @@ -108,6 +106,7 @@ class MAASClient(object): Get a list of ip addresses :returns: a list of ip address dictionaries + :rtype: List[str] """ resp = self.driver.get_ipaddresses() if resp.ok: @@ -120,9 +119,7 @@ class MAASClient(object): :param ip_address: The ip address to register :param hostname: the hostname to register at the same time - :returns: True if the DNS object was updated, False otherwise. + :returns: the response from the requests method + :rtype: maasclient.driver.Response """ - resp = self.driver.create_ipaddress(ip_address, hostname) - if resp.ok: - return True - return False + return self.driver.create_ipaddress(ip_address, hostname) diff --git a/ocf/maas/maasclient/apidriver.py b/ocf/maas/maasclient/apidriver.py index 5e16f48..9fdc317 100644 --- a/ocf/maas/maasclient/apidriver.py +++ b/ocf/maas/maasclient/apidriver.py @@ -89,10 +89,11 @@ class APIDriver(MAASDriver): log.debug("Request %s results: [%s] %s", path, response.getcode(), payload) - if response.getcode() == OK: - return Response(True, yaml.load(payload)) + code = response.getcode() + if code == OK: + return Response(True, yaml.load(payload), code) else: - return Response(False, payload) + return Response(False, payload, code) def _post(self, path, op='update', **kwargs): """ @@ -104,17 +105,18 @@ class APIDriver(MAASDriver): log.debug("Request %s results: [%s] %s", path, response.getcode(), payload) - if response.getcode() == OK: - return Response(True, yaml.load(payload)) + code = response.getcode() + if code == OK: + return Response(True, yaml.load(payload), code) else: - return Response(False, payload) + return Response(False, payload, code) except HTTPError as e: log.error("Error encountered: %s for %s with params %s", str(e), path, str(kwargs)) - return Response(False, None) + return Response(False, None, None) except Exception as e: log.error("Post request raised exception: %s", e) - return Response(False, None) + return Response(False, None, None) def _put(self, path, **kwargs): """ @@ -125,17 +127,18 @@ class APIDriver(MAASDriver): payload = response.read() log.debug("Request %s results: [%s] %s", path, response.getcode(), payload) - if response.getcode() == OK: - return Response(True, payload) + code = response.getcode() + if code == OK: + return Response(True, payload, code) else: - return Response(False, payload) + return Response(False, payload, code) except HTTPError as e: log.error("Error encountered: %s with details: %s for %s with " "params %s", e, e.read(), path, str(kwargs)) - return Response(False, None) + return Response(False, None, None) except Exception as e: log.error("Put request raised exception: %s", e) - return Response(False, None) + return Response(False, None, None) ########################################################################### # DNS API - http://maas.ubuntu.com/docs2.0/api.html#dnsresource diff --git a/ocf/maas/maasclient/driver.py b/ocf/maas/maasclient/driver.py index 9bcae78..7dc97eb 100644 --- a/ocf/maas/maasclient/driver.py +++ b/ocf/maas/maasclient/driver.py @@ -18,12 +18,15 @@ log = logging.getLogger('vmaas.main') class Response(object): + """Response for the API calls to use internally + + The status_code member is to make it look a bit like a requests Response + object so that it can be used in the retry decorator. """ - Response for the API calls to use internally - """ - def __init__(self, ok=False, data=None): + def __init__(self, ok=False, data=None, status_code=None): self.ok = ok self.data = data + self.status_code = status_code def __nonzero__(self): """Allow boolean comparison"""