diff --git a/releasenotes/notes/datasource-query-retry-00cba5f7e68aec39.yaml b/releasenotes/notes/datasource-query-retry-00cba5f7e68aec39.yaml new file mode 100644 index 000000000..b4b5c622d --- /dev/null +++ b/releasenotes/notes/datasource-query-retry-00cba5f7e68aec39.yaml @@ -0,0 +1,17 @@ +--- +features: + - | + All datasources can now be configured to retry retrieving a metric upon + encountering an error. Between each attempt will be a set amount of time + which can be adjusted from the configuration. These configuration + options can be found in the `[watcher_datasources]` group and are named + `query_max_retries` and `query_timeout`. +upgrade: + - | + If Gnocchi was configured to have a custom amount of retries and or a + custom timeout then the configuration needs to moved into the + `[watcher_datasources]` group instead of the `[gnocchi_client]` group. +deprecations: + - | + The configuration options for query retries in `[gnocchi_client]` are + deprecated and the option in `[watcher_datasources]` should now be used. \ No newline at end of file diff --git a/watcher/conf/datasources.py b/watcher/conf/datasources.py index 6fcc5449e..21b6da0a7 100644 --- a/watcher/conf/datasources.py +++ b/watcher/conf/datasources.py @@ -34,7 +34,19 @@ DATASOURCES_OPTS = [ " the default for all strategies unless a strategy has a" " specific override.", item_type=cfg.types.String(choices=possible_datasources), - default=possible_datasources) + default=possible_datasources), + cfg.IntOpt('query_max_retries', + min=1, + default=10, + mutable=True, + help='How many times Watcher is trying to query again', + deprecated_group="gnocchi_client"), + cfg.IntOpt('query_timeout', + min=0, + default=1, + mutable=True, + help='How many seconds Watcher should wait to do query again', + deprecated_group="gnocchi_client") ] diff --git a/watcher/conf/gnocchi_client.py b/watcher/conf/gnocchi_client.py index 4987ef6e1..592ef7459 100644 --- a/watcher/conf/gnocchi_client.py +++ b/watcher/conf/gnocchi_client.py @@ -32,15 +32,8 @@ GNOCCHI_CLIENT_OPTS = [ 'The default is public.'), cfg.StrOpt('region_name', help='Region in Identity service catalog to use for ' - 'communication with the OpenStack service.'), - cfg.IntOpt('query_max_retries', - default=10, - mutable=True, - help='How many times Watcher is trying to query again'), - cfg.IntOpt('query_timeout', - default=1, - mutable=True, - help='How many seconds Watcher should wait to do query again')] + 'communication with the OpenStack service.') +] def register_opts(conf): diff --git a/watcher/datasources/base.py b/watcher/datasources/base.py index 0af04baf0..6656afef7 100644 --- a/watcher/datasources/base.py +++ b/watcher/datasources/base.py @@ -14,6 +14,15 @@ # limitations under the License. import abc +import time + +from oslo_config import cfg +from oslo_log import log + +from watcher.common import exception + +CONF = cfg.CONF +LOG = log.getLogger(__name__) class DataSourceBase(object): @@ -30,6 +39,9 @@ class DataSourceBase(object): """Possible options for the parameters named resource_type""" RESOURCE_TYPES = ['compute_node', 'instance', 'bare_metal', 'storage'] + """Each datasource should have a uniquely identifying name""" + NAME = '' + """Possible metrics a datasource can support and their internal name""" METRIC_MAP = dict(host_cpu_usage=None, host_ram_usage=None, @@ -44,8 +56,7 @@ class DataSourceBase(object): instance_root_disk_size=None, ) - @abc.abstractmethod - def query_retry(self, f, *args, **kargs): + def query_retry(self, f, *args, **kwargs): """Attempts to retrieve metrics from the external service Attempts to access data from the external service and handles @@ -53,9 +64,26 @@ class DataSourceBase(object): to the value of query_max_retries :param f: The method that performs the actual querying for metrics :param args: Array of arguments supplied to the method - :param kargs: The amount of arguments supplied to the method + :param kwargs: The amount of arguments supplied to the method :return: The value as retrieved from the external service """ + + num_retries = CONF.watcher_datasources.query_max_retries + timeout = CONF.watcher_datasources.query_timeout + for i in range(num_retries): + try: + return f(*args, **kwargs) + except Exception as e: + LOG.exception(e) + self.query_retry_reset(e) + LOG.warning("Retry {0} of {1} while retrieving metrics retry " + "in {2} seconds".format(i+1, num_retries, timeout)) + time.sleep(timeout) + raise exception.DataSourceNotAvailable(datasource=self.NAME) + + @abc.abstractmethod + def query_retry_reset(self, exception_instance): + """Abstract method to perform reset operations upon request failure""" pass @abc.abstractmethod diff --git a/watcher/datasources/ceilometer.py b/watcher/datasources/ceilometer.py index b5497aecc..913b4aff2 100644 --- a/watcher/datasources/ceilometer.py +++ b/watcher/datasources/ceilometer.py @@ -129,15 +129,10 @@ class CeilometerHelper(base.DataSourceBase): "value": end_timestamp}) return query - def query_retry(self, f, *args, **kargs): - try: - return f(*args, **kargs) - except exc.HTTPUnauthorized: + def query_retry_reset(self, exception_instance): + if isinstance(exception_instance, exc.HTTPUnauthorized): self.osc.reset_clients() self.ceilometer = self.osc.ceilometer() - return f(*args, **kargs) - except Exception: - raise def list_metrics(self): """List the user's meters.""" diff --git a/watcher/datasources/gnocchi.py b/watcher/datasources/gnocchi.py index c4ee110bf..89615403d 100644 --- a/watcher/datasources/gnocchi.py +++ b/watcher/datasources/gnocchi.py @@ -18,7 +18,6 @@ from datetime import datetime from datetime import timedelta -import time from oslo_config import cfg from oslo_log import log @@ -52,16 +51,6 @@ class GnocchiHelper(base.DataSourceBase): self.osc = osc if osc else clients.OpenStackClients() self.gnocchi = self.osc.gnocchi() - def query_retry(self, f, *args, **kwargs): - # TODO(Dantali0n) move gnocchi query_max_retries into general config - for i in range(CONF.gnocchi_client.query_max_retries): - try: - return f(*args, **kwargs) - except Exception as e: - LOG.exception(e) - time.sleep(CONF.gnocchi_client.query_timeout) - raise exception.DataSourceNotAvailable(datasource='gnocchi') - def check_availability(self): try: self.query_retry(self.gnocchi.status.get) diff --git a/watcher/datasources/monasca.py b/watcher/datasources/monasca.py index 5dbee611e..c4818d8c7 100644 --- a/watcher/datasources/monasca.py +++ b/watcher/datasources/monasca.py @@ -46,16 +46,6 @@ class MonascaHelper(base.DataSourceBase): self.osc = osc if osc else clients.OpenStackClients() self.monasca = self.osc.monasca() - def query_retry(self, f, *args, **kwargs): - try: - return f(*args, **kwargs) - except exc.Unauthorized: - self.osc.reset_clients() - self.monasca = self.osc.monasca() - return f(*args, **kwargs) - except Exception: - raise - def _format_time_params(self, start_time, end_time, period): """Format time-related params to the correct Monasca format @@ -77,6 +67,11 @@ class MonascaHelper(base.DataSourceBase): return start_timestamp, end_timestamp, period + def query_retry_reset(self, exception_instance): + if isinstance(exception_instance, exc.Unauthorized): + self.osc.reset_clients() + self.monasca = self.osc.monasca() + def check_availability(self): try: self.query_retry(self.monasca.metrics.list) diff --git a/watcher/tests/datasources/test_base.py b/watcher/tests/datasources/test_base.py new file mode 100644 index 000000000..78daa3f83 --- /dev/null +++ b/watcher/tests/datasources/test_base.py @@ -0,0 +1,69 @@ +# -*- encoding: utf-8 -*- +# Copyright (c) 2019 European Organization for Nuclear Research (CERN) +# +# Authors: Corne Lukken +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mock +from oslo_config import cfg + +from watcher.common import exception +from watcher.datasources import base as datasource +from watcher.tests import base + +CONF = cfg.CONF + + +class TestBaseDatasourceHelper(base.BaseTestCase): + + def test_query_retry(self): + exc = Exception() + method = mock.Mock() + # first call will fail but second will succeed + method.side_effect = [exc, True] + # Max 2 attempts + CONF.set_override("query_max_retries", 2, + group='watcher_datasources') + # Reduce sleep time to 0 + CONF.set_override("query_timeout", 0, + group='watcher_datasources') + + helper = datasource.DataSourceBase() + helper.query_retry_reset = mock.Mock() + + self.assertTrue(helper.query_retry(f=method)) + helper.query_retry_reset.assert_called_once_with(exc) + + def test_query_retry_exception(self): + exc = Exception() + method = mock.Mock() + # only third call will succeed + method.side_effect = [exc, exc, True] + # Max 2 attempts + CONF.set_override("query_max_retries", 2, + group='watcher_datasources') + # Reduce sleep time to 0 + CONF.set_override("query_timeout", 0, + group='watcher_datasources') + + helper = datasource.DataSourceBase() + helper.query_retry_reset = mock.Mock() + + # Maximum number of retries exceeded query_retry should raise error + self.assertRaises(exception.DataSourceNotAvailable, + helper.query_retry, f=method) + # query_retry_reset should be called twice + helper.query_retry_reset.assert_has_calls( + [mock.call(exc), mock.call(exc)]) diff --git a/watcher/tests/datasources/test_gnocchi_helper.py b/watcher/tests/datasources/test_gnocchi_helper.py index 370a1cbf3..8e0949299 100644 --- a/watcher/tests/datasources/test_gnocchi_helper.py +++ b/watcher/tests/datasources/test_gnocchi_helper.py @@ -127,7 +127,7 @@ class TestGnocchiHelper(base.BaseTestCase): def test_gnocchi_check_availability_with_failure(self, mock_gnocchi): cfg.CONF.set_override("query_max_retries", 1, - group='gnocchi_client') + group='watcher_datasources') gnocchi = mock.MagicMock() gnocchi.status.get.side_effect = Exception() mock_gnocchi.return_value = gnocchi @@ -147,7 +147,7 @@ class TestGnocchiHelper(base.BaseTestCase): def test_gnocchi_list_metrics_with_failure(self, mock_gnocchi): cfg.CONF.set_override("query_max_retries", 1, - group='gnocchi_client') + group='watcher_datasources') gnocchi = mock.MagicMock() gnocchi.metric.list.side_effect = Exception() mock_gnocchi.return_value = gnocchi