From 582b4bab0ed0362a0e795590c806d930414acfba Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Sun, 27 Sep 2020 12:32:58 -0700 Subject: [PATCH] Make /healthcheck cache results The healthcheck endpoint should cache results to reduce the potential load on the backend systems being tested. This patch adds the caching and a configuration setting for the interval between cache refreshes. Change-Id: Ic97a991437144f3a220d9b96839cec5b63565f8c Story: 2008203 Task: 40987 (cherry picked from commit 6c54eab5b5d8fd5300cd5d62e23e6cadc1641636) --- doc/source/admin/healthcheck.rst | 19 +++++++++++++++ etc/octavia.conf | 3 +++ .../api/healthcheck/healthcheck_plugins.py | 23 +++++++++++++++++-- octavia/common/config.py | 3 +++ .../tests/functional/api/test_healthcheck.py | 9 ++++++++ .../healthcheck-cache-641f0a64e6f5856c.yaml | 5 ++++ 6 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/healthcheck-cache-641f0a64e6f5856c.yaml diff --git a/doc/source/admin/healthcheck.rst b/doc/source/admin/healthcheck.rst index ebe8ac8377..e6a67905fb 100644 --- a/doc/source/admin/healthcheck.rst +++ b/doc/source/admin/healthcheck.rst @@ -527,6 +527,25 @@ You will then need to select the desired monitoring backend plugins: changes. Not only does it not run any tests, it will return 204 results instead of 200. +The Octavia API health monitoring endpoint does not require a keystone token +for access to allow external load balancers to query the endpoint. For this +reason we recommend you restrict access to it on your external load balancer +to prevent abuse. + +As an additional protection, the API will cache results for a configurable +period of time. This means that queries to the health monitoring endpoint +will return cached results until the refresh interval has expired, at which +point the health check plugin will rerun the check. + +By default, the refresh interval is five seconds. This can be configured by +adjusting the healthcheck_refresh_interval setting in the Octavia configuration +file: + +.. code-block:: ini + + [api_settings] + healthcheck_refresh_interval = 5 + Optionally you can enable the "detailed" mode in Oslo middleware healthcheck. This will cause Oslo middleware healthcheck to return additional information about the API instance. It will also provide exception details if one was diff --git a/etc/octavia.conf b/etc/octavia.conf index 1c8e9f7572..a3a7d3fff4 100644 --- a/etc/octavia.conf +++ b/etc/octavia.conf @@ -56,6 +56,9 @@ # Boolean to enable/disable oslo middleware /healthcheck in the Octavia API # healthcheck_enabled = False +# The interval healthcheck plugins should cache results, in seconds. +# healthcheck_refresh_interval = 5 + # Default cipher string for new TLS-terminated listeners # Cipher strings are in OpenSSL format, see https://www.openssl.org/docs/man1.1.1/man1/ciphers.html # This example is the "Broad Compatibility" cipher string from OWASP, diff --git a/octavia/api/healthcheck/healthcheck_plugins.py b/octavia/api/healthcheck/healthcheck_plugins.py index 29141c4db2..2992aff39b 100644 --- a/octavia/api/healthcheck/healthcheck_plugins.py +++ b/octavia/api/healthcheck/healthcheck_plugins.py @@ -11,23 +11,42 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. +import datetime + +from oslo_config import cfg from oslo_middleware.healthcheck import pluginbase from octavia.db import api as db_apis from octavia.db import healthcheck +CONF = cfg.CONF + class OctaviaDBHealthcheck(pluginbase.HealthcheckBaseExtension): UNAVAILABLE_REASON = 'The Octavia database is unavailable.' + last_check = None + last_result = None + last_message = None + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def healthcheck(self, server_port): try: - result, message = healthcheck.check_database_connection( - db_apis.get_session()) + if (self.last_check is not None and + ((datetime.datetime.now() - + self.last_check).total_seconds()) < + CONF.api_settings.healthcheck_refresh_interval): + result = self.last_result + message = self.last_message + else: + result, message = healthcheck.check_database_connection( + db_apis.get_session()) + self.last_check = datetime.datetime.now() + self.last_result = result + self.last_message = message if result: return OctaviaDBCheckResult(available=True, reason="OK") else: diff --git a/octavia/common/config.py b/octavia/common/config.py index ae3c9794b8..fec0288729 100644 --- a/octavia/common/config.py +++ b/octavia/common/config.py @@ -105,6 +105,9 @@ api_opts = [ cfg.BoolOpt('healthcheck_enabled', default=False, help=_("When True, the oslo middleware healthcheck endpoint " "is enabled in the Octavia API.")), + cfg.IntOpt('healthcheck_refresh_interval', default=5, + help=_("The interval healthcheck plugins should cache results, " + "in seconds.")), cfg.StrOpt('default_listener_ciphers', default=constants.CIPHERS_OWASP_SUITE_B, help=_("Default OpenSSL cipher string (colon-separated) for " diff --git a/octavia/tests/functional/api/test_healthcheck.py b/octavia/tests/functional/api/test_healthcheck.py index 13ecafa5d4..cdf497132b 100644 --- a/octavia/tests/functional/api/test_healthcheck.py +++ b/octavia/tests/functional/api/test_healthcheck.py @@ -49,6 +49,7 @@ class TestHealthCheck(base_db_test.OctaviaDBTestBase): self.conf = self.useFixture(oslo_fixture.Config(cfg.CONF)) self.conf.config(group='healthcheck', backends=['octavia_db_check']) + self.conf.config(group='api_settings', healthcheck_refresh_interval=5) self.UNAVAILABLE = (healthcheck_plugins.OctaviaDBHealthcheck. UNAVAILABLE_REASON) @@ -145,6 +146,14 @@ class TestHealthCheck(base_db_test.OctaviaDBTestBase): self.assertIn('OK', response.text) self.assertIn('Garbage collector', response.text) + def test_healthcheck_get_text_cached(self): + self.conf.config(group='healthcheck', detailed=False) + app = self._get_enabled_app() + for i in range(10): + response = self._get(app, '/healthcheck') + self.assertEqual(200, response.status_code) + self.assertEqual('OK', response.text) + def test_healthcheck_disabled_get(self): self._get(self._get_disabled_app(), '/healthcheck', status=404) diff --git a/releasenotes/notes/healthcheck-cache-641f0a64e6f5856c.yaml b/releasenotes/notes/healthcheck-cache-641f0a64e6f5856c.yaml new file mode 100644 index 0000000000..767fe78156 --- /dev/null +++ b/releasenotes/notes/healthcheck-cache-641f0a64e6f5856c.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixed the healthcheck endpoint always querying the backends by caching + results for a configurable time. The default is five seconds.