diff --git a/devstack/lib/kuryr_kubernetes b/devstack/lib/kuryr_kubernetes index d216a45ab..59dc68dc2 100644 --- a/devstack/lib/kuryr_kubernetes +++ b/devstack/lib/kuryr_kubernetes @@ -406,7 +406,7 @@ EOF function generate_controller_deployment() { output_dir=$1 - readiness_probe=${2:-False} + health_server_port=$2 mkdir -p "$output_dir" rm -f ${output_dir}/controller_deployment.yml cat >> "${output_dir}/controller_deployment.yml" << EOF @@ -437,19 +437,13 @@ spec: - name: config-volume mountPath: "/etc/kuryr/kuryr.conf" subPath: kuryr.conf -EOF - # Add readiness probe if ports pool functionality is enabled. The rationale - # behind is to make the controller not ready until the precreated ports are - # loaded into the pools - if [ "$readiness_probe" == "True" ]; then - cat >> "${output_dir}/controller_deployment.yml" << EOF readinessProbe: - exec: - command: - - cat - - /tmp/pools_loaded + httpGet: + path: /healthz + port: ${health_server_port} + scheme: HTTP + timeoutSeconds: 5 EOF - fi cat >> "${output_dir}/controller_deployment.yml" << EOF volumes: diff --git a/devstack/plugin.sh b/devstack/plugin.sh index c80d62e43..afdc9db55 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -119,8 +119,7 @@ function generate_containerized_kuryr_resources { local output_dir="${DATA_DIR}/kuryr-kubernetes" generate_kuryr_configmap $output_dir $KURYR_CONFIG $KURYR_CNI_CONFIG generate_kuryr_service_account $output_dir - KURYR_USE_PORTS_POOLS=$(trueorfalse False KURYR_USE_PORTS_POOLS) - generate_controller_deployment $output_dir $KURYR_USE_PORTS_POOLS + generate_controller_deployment $output_dir $KURYR_HEALTH_SERVER_PORT generate_cni_daemon_set $output_dir $CNI_BIN_DIR $CNI_CONF_DIR } diff --git a/devstack/settings b/devstack/settings index f402fb5d5..7555511c7 100644 --- a/devstack/settings +++ b/devstack/settings @@ -73,3 +73,6 @@ KURYR_VIF_POOL_UPDATE_FREQ=${KURYR_VIF_POOL_UPDATE_FREQ:-20} # Kuryr VIF Pool Manager KURYR_VIF_POOL_MANAGER=${KURYR_VIF_POOL_MANAGER:-False} 
+ +# Health Server +KURYR_HEALTH_SERVER_PORT=${KURYR_HEALTH_SERVER_PORT:-8082} diff --git a/doc/source/devref/health_manager.rst b/doc/source/devref/health_manager.rst new file mode 100644 index 000000000..5804757dd --- /dev/null +++ b/doc/source/devref/health_manager.rst @@ -0,0 +1,52 @@ +.. + This work is licensed under a Creative Commons Attribution 3.0 Unported + License. + + http://creativecommons.org/licenses/by/3.0/legalcode + + Convention for heading levels in Neutron devref: + ======= Heading 0 (reserved for the title in a document) + ------- Heading 1 + ~~~~~~~ Heading 2 + +++++++ Heading 3 + ''''''' Heading 4 + (Avoid deeper levels because they do not render well.) + +======================================== +Kuryr Kubernetes Health Manager Design +======================================== + + +Purpose +------- +The purpose of this document is to present the design decisions behind +Kuryr Kubernetes Health Manager. + +The main purpose of the Health Manager is to perform health verifications +that assure Kuryr Controller readiness and so improve the management that +Kubernetes does of the Kuryr Controller pod. + +Overview +-------- + +Kuryr Controller might get into a broken state due to problems like being +unable to connect to services it depends on, or those services being unhealthy. + +It is important to check the health of these services so that Kubernetes and +its users know when Kuryr Controller is ready to perform its networking +tasks. To provide this functionality, Health Manager will verify and serve +the health state of these services to the probe. + +Proposed Solution +----------------- +The Health Manager will provide an endpoint that will check whether it is +able to watch the Kubernetes API, authenticate with Keystone and talk to +Neutron, since these are services needed by Kuryr Controller. These checks +will assure the Controller readiness. 
+ +The idea behind the Manager is to combine all the necessary checks in a +server running inside the Kuryr Controller pod and provide the check results +to the probe. + +This design focuses on providing health checks for the readiness probe, but +another endpoint can be created for liveness probes. diff --git a/doc/source/devref/index.rst b/doc/source/devref/index.rst index 7478157ab..1dcdec941 100644 --- a/doc/source/devref/index.rst +++ b/doc/source/devref/index.rst @@ -39,7 +39,7 @@ Design documents service_support port_manager vif_handler_drivers_design - + health_manager Indices and tables ------------------ diff --git a/kuryr_kubernetes/controller/managers/health.py b/kuryr_kubernetes/controller/managers/health.py new file mode 100644 index 000000000..dbb2ae538 --- /dev/null +++ b/kuryr_kubernetes/controller/managers/health.py @@ -0,0 +1,127 @@ +# Copyright 2018 Maysa de Macedo Souza. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import eventlet +from flask import Flask +from keystoneauth1 import exceptions as k_exc +from keystoneclient import client as keystone_client +from kuryr.lib._i18n import _ +from kuryr.lib import config as kuryr_config +from kuryr.lib import utils +import os +from oslo_config import cfg +from oslo_log import log as logging +import requests +from six.moves import http_client as httplib + +LOG = logging.getLogger(__name__) +CONF = cfg.CONF + +health_server_opts = [ + cfg.IntOpt('port', + help=_('port for Health HTTP Server.'), + default=8082), +] + +CONF.register_opts(health_server_opts, "health_server") + + +class HealthServer(object): + + def __init__(self): + self.ctx = None + + self.application = Flask('health-daemon') + self.application.add_url_rule( + '/healthz', methods=['GET'], view_func=self.read) + self.headers = {'Connection': 'close'} + + def read(self): + data = 'ok' + + if CONF.kubernetes.vif_pool_driver != 'noop': + if not os.path.exists('/tmp/pools_loaded'): + error_message = 'Ports not loaded into the pools.' + LOG.error(error_message) + return error_message, httplib.NOT_FOUND, self.headers + + k8s_conn, status = self.verify_k8s_connection() + if not k8s_conn: + error_message = 'Error when processing k8s healthz request.' + LOG.error(error_message) + return error_message, status, self.headers + try: + self.verify_keystone_connection() + except k_exc.http.HttpError as h_ex: + error_message = 'Error when processing Keystone request %s.' % h_ex + LOG.exception(error_message) + return error_message, h_ex.http_status, self.headers + except Exception as ex: + error_message = 'Error when creating a Keystone client: %s.' % ex + LOG.exception(error_message) + return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers + try: + self.verify_neutron_connection() + except Exception as ex: + error_message = 'Error when creating a Neutron client: %s.' 
% ex + LOG.exception(error_message) + return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers + + LOG.info('Kuryr Controller readiness verified.') + return data, httplib.OK, self.headers + + def run(self): + address = '' + try: + LOG.info('Starting health check server.') + self.application.run(address, CONF.health_server.port) + except Exception: + LOG.exception('Failed to start health check server.') + raise + + def verify_k8s_connection(self): + path = '/healthz' + address = CONF.kubernetes.api_root + url = address + path + resp = requests.get(url, headers={'Connection': 'close'}) + return resp.content == 'ok', resp.status_code + + def verify_keystone_connection(self): + conf_group = kuryr_config.neutron_group.name + auth_plugin = utils.get_auth_plugin(conf_group) + sess = utils.get_keystone_session(conf_group, auth_plugin) + endpoint_type = getattr(getattr(cfg.CONF, conf_group), 'endpoint_type') + ks = keystone_client.Client(session=sess, auth=auth_plugin, + endpoint_type=endpoint_type) + ks.projects.list() + + def verify_neutron_connection(self): + neutron = utils.get_neutron_client() + neutron.list_extensions() + + +class ReadinessChecker(object): + """Proxy server used by readiness probe to manage health checks. + + Allows to verify connectivity with Kubernetes API, Keystone and Neutron. + Also, if pool ports functionality is enabled it is verified whether + the precreated ports are loaded into the pools. 
+ """ + + def __init__(self): + eventlet.spawn(self._start_readiness_checker_daemon) + + def _start_readiness_checker_daemon(self): + server = HealthServer() + server.run() diff --git a/kuryr_kubernetes/controller/service.py b/kuryr_kubernetes/controller/service.py index e9dd8e063..9dba2454e 100644 --- a/kuryr_kubernetes/controller/service.py +++ b/kuryr_kubernetes/controller/service.py @@ -25,6 +25,7 @@ from kuryr_kubernetes import constants from kuryr_kubernetes.controller.handlers import lbaas as h_lbaas from kuryr_kubernetes.controller.handlers import pipeline as h_pipeline from kuryr_kubernetes.controller.handlers import vif as h_vif +from kuryr_kubernetes.controller.managers import health from kuryr_kubernetes import objects from kuryr_kubernetes import watcher @@ -49,6 +50,7 @@ class KuryrK8sService(service.Service): def start(self): LOG.info("Service '%s' starting", self.__class__.__name__) + health.ReadinessChecker() super(KuryrK8sService, self).start() self.watcher.start() LOG.info("Service '%s' started", self.__class__.__name__) diff --git a/kuryr_kubernetes/tests/unit/controller/managers/test_health.py b/kuryr_kubernetes/tests/unit/controller/managers/test_health.py new file mode 100644 index 000000000..5375c24d0 --- /dev/null +++ b/kuryr_kubernetes/tests/unit/controller/managers/test_health.py @@ -0,0 +1,98 @@ +# Copyright 2018 Maysa de Macedo Souza. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from keystoneauth1 import exceptions +from kuryr_kubernetes.controller.managers import health +from kuryr_kubernetes.tests import base +import mock +from oslo_config import cfg as oslo_cfg + + +class TestHealthServer(base.TestCase): + + def setUp(self): + super(TestHealthServer, self).setUp() + self.srv = health.HealthServer() + self.srv.application.testing = True + self.test_client = self.srv.application.test_client() + + @mock.patch('os.path.exists') + @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.' + 'verify_neutron_connection') + @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.' + 'verify_keystone_connection') + @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.' + 'verify_k8s_connection') + def test_read(self, m_verify_k8s_conn, m_verify_keystone_conn, + m_verify_neutron_conn, m_exist): + m_verify_k8s_conn.return_value = True, 200 + m_exist.return_value = True + resp = self.test_client.get('/healthz') + m_verify_k8s_conn.assert_called_once() + m_verify_keystone_conn.assert_called_once() + m_verify_neutron_conn.assert_called_once_with() + + self.assertEqual(200, resp.status_code) + self.assertEqual('ok', resp.data.decode()) + + @mock.patch('os.path.exists') + def test_read_not_found(self, m_exist): + m_exist.return_value = False + oslo_cfg.CONF.set_override('vif_pool_driver', 'neutron', + group='kubernetes') + resp = self.test_client.get('/healthz') + self.assertEqual(404, resp.status_code) + + @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.' + 'verify_k8s_connection') + @mock.patch('os.path.exists') + def test_read_k8s_error(self, m_exist, m_verify_k8s_conn): + m_exist.return_value = True + m_verify_k8s_conn.return_value = False, 503 + resp = self.test_client.get('/healthz') + + m_verify_k8s_conn.assert_called_once() + self.assertEqual(503, resp.status_code) + + @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.' 
+ 'verify_keystone_connection') + @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.' + 'verify_k8s_connection') + @mock.patch('os.path.exists') + def test_read_unauthorized(self, m_exist, m_verify_k8s_conn, + m_verify_keystone_conn): + m_exist.return_value = True + m_verify_k8s_conn.return_value = True, 200 + m_verify_keystone_conn.side_effect = exceptions.http.Unauthorized + resp = self.test_client.get('/healthz') + + m_verify_keystone_conn.assert_called_once() + self.assertEqual(401, resp.status_code) + + @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.' + 'verify_neutron_connection') + @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.' + 'verify_keystone_connection') + @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.' + 'verify_k8s_connection') + @mock.patch('os.path.exists') + def test_read_neutron_error(self, m_exist, m_verify_k8s_conn, + m_verify_keystone_conn, m_verify_neutron_conn): + m_exist.return_value = True + m_verify_k8s_conn.return_value = True, 200 + m_verify_neutron_conn.side_effect = Exception + resp = self.test_client.get('/healthz') + + m_verify_neutron_conn.assert_called_once() + self.assertEqual(500, resp.status_code) diff --git a/tools/generate_k8s_resource_definitions.sh b/tools/generate_k8s_resource_definitions.sh index 36518f86b..31304fb43 100755 --- a/tools/generate_k8s_resource_definitions.sh +++ b/tools/generate_k8s_resource_definitions.sh @@ -101,6 +101,6 @@ fi generate_kuryr_configmap $OUTPUT_DIR $CONTROLLER_CONF_PATH $CNI_CONF_PATH generate_kuryr_service_account $OUTPUT_DIR -readiness_probe=${KURYR_USE_PORTS_POOLS:-False} -generate_controller_deployment $OUTPUT_DIR $readiness_probe +health_server_port=${KURYR_HEALTH_SERVER_PORT:-8082} +generate_controller_deployment $OUTPUT_DIR $health_server_port generate_cni_daemon_set $OUTPUT_DIR