Add readiness checks for Kuryr Controller
This patch checks the health of k8s, Keystone and Neutron, by using a server that combines all the verifications. Also, checks if ports are loaded into the pools when required. Partially Implements: blueprint controller-readiness-liveness-probes Change-Id: I09121a61d23fb64b326dae3947d5d24b1329cde3
This commit is contained in:
parent
34f811bbfc
commit
ed2536dbcc
@ -406,7 +406,7 @@ EOF
|
||||
|
||||
function generate_controller_deployment() {
|
||||
output_dir=$1
|
||||
readiness_probe=${2:-False}
|
||||
health_server_port=$2
|
||||
mkdir -p "$output_dir"
|
||||
rm -f ${output_dir}/controller_deployment.yml
|
||||
cat >> "${output_dir}/controller_deployment.yml" << EOF
|
||||
@ -437,19 +437,13 @@ spec:
|
||||
- name: config-volume
|
||||
mountPath: "/etc/kuryr/kuryr.conf"
|
||||
subPath: kuryr.conf
|
||||
EOF
|
||||
# Add readiness probe if ports pool functionality is enabled. The rationale
|
||||
# behind is to make the controller not ready until the precreated ports are
|
||||
# loaded into the pools
|
||||
if [ "$readiness_probe" == "True" ]; then
|
||||
cat >> "${output_dir}/controller_deployment.yml" << EOF
|
||||
readinessProbe:
|
||||
exec:
|
||||
command:
|
||||
- cat
|
||||
- /tmp/pools_loaded
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: ${health_server_port}
|
||||
scheme: HTTP
|
||||
timeoutSeconds: 5
|
||||
EOF
|
||||
fi
|
||||
|
||||
cat >> "${output_dir}/controller_deployment.yml" << EOF
|
||||
volumes:
|
||||
|
@ -119,8 +119,7 @@ function generate_containerized_kuryr_resources {
|
||||
local output_dir="${DATA_DIR}/kuryr-kubernetes"
|
||||
generate_kuryr_configmap $output_dir $KURYR_CONFIG $KURYR_CNI_CONFIG
|
||||
generate_kuryr_service_account $output_dir
|
||||
KURYR_USE_PORTS_POOLS=$(trueorfalse False KURYR_USE_PORTS_POOLS)
|
||||
generate_controller_deployment $output_dir $KURYR_USE_PORTS_POOLS
|
||||
generate_controller_deployment $output_dir $KURYR_HEALTH_SERVER_PORT
|
||||
generate_cni_daemon_set $output_dir $CNI_BIN_DIR $CNI_CONF_DIR
|
||||
}
|
||||
|
||||
|
@ -73,3 +73,6 @@ KURYR_VIF_POOL_UPDATE_FREQ=${KURYR_VIF_POOL_UPDATE_FREQ:-20}
|
||||
|
||||
# Kuryr VIF Pool Manager
|
||||
KURYR_VIF_POOL_MANAGER=${KURYR_VIF_POOL_MANAGER:-False}
|
||||
|
||||
# Health Server
|
||||
KURYR_HEALTH_SERVER_PORT=${KURYR_HEALTH_SERVER_PORT:-8082}
|
||||
|
52
doc/source/devref/health_manager.rst
Normal file
52
doc/source/devref/health_manager.rst
Normal file
@ -0,0 +1,52 @@
|
||||
..
|
||||
This work is licensed under a Creative Commons Attribution 3.0 Unported
|
||||
License.
|
||||
|
||||
http://creativecommons.org/licenses/by/3.0/legalcode
|
||||
|
||||
Convention for heading levels in Neutron devref:
|
||||
======= Heading 0 (reserved for the title in a document)
|
||||
------- Heading 1
|
||||
~~~~~~~ Heading 2
|
||||
+++++++ Heading 3
|
||||
''''''' Heading 4
|
||||
(Avoid deeper levels because they do not render well.)
|
||||
|
||||
========================================
|
||||
Kuryr Kubernetes Health Manager Design
|
||||
========================================
|
||||
|
||||
|
||||
Purpose
|
||||
-------
|
||||
The purpose of this document is to present the design decisions behind
|
||||
Kuryr Kubernetes Health Manager.
|
||||
|
||||
The main purpose of the Health Manager is to perform Health verifications
|
||||
that assure Kuryr Controller readiness and so improve the management that
|
||||
Kubernetes does on Kuryr Controller pod.
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
Kuryr Controller might get to a broken state due to problems such as being
unable to connect to the services it depends on, or those services being
unhealthy.
|
||||
|
||||
It is important to check health of these services so that Kubernetes and
|
||||
its users know when Kuryr Controller is ready to perform its networking
|
||||
tasks. To provide this functionality, Health Manager will verify and serve
|
||||
the health state of these services to the probe.
|
||||
|
||||
Proposed Solution
|
||||
-----------------
|
||||
The Health Manager will provide an endpoint that will check whether it is
|
||||
able to watch the Kubernetes API, authenticate with Keystone and talk to
|
||||
Neutron, since these are services needed by Kuryr Controller. These checks
|
||||
will assure the Controller readiness.
|
||||
|
||||
The idea behind the Manager is to combine all the necessary checks in a
|
||||
server running inside the Kuryr Controller pod and provide the check results
|
||||
to the probe.
|
||||
|
||||
This design focuses on providing health checks for readiness probe, but
|
||||
another endpoint can be created for liveness probes.
|
@ -39,7 +39,7 @@ Design documents
|
||||
service_support
|
||||
port_manager
|
||||
vif_handler_drivers_design
|
||||
|
||||
health_manager
|
||||
|
||||
Indices and tables
|
||||
------------------
|
||||
|
127
kuryr_kubernetes/controller/managers/health.py
Normal file
127
kuryr_kubernetes/controller/managers/health.py
Normal file
@ -0,0 +1,127 @@
|
||||
# Copyright 2018 Maysa de Macedo Souza.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import eventlet
|
||||
from flask import Flask
|
||||
from keystoneauth1 import exceptions as k_exc
|
||||
from keystoneclient import client as keystone_client
|
||||
from kuryr.lib._i18n import _
|
||||
from kuryr.lib import config as kuryr_config
|
||||
from kuryr.lib import utils
|
||||
import os
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
import requests
|
||||
from six.moves import http_client as httplib
|
||||
|
||||
CONF = cfg.CONF
LOG = logging.getLogger(__name__)

# Options living in the [health_server] configuration group.
health_server_opts = [
    cfg.IntOpt('port', default=8082,
               help=_('port for Health HTTP Server.')),
]

# Registered at import time so CONF.health_server.port is available to the
# server defined in this module.
CONF.register_opts(health_server_opts, group="health_server")
|
||||
|
||||
|
||||
class HealthServer(object):
    """Flask based HTTP server that reports Kuryr Controller readiness.

    A single ``GET /healthz`` endpoint runs, in order, the checks below and
    stops at the first one that fails:

    * when a VIF pool driver other than ``noop`` is configured, the marker
      file ``/tmp/pools_loaded`` must exist;
    * the Kubernetes API ``/healthz`` endpoint must answer ``ok``;
    * a Keystone client must be able to list projects;
    * a Neutron client must be able to list extensions.
    """

    def __init__(self):
        # Not read anywhere else in this class.
        self.ctx = None

        self.application = Flask('health-daemon')
        self.application.add_url_rule(
            '/healthz', methods=['GET'], view_func=self.read)
        # Sent with every response produced by read().
        self.headers = {'Connection': 'close'}

    def read(self):
        """Run every readiness check and build the HTTP response.

        :returns: a ``(body, status, headers)`` tuple. ``('ok', 200, ...)``
                  when all checks pass; otherwise an error message with a
                  status code of 400 or above.
        """
        data = 'ok'

        if CONF.kubernetes.vif_pool_driver != 'noop':
            if not os.path.exists('/tmp/pools_loaded'):
                error_message = 'Ports not loaded into the pools.'
                LOG.error(error_message)
                return error_message, httplib.NOT_FOUND, self.headers

        k8s_conn, status = self.verify_k8s_connection()
        if not k8s_conn:
            error_message = 'Error when processing k8s healthz request.'
            LOG.error(error_message)
            # An HTTP probe counts any status below 400 as success, so a
            # failed check must never echo a 2xx/3xx code received from the
            # Kubernetes API (e.g. 200 with an unexpected body).
            if status < httplib.BAD_REQUEST:
                status = httplib.INTERNAL_SERVER_ERROR
            return error_message, status, self.headers

        try:
            self.verify_keystone_connection()
        except k_exc.http.HttpError as h_ex:
            error_message = 'Error when processing Keystone request %s.' % h_ex
            LOG.exception(error_message)
            return error_message, h_ex.http_status, self.headers
        except Exception as ex:
            error_message = 'Error when creating a Keystone client: %s.' % ex
            LOG.exception(error_message)
            return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers

        try:
            self.verify_neutron_connection()
        except Exception as ex:
            error_message = 'Error when creating a Neutron client: %s.' % ex
            LOG.exception(error_message)
            return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers

        LOG.info('Kuryr Controller readiness verified.')
        return data, httplib.OK, self.headers

    def run(self):
        """Serve the Flask application on the configured port.

        The empty address is passed to Flask as the host argument.

        :raises Exception: whatever ``Flask.run`` raised, after logging it.
        """
        address = ''
        try:
            LOG.info('Starting health check server.')
            self.application.run(address, CONF.health_server.port)
        except Exception:
            LOG.exception('Failed to start health check server.')
            raise

    def verify_k8s_connection(self):
        """Query the ``/healthz`` endpoint of the Kubernetes API.

        :returns: a ``(healthy, status_code)`` tuple; ``healthy`` is True
                  only when the response body is exactly ``ok``. When the
                  request itself fails, ``(False, 500)`` is returned.
        """
        url = CONF.kubernetes.api_root + '/healthz'
        try:
            resp = requests.get(url, headers={'Connection': 'close'})
        except requests.exceptions.RequestException:
            # Report transport failures the same way the Keystone and
            # Neutron checks do, instead of letting them escape the view.
            LOG.exception('Unable to reach the Kubernetes API at %s.', url)
            return False, httplib.INTERNAL_SERVER_ERROR
        # resp.content is bytes; comparing it with a text literal is always
        # False on Python 3.
        return resp.content == b'ok', resp.status_code

    def verify_keystone_connection(self):
        """Build a Keystone client from the Neutron config group and use it.

        Any exception raised while creating the session/client or listing
        projects is propagated to the caller.
        """
        conf_group = kuryr_config.neutron_group.name
        auth_plugin = utils.get_auth_plugin(conf_group)
        sess = utils.get_keystone_session(conf_group, auth_plugin)
        endpoint_type = getattr(getattr(cfg.CONF, conf_group), 'endpoint_type')
        ks = keystone_client.Client(session=sess, auth=auth_plugin,
                                    endpoint_type=endpoint_type)
        ks.projects.list()

    def verify_neutron_connection(self):
        """Create a Neutron client and list extensions with it.

        Any exception raised by either step is propagated to the caller.
        """
        neutron = utils.get_neutron_client()
        neutron.list_extensions()
|
||||
|
||||
|
||||
class ReadinessChecker(object):
    """Proxy server the readiness probe queries to manage health checks.

    It verifies connectivity with the Kubernetes API, Keystone and Neutron.
    In addition, when the ports pool functionality is enabled, it verifies
    whether the precreated ports have been loaded into the pools.
    """

    def __init__(self):
        # Hand the server loop to eventlet; the constructor does not wait
        # on the spawned green thread.
        eventlet.spawn(self._start_readiness_checker_daemon)

    def _start_readiness_checker_daemon(self):
        """Create the health server and run it."""
        HealthServer().run()
|
@ -25,6 +25,7 @@ from kuryr_kubernetes import constants
|
||||
from kuryr_kubernetes.controller.handlers import lbaas as h_lbaas
|
||||
from kuryr_kubernetes.controller.handlers import pipeline as h_pipeline
|
||||
from kuryr_kubernetes.controller.handlers import vif as h_vif
|
||||
from kuryr_kubernetes.controller.managers import health
|
||||
from kuryr_kubernetes import objects
|
||||
from kuryr_kubernetes import watcher
|
||||
|
||||
@ -49,6 +50,7 @@ class KuryrK8sService(service.Service):
|
||||
|
||||
def start(self):
|
||||
LOG.info("Service '%s' starting", self.__class__.__name__)
|
||||
health.ReadinessChecker()
|
||||
super(KuryrK8sService, self).start()
|
||||
self.watcher.start()
|
||||
LOG.info("Service '%s' started", self.__class__.__name__)
|
||||
|
@ -0,0 +1,98 @@
|
||||
# Copyright 2018 Maysa de Macedo Souza.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from keystoneauth1 import exceptions
|
||||
from kuryr_kubernetes.controller.managers import health
|
||||
from kuryr_kubernetes.tests import base
|
||||
import mock
|
||||
from oslo_config import cfg as oslo_cfg
|
||||
|
||||
|
||||
class TestHealthServer(base.TestCase):
    """Tests for the ``/healthz`` view served by ``health.HealthServer``.

    Call assertions use ``assert_called_once_with()`` throughout: on old
    ``mock`` releases ``assert_called_once()`` is an auto-created attribute
    that never fails.
    """

    def setUp(self):
        super(TestHealthServer, self).setUp()
        self.srv = health.HealthServer()
        self.srv.application.testing = True
        self.test_client = self.srv.application.test_client()

    @mock.patch('os.path.exists')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_neutron_connection')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_keystone_connection')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_k8s_connection')
    def test_read(self, m_verify_k8s_conn, m_verify_keystone_conn,
                  m_verify_neutron_conn, m_exist):
        """Every check passes: the view answers 200 with body ``ok``."""
        m_verify_k8s_conn.return_value = True, 200
        m_exist.return_value = True
        resp = self.test_client.get('/healthz')
        m_verify_k8s_conn.assert_called_once_with()
        m_verify_keystone_conn.assert_called_once_with()
        m_verify_neutron_conn.assert_called_once_with()

        self.assertEqual(200, resp.status_code)
        self.assertEqual('ok', resp.data.decode())

    @mock.patch('os.path.exists')
    def test_read_not_found(self, m_exist):
        """Pools enabled but marker file missing: the view answers 404."""
        m_exist.return_value = False
        oslo_cfg.CONF.set_override('vif_pool_driver', 'neutron',
                                   group='kubernetes')
        # Do not let the override leak into the other test cases.
        self.addCleanup(oslo_cfg.CONF.clear_override, 'vif_pool_driver',
                        group='kubernetes')
        resp = self.test_client.get('/healthz')
        self.assertEqual(404, resp.status_code)

    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_k8s_connection')
    @mock.patch('os.path.exists')
    def test_read_k8s_error(self, m_exist, m_verify_k8s_conn):
        """A failing k8s check propagates its error status code."""
        m_exist.return_value = True
        m_verify_k8s_conn.return_value = False, 503
        resp = self.test_client.get('/healthz')

        m_verify_k8s_conn.assert_called_once_with()
        self.assertEqual(503, resp.status_code)

    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_keystone_connection')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_k8s_connection')
    @mock.patch('os.path.exists')
    def test_read_unauthorized(self, m_exist, m_verify_k8s_conn,
                               m_verify_keystone_conn):
        """A Keystone ``Unauthorized`` error is reported as 401."""
        m_exist.return_value = True
        m_verify_k8s_conn.return_value = True, 200
        m_verify_keystone_conn.side_effect = exceptions.http.Unauthorized
        resp = self.test_client.get('/healthz')

        m_verify_keystone_conn.assert_called_once_with()
        self.assertEqual(401, resp.status_code)

    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_neutron_connection')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_keystone_connection')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_k8s_connection')
    @mock.patch('os.path.exists')
    def test_read_neutron_error(self, m_exist, m_verify_k8s_conn,
                                m_verify_keystone_conn, m_verify_neutron_conn):
        """Any exception from the Neutron check is reported as 500."""
        m_exist.return_value = True
        m_verify_k8s_conn.return_value = True, 200
        m_verify_neutron_conn.side_effect = Exception
        resp = self.test_client.get('/healthz')

        m_verify_neutron_conn.assert_called_once_with()
        self.assertEqual(500, resp.status_code)
|
@ -101,6 +101,6 @@ fi
|
||||
|
||||
generate_kuryr_configmap $OUTPUT_DIR $CONTROLLER_CONF_PATH $CNI_CONF_PATH
|
||||
generate_kuryr_service_account $OUTPUT_DIR
|
||||
readiness_probe=${KURYR_USE_PORTS_POOLS:-False}
|
||||
generate_controller_deployment $OUTPUT_DIR $readiness_probe
|
||||
health_server_port=${KURYR_HEALTH_SERVER_PORT:-8082}
|
||||
generate_controller_deployment $OUTPUT_DIR $health_server_port
|
||||
generate_cni_daemon_set $OUTPUT_DIR
|
||||
|
Loading…
x
Reference in New Issue
Block a user