Add readiness checks for Kuryr Controller

This patch checks the health of k8s, Keystone and Neutron,
by using a server that combines all the verifications. Also, checks
if ports are loaded into the pools when required.

Partially Implements: blueprint controller-readiness-liveness-probes
Change-Id: I09121a61d23fb64b326dae3947d5d24b1329cde3
This commit is contained in:
Maysa Macedo 2017-12-20 14:15:53 +00:00
parent 34f811bbfc
commit ed2536dbcc
9 changed files with 292 additions and 17 deletions

View File

@ -406,7 +406,7 @@ EOF
function generate_controller_deployment() {
output_dir=$1
readiness_probe=${2:-False}
health_server_port=$2
mkdir -p "$output_dir"
rm -f ${output_dir}/controller_deployment.yml
cat >> "${output_dir}/controller_deployment.yml" << EOF
@ -437,19 +437,13 @@ spec:
- name: config-volume
mountPath: "/etc/kuryr/kuryr.conf"
subPath: kuryr.conf
EOF
# Add readiness probe if ports pool functionality is enabled. The rationale
# behind is to make the controller not ready until the precreated ports are
# loaded into the pools
if [ "$readiness_probe" == "True" ]; then
cat >> "${output_dir}/controller_deployment.yml" << EOF
readinessProbe:
exec:
command:
- cat
- /tmp/pools_loaded
httpGet:
path: /healthz
port: ${health_server_port}
scheme: HTTP
timeoutSeconds: 5
EOF
fi
cat >> "${output_dir}/controller_deployment.yml" << EOF
volumes:

View File

@ -119,8 +119,7 @@ function generate_containerized_kuryr_resources {
local output_dir="${DATA_DIR}/kuryr-kubernetes"
generate_kuryr_configmap $output_dir $KURYR_CONFIG $KURYR_CNI_CONFIG
generate_kuryr_service_account $output_dir
KURYR_USE_PORTS_POOLS=$(trueorfalse False KURYR_USE_PORTS_POOLS)
generate_controller_deployment $output_dir $KURYR_USE_PORTS_POOLS
generate_controller_deployment $output_dir $KURYR_HEALTH_SERVER_PORT
generate_cni_daemon_set $output_dir $CNI_BIN_DIR $CNI_CONF_DIR
}

View File

@ -73,3 +73,6 @@ KURYR_VIF_POOL_UPDATE_FREQ=${KURYR_VIF_POOL_UPDATE_FREQ:-20}
# Kuryr VIF Pool Manager
KURYR_VIF_POOL_MANAGER=${KURYR_VIF_POOL_MANAGER:-False}
# Health Server
KURYR_HEALTH_SERVER_PORT=${KURYR_HEALTH_SERVER_PORT:-8082}

View File

@ -0,0 +1,52 @@
..
This work is licensed under a Creative Commons Attribution 3.0 Unported
License.
http://creativecommons.org/licenses/by/3.0/legalcode
Convention for heading levels in Neutron devref:
======= Heading 0 (reserved for the title in a document)
------- Heading 1
~~~~~~~ Heading 2
+++++++ Heading 3
''''''' Heading 4
(Avoid deeper levels because they do not render well.)
========================================
Kuryr Kubernetes Health Manager Design
========================================
Purpose
-------
The purpose of this document is to present the design decisions behind
the Kuryr Kubernetes Health Manager.
The main purpose of the Health Manager is to perform health verifications
that assure Kuryr Controller readiness, and so improve the management that
Kubernetes performs on the Kuryr Controller pod.
Overview
--------
Kuryr Controller might get into a broken state due to problems such as
being unable to connect to the services it depends on, or those services
not being healthy.
It is important to check the health of these services so that Kubernetes and
its users know when Kuryr Controller is ready to perform its networking
tasks. To provide this functionality, the Health Manager will verify and serve
the health state of these services to the probe.
Proposed Solution
-----------------
The Health Manager will provide an endpoint that will check whether it is
able to watch the Kubernetes API, authenticate with Keystone and talk to
Neutron, since these are services needed by Kuryr Controller. These checks
will assure the Controller readiness.
The idea behind the Manager is to combine all the necessary checks in a
server running inside the Kuryr Controller pod and provide the results of
those checks to the probe.
This design focuses on providing health checks for readiness probe, but
another endpoint can be created for liveness probes.

View File

@ -39,7 +39,7 @@ Design documents
service_support
port_manager
vif_handler_drivers_design
health_manager
Indices and tables
------------------

View File

@ -0,0 +1,127 @@
# Copyright 2018 Maysa de Macedo Souza.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import eventlet
from flask import Flask
from keystoneauth1 import exceptions as k_exc
from keystoneclient import client as keystone_client
from kuryr.lib._i18n import _
from kuryr.lib import config as kuryr_config
from kuryr.lib import utils
import os
from oslo_config import cfg
from oslo_log import log as logging
import requests
from six.moves import http_client as httplib
# Module-level logger and the global oslo.config configuration object.
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
# Options of the health HTTP server: a single integer ``port`` option that
# defaults to 8082.
health_server_opts = [
cfg.IntOpt('port',
help=_('port for Health HTTP Server.'),
default=8082),
]
# Registered under the "health_server" group, so the value is read as
# CONF.health_server.port.
CONF.register_opts(health_server_opts, "health_server")
class HealthServer(object):
    """Flask application exposing the ``/healthz`` readiness endpoint.

    A GET on ``/healthz`` checks, in order: that the precreated ports were
    loaded into the pools (only when a VIF pool driver other than ``noop``
    is configured), that the Kubernetes API answers its own ``/healthz``,
    that a Keystone request succeeds and that a Neutron request succeeds.
    The first failing check determines the response.
    """

    # Seconds to wait for the Kubernetes API before declaring it unreachable.
    # Without a timeout ``requests`` may block forever, which would leave the
    # handler hanging instead of reporting "not ready".
    K8S_REQUEST_TIMEOUT = 3

    def __init__(self):
        self.ctx = None
        self.application = Flask('health-daemon')
        self.application.add_url_rule(
            '/healthz', methods=['GET'], view_func=self.read)
        # Sent with every response so that connections are not kept open.
        self.headers = {'Connection': 'close'}

    def read(self):
        """Handle ``GET /healthz``.

        :returns: a ``(body, status, headers)`` tuple. ``('ok', 200, ...)``
                  when every check passes, otherwise an error message with a
                  non-success status code.
        """
        data = 'ok'

        if (CONF.kubernetes.vif_pool_driver != 'noop' and
                not os.path.exists('/tmp/pools_loaded')):
            error_message = 'Ports not loaded into the pools.'
            LOG.error(error_message)
            return error_message, httplib.NOT_FOUND, self.headers

        k8s_conn, status = self.verify_k8s_connection()
        if not k8s_conn:
            error_message = 'Error when processing k8s healthz request.'
            LOG.error(error_message)
            # HTTP probes treat any status below 400 as success. If the
            # Kubernetes API replied with such a status but an unexpected
            # body, forwarding the status would report the controller as
            # ready, so force an error status instead.
            if status < httplib.BAD_REQUEST:
                status = httplib.INTERNAL_SERVER_ERROR
            return error_message, status, self.headers

        try:
            self.verify_keystone_connection()
        except k_exc.http.HttpError as h_ex:
            error_message = 'Error when processing Keystone request %s.' % h_ex
            LOG.exception(error_message)
            return error_message, h_ex.http_status, self.headers
        except Exception as ex:
            error_message = 'Error when creating a Keystone client: %s.' % ex
            LOG.exception(error_message)
            return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers

        try:
            self.verify_neutron_connection()
        except Exception as ex:
            error_message = 'Error when creating a Neutron client: %s.' % ex
            LOG.exception(error_message)
            return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers

        LOG.info('Kuryr Controller readiness verified.')
        return data, httplib.OK, self.headers

    def run(self):
        """Serve the application on ``CONF.health_server.port``.

        :raises Exception: whatever the server raised, after logging it.
        """
        address = ''
        try:
            LOG.info('Starting health check server.')
            self.application.run(address, CONF.health_server.port)
        except Exception:
            LOG.exception('Failed to start health check server.')
            raise

    def verify_k8s_connection(self):
        """Query the ``/healthz`` endpoint of the Kubernetes API.

        :returns: a ``(healthy, status_code)`` tuple. ``healthy`` is True
                  only when the response body is exactly ``ok``. When the
                  request itself fails (connection error, timeout, ...)
                  ``(False, 503)`` is returned.
        """
        url = CONF.kubernetes.api_root + '/healthz'
        try:
            resp = requests.get(url, headers={'Connection': 'close'},
                                timeout=self.K8S_REQUEST_TIMEOUT)
        except requests.exceptions.RequestException:
            LOG.exception('Unable to reach the Kubernetes API at %s.', url)
            return False, httplib.SERVICE_UNAVAILABLE
        # ``Response.content`` is bytes; comparing it with a text literal is
        # always False on Python 3, so compare against a bytes literal.
        return resp.content == b'ok', resp.status_code

    def verify_keystone_connection(self):
        """Issue a Keystone request using the Neutron credentials.

        Builds a Keystone client from the auth plugin and session of the
        Neutron configuration group and lists projects with it. Any failure
        surfaces as an exception.
        """
        conf_group = kuryr_config.neutron_group.name
        auth_plugin = utils.get_auth_plugin(conf_group)
        sess = utils.get_keystone_session(conf_group, auth_plugin)
        endpoint_type = getattr(getattr(cfg.CONF, conf_group), 'endpoint_type')
        ks = keystone_client.Client(session=sess, auth=auth_plugin,
                                    endpoint_type=endpoint_type)
        ks.projects.list()

    def verify_neutron_connection(self):
        """Issue a Neutron request; any failure surfaces as an exception."""
        neutron = utils.get_neutron_client()
        neutron.list_extensions()
class ReadinessChecker(object):
    """Launcher of the health server queried by the readiness probe.

    The server it starts verifies connectivity with the Kubernetes API,
    Keystone and Neutron. When the ports pool functionality is enabled it
    also verifies that the precreated ports are loaded into the pools.
    """

    def __init__(self):
        # The server is started through eventlet.spawn rather than being
        # run inline in the constructor.
        eventlet.spawn(self._start_readiness_checker_daemon)

    def _start_readiness_checker_daemon(self):
        """Create the health server and run it."""
        HealthServer().run()

View File

@ -25,6 +25,7 @@ from kuryr_kubernetes import constants
from kuryr_kubernetes.controller.handlers import lbaas as h_lbaas
from kuryr_kubernetes.controller.handlers import pipeline as h_pipeline
from kuryr_kubernetes.controller.handlers import vif as h_vif
from kuryr_kubernetes.controller.managers import health
from kuryr_kubernetes import objects
from kuryr_kubernetes import watcher
@ -49,6 +50,7 @@ class KuryrK8sService(service.Service):
def start(self):
LOG.info("Service '%s' starting", self.__class__.__name__)
health.ReadinessChecker()
super(KuryrK8sService, self).start()
self.watcher.start()
LOG.info("Service '%s' started", self.__class__.__name__)

View File

@ -0,0 +1,98 @@
# Copyright 2018 Maysa de Macedo Souza.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keystoneauth1 import exceptions
from kuryr_kubernetes.controller.managers import health
from kuryr_kubernetes.tests import base
import mock
from oslo_config import cfg as oslo_cfg
class TestHealthServer(base.TestCase):
    """Tests of the ``/healthz`` endpoint served by ``HealthServer``."""

    def setUp(self):
        super(TestHealthServer, self).setUp()
        self.srv = health.HealthServer()
        self.srv.application.testing = True
        self.test_client = self.srv.application.test_client()

    @mock.patch('os.path.exists')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_neutron_connection')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_keystone_connection')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_k8s_connection')
    def test_read(self, m_verify_k8s_conn, m_verify_keystone_conn,
                  m_verify_neutron_conn, m_exist):
        """Every check passes: the endpoint answers 200 with body 'ok'."""
        m_verify_k8s_conn.return_value = True, 200
        m_exist.return_value = True

        resp = self.test_client.get('/healthz')

        # assert_called_once_with() is used instead of assert_called_once():
        # the latter is missing from older mock releases, where it is
        # auto-created as a child mock and therefore never fails.
        m_verify_k8s_conn.assert_called_once_with()
        m_verify_keystone_conn.assert_called_once_with()
        m_verify_neutron_conn.assert_called_once_with()
        self.assertEqual(200, resp.status_code)
        self.assertEqual('ok', resp.data.decode())

    @mock.patch('os.path.exists')
    def test_read_not_found(self, m_exist):
        """Pools enabled but the marker file is missing: 404."""
        m_exist.return_value = False
        oslo_cfg.CONF.set_override('vif_pool_driver', 'neutron',
                                   group='kubernetes')
        # Do not leak the override into other tests.
        # NOTE(review): harmless if the base class already resets CONF.
        self.addCleanup(oslo_cfg.CONF.clear_override, 'vif_pool_driver',
                        group='kubernetes')

        resp = self.test_client.get('/healthz')

        self.assertEqual(404, resp.status_code)

    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_k8s_connection')
    @mock.patch('os.path.exists')
    def test_read_k8s_error(self, m_exist, m_verify_k8s_conn):
        """Failing Kubernetes check: its status code is returned."""
        m_exist.return_value = True
        m_verify_k8s_conn.return_value = False, 503

        resp = self.test_client.get('/healthz')

        m_verify_k8s_conn.assert_called_once_with()
        self.assertEqual(503, resp.status_code)

    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_keystone_connection')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_k8s_connection')
    @mock.patch('os.path.exists')
    def test_read_unauthorized(self, m_exist, m_verify_k8s_conn,
                               m_verify_keystone_conn):
        """Keystone raising Unauthorized: 401 is returned."""
        m_exist.return_value = True
        m_verify_k8s_conn.return_value = True, 200
        m_verify_keystone_conn.side_effect = exceptions.http.Unauthorized

        resp = self.test_client.get('/healthz')

        m_verify_keystone_conn.assert_called_once_with()
        self.assertEqual(401, resp.status_code)

    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_neutron_connection')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_keystone_connection')
    @mock.patch('kuryr_kubernetes.controller.managers.health.HealthServer.'
                'verify_k8s_connection')
    @mock.patch('os.path.exists')
    def test_read_neutron_error(self, m_exist, m_verify_k8s_conn,
                                m_verify_keystone_conn, m_verify_neutron_conn):
        """Neutron check raising: 500 is returned."""
        m_exist.return_value = True
        m_verify_k8s_conn.return_value = True, 200
        m_verify_neutron_conn.side_effect = Exception

        resp = self.test_client.get('/healthz')

        m_verify_neutron_conn.assert_called_once_with()
        self.assertEqual(500, resp.status_code)

View File

@ -101,6 +101,6 @@ fi
generate_kuryr_configmap $OUTPUT_DIR $CONTROLLER_CONF_PATH $CNI_CONF_PATH
generate_kuryr_service_account $OUTPUT_DIR
readiness_probe=${KURYR_USE_PORTS_POOLS:-False}
generate_controller_deployment $OUTPUT_DIR $readiness_probe
health_server_port=${KURYR_HEALTH_SERVER_PORT:-8082}
generate_controller_deployment $OUTPUT_DIR $health_server_port
generate_cni_daemon_set $OUTPUT_DIR