Run amphora agent with gunicorn

Flask's default runner (werkzeug) is plagued with bugs.
If we use gunicorn instead, we should have far fewer problems!

Depends-On: I211dc771aa95147c0f1d9e6ac1a65a7e164b33c2
Change-Id: I59897167f9285bf013f8a155dd2ea4f799ac1d3f
This commit is contained in:
Adam Harwell 2016-10-15 04:50:53 +09:00
parent ae0ca96e8a
commit 48a1e7cbe9
9 changed files with 57 additions and 47 deletions

View File

@ -104,12 +104,18 @@ function octavia_configure {
iniuncomment $OCTAVIA_CONF haproxy_amphora base_cert_dir
iniuncomment $OCTAVIA_CONF haproxy_amphora connection_max_retries
iniuncomment $OCTAVIA_CONF haproxy_amphora connection_retry_interval
iniuncomment $OCTAVIA_CONF haproxy_amphora rest_request_conn_timeout
iniuncomment $OCTAVIA_CONF haproxy_amphora rest_request_read_timeout
iniuncomment $OCTAVIA_CONF controller_worker amp_active_retries
iniuncomment $OCTAVIA_CONF controller_worker amp_active_wait_sec
# devstack optimizations for tempest runs
iniset $OCTAVIA_CONF haproxy_amphora connection_max_retries 1500
iniset $OCTAVIA_CONF haproxy_amphora connection_retry_interval 1
iniset $OCTAVIA_CONF haproxy_amphora rest_request_conn_timeout ${OCTAVIA_AMP_CONN_TIMEOUT}
iniset $OCTAVIA_CONF haproxy_amphora rest_request_read_timeout ${OCTAVIA_AMP_READ_TIMEOUT}
iniset $OCTAVIA_CONF controller_worker amp_active_retries 100
iniset $OCTAVIA_CONF controller_worker amp_active_wait_sec 1
iniset $OCTAVIA_CONF controller_worker amp_active_wait_sec 2
if [[ -a $OCTAVIA_SSH_DIR ]] ; then
rm -rf $OCTAVIA_SSH_DIR

View File

@ -48,6 +48,9 @@ OCTAVIA_AMP_IMAGE_NAME=${OCTAVIA_AMP_IMAGE_NAME:-"amphora-x64-haproxy"}
OCTAVIA_AMP_IMAGE_FILE=${OCTAVIA_AMP_IMAGE_FILE:-${OCTAVIA_DIR}/diskimage-create/${OCTAVIA_AMP_IMAGE_NAME}.qcow2}
OCTAVIA_AMP_IMAGE_TAG="amphora"
OCTAVIA_AMP_CONN_TIMEOUT=${OCTAVIA_AMP_CONN_TIMEOUT:-"10"}
OCTAVIA_AMP_READ_TIMEOUT=${OCTAVIA_AMP_READ_TIMEOUT:-"120"}
OCTAVIA_HEALTH_KEY=${OCTAVIA_HEALTH_KEY:-"insecure"}
OCTAVIA_AMP_EXPIRY_AGE=${OCTAVIA_AMP_EXPIRY_AGE:-"3600"}

View File

@ -224,6 +224,7 @@
# agent_server_cert = /etc/octavia/certs/server.pem
# agent_server_network_dir = /etc/netns/amphora-haproxy/network/interfaces.d/
# agent_server_network_file =
# agent_request_read_timeout = 120
[keepalived_vrrp]
# Amphora Role/Priority advertisement interval in seconds

View File

@ -45,6 +45,8 @@ class AgentJinjaTemplater(object):
CONF.amphora_agent.agent_server_network_dir,
'agent_server_network_file':
CONF.amphora_agent.agent_server_network_file,
'agent_request_read_timeout':
CONF.amphora_agent.agent_request_read_timeout,
'amphora_id': amphora_id,
'base_cert_dir': CONF.haproxy_amphora.base_cert_dir,
'base_path': CONF.haproxy_amphora.base_path,

View File

@ -37,4 +37,5 @@ agent_server_network_dir = {{ agent_server_network_dir }}
{% if agent_server_network_file -%}
agent_server_network_file = {{ agent_server_network_file }}
{% endif -%}
agent_request_read_timeout = {{ agent_request_read_timeout }}
amphora_id = {{ amphora_id }}

View File

@ -15,13 +15,12 @@
# make sure PYTHONPATH includes the home directory if you didn't install
import multiprocessing as multiproc
import os
import ssl
import sys
import gunicorn.app.base
from oslo_config import cfg
from oslo_reports import guru_meditation_report as gmr
from werkzeug import serving
import six
from octavia.amphorae.backends.agent.api_server import server
from octavia.amphorae.backends.health_daemon import health_daemon
@ -35,27 +34,22 @@ CONF.import_group('haproxy_amphora', 'octavia.common.config')
HM_SENDER_CMD_QUEUE = multiproc.Queue()
# Hack: Use werkzeugs context
# also http://stackoverflow.com/questions/23262768/
# two-way-ssl-authentication-for-flask
class OctaviaSSLContext(serving._SSLContext):
def __init__(self, protocol):
self._ca_certs = None
super(OctaviaSSLContext, self).__init__(protocol)
class AmphoraAgent(gunicorn.app.base.BaseApplication):
def __init__(self, app, options=None):
self.options = options or {}
self.application = app
super(AmphoraAgent, self).__init__()
def load_cert_chain(self, certfile, keyfile=None, password=None, ca=None):
self._ca_certs = ca
super(OctaviaSSLContext, self).load_cert_chain(
certfile, keyfile, password)
def wrap_socket(self, sock, **kwargs):
return super(OctaviaSSLContext, self).wrap_socket(
sock,
# Comment out for debugging if you want to connect without
# a client cert
cert_reqs=ssl.CERT_REQUIRED,
ca_certs=self._ca_certs
def load_config(self):
config = dict(
[(key, value) for key, value in six.iteritems(self.options)
if key in self.cfg.settings and value is not None]
)
for key, value in six.iteritems(config):
self.cfg.set(key.lower(), value)
def load(self):
return self.application
# start api server
@ -65,31 +59,28 @@ def main():
gmr.TextGuruMeditation.setup_autorun(version)
# Workaround for an issue with the auto-reload used below in werkzeug
# Without it multiple health senders get started when werkzeug reloads
if not os.environ.get('WERKZEUG_RUN_MAIN'):
health_sender_proc = multiproc.Process(name='HM_sender',
target=health_daemon.run_sender,
args=(HM_SENDER_CMD_QUEUE,))
health_sender_proc.daemon = True
health_sender_proc.start()
# We will only enforce that the client cert is from the good authority
# todo(german): Watch this space for security improvements
ctx = OctaviaSSLContext(ssl.PROTOCOL_SSLv23)
ctx.load_cert_chain(CONF.amphora_agent.agent_server_cert,
ca=CONF.amphora_agent.agent_server_ca)
health_sender_proc = multiproc.Process(name='HM_sender',
target=health_daemon.run_sender,
args=(HM_SENDER_CMD_QUEUE,))
health_sender_proc.daemon = True
health_sender_proc.start()
# Initiate server class
server_instance = server.Server()
# This will trigger a reload if any files change and
# in particular the certificate file
serving.run_simple(hostname=CONF.haproxy_amphora.bind_host,
port=CONF.haproxy_amphora.bind_port,
application=server_instance.app,
use_debugger=CONF.debug,
ssl_context=ctx,
use_reloader=True,
extra_files=[CONF.amphora_agent.agent_server_cert])
options = {
'bind': '{host}:{port}'.format(
host=CONF.haproxy_amphora.bind_host,
port=CONF.haproxy_amphora.bind_port
),
'workers': 1,
'timeout': CONF.amphora_agent.agent_request_read_timeout,
'certfile': CONF.amphora_agent.agent_server_cert,
'ca_certs': CONF.amphora_agent.agent_server_ca,
'cert_reqs': True,
'preload_app': True,
'accesslog': '-',
'errorlog': '-',
'loglevel': 'debug',
}
AmphoraAgent(server_instance.app, options).run()

View File

@ -75,6 +75,9 @@ amphora_agent_opts = [
help=_("The file where the network interfaces are located. "
"Specifying this will override any value set for "
"agent_server_network_dir.")),
cfg.IntOpt('agent_request_read_timeout', default=120,
help=_("The time in seconds to allow a request from the "
"controller to run before terminating the socket.")),
# Do not specify in octavia.conf, loaded at runtime
cfg.StrOpt('amphora_id', help=_("The amphora ID.")),
]

View File

@ -76,6 +76,7 @@ class AgentJinjaTestCase(base.TestCase):
'/etc/octavia/certs/server.pem\n'
'agent_server_network_dir = '
'/etc/network/interfaces.d/\n'
'agent_request_read_timeout = 120\n'
'amphora_id = ' + AMP_ID)
agent_cfg = ajc.build_agent_config(AMP_ID)
self.assertEqual(expected_config, agent_cfg)
@ -109,6 +110,7 @@ class AgentJinjaTestCase(base.TestCase):
'/etc/network/interfaces.d/\n'
'agent_server_network_file = '
'/etc/network/interfaces\n'
'agent_request_read_timeout = 120\n'
'amphora_id = ' + AMP_ID)
agent_cfg = ajc.build_agent_config(AMP_ID)
self.assertEqual(expected_config, agent_cfg)

View File

@ -42,3 +42,4 @@ netifaces>=0.10.4 # MIT
ipaddress>=1.0.7;python_version<'3.3' # PSF
cryptography!=1.3.0,>=1.0 # BSD/Apache-2.0
pyroute2>=0.4.3 # Apache-2.0 (+ dual licensed GPL2)
gunicorn>=19.0 # MIT