[ALL] Add Gearman KEEPALIVE options
This requires the latest development version of python-gearman to support the new keepalive options. Using these options (off by default) solves the problem where the Gearman job server is paused/unpaused in a cloud environment but the clients and workers never recognize that they have been disconnected. WARNING: Not backwards compatible with older versions of the python-gearman driver, even if the SSL and keepalive options are not specified. This is a change in behavior. Change-Id: Ic8db6676f7408364b6fe9a8690deb72bb6e2772c
This commit is contained in:
@@ -55,6 +55,25 @@ Command Line Options
|
|||||||
The path for the SSL key file to be used for the frontend of the API
|
The path for the SSL key file to be used for the frontend of the API
|
||||||
server
|
server
|
||||||
|
|
||||||
|
.. option:: --gearman_keepalive
|
||||||
|
|
||||||
|
Use TCP KEEPALIVE to the Gearman job server. Not supported on all
|
||||||
|
systems.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepcnt <COUNT>
|
||||||
|
|
||||||
|
Maximum number of TCP KEEPALIVE probes to send before killing the
|
||||||
|
connection to the Gearman job server.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepidle <SECONDS>
|
||||||
|
|
||||||
|
Seconds of idle time on the Gearman job server connection before
|
||||||
|
sending TCP KEEPALIVE probes.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepintvl <SECONDS>
|
||||||
|
|
||||||
|
Seconds between TCP KEEPALIVE probes.
|
||||||
|
|
||||||
.. option:: --gearman_ssl_ca <PATH>
|
.. option:: --gearman_ssl_ca <PATH>
|
||||||
|
|
||||||
The path for the Gearman SSL Certificate Authority.
|
The path for the Gearman SSL Certificate Authority.
|
||||||
|
@@ -60,6 +60,25 @@ Command Line Options
|
|||||||
Used to specify the Gearman job server hostname and port. This option
|
Used to specify the Gearman job server hostname and port. This option
|
||||||
can be used multiple times to specify multiple job servers.
|
can be used multiple times to specify multiple job servers.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepalive
|
||||||
|
|
||||||
|
Use TCP KEEPALIVE to the Gearman job server. Not supported on all
|
||||||
|
systems.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepcnt <COUNT>
|
||||||
|
|
||||||
|
Maximum number of TCP KEEPALIVE probes to send before killing the
|
||||||
|
connection to the Gearman job server.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepidle <SECONDS>
|
||||||
|
|
||||||
|
Seconds of idle time on the Gearman job server connection before
|
||||||
|
sending TCP KEEPALIVE probes.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepintvl <SECONDS>
|
||||||
|
|
||||||
|
Seconds between TCP KEEPALIVE probes.
|
||||||
|
|
||||||
.. option:: --gearman_ssl_ca <PATH>
|
.. option:: --gearman_ssl_ca <PATH>
|
||||||
|
|
||||||
The path for the Gearman SSL Certificate Authority
|
The path for the Gearman SSL Certificate Authority
|
||||||
|
@@ -83,6 +83,25 @@ Command Line Options
|
|||||||
The flavor ID (image size ID) or name to use for new nodes spun up in
|
The flavor ID (image size ID) or name to use for new nodes spun up in
|
||||||
the Nova API
|
the Nova API
|
||||||
|
|
||||||
|
.. option:: --gearman_keepalive
|
||||||
|
|
||||||
|
Use TCP KEEPALIVE to the Gearman job server. Not supported on all
|
||||||
|
systems.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepcnt <COUNT>
|
||||||
|
|
||||||
|
Maximum number of TCP KEEPALIVE probes to send before killing the
|
||||||
|
connection to the Gearman job server.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepidle <SECONDS>
|
||||||
|
|
||||||
|
Seconds of idle time on the Gearman job server connection before
|
||||||
|
sending TCP KEEPALIVE probes.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepintvl <SECONDS>
|
||||||
|
|
||||||
|
Seconds between TCP KEEPALIVE probes.
|
||||||
|
|
||||||
.. option:: --gearman_ssl_ca <PATH>
|
.. option:: --gearman_ssl_ca <PATH>
|
||||||
|
|
||||||
The path for the Gearman SSL Certificate Authority.
|
The path for the Gearman SSL Certificate Authority.
|
||||||
|
@@ -28,6 +28,25 @@ Command Line Options
|
|||||||
* *haproxy* - `HAProxy <http://haproxy.1wt.eu>`_ software load balancer.
|
* *haproxy* - `HAProxy <http://haproxy.1wt.eu>`_ software load balancer.
|
||||||
This is the default driver.
|
This is the default driver.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepalive
|
||||||
|
|
||||||
|
Use TCP KEEPALIVE to the Gearman job server. Not supported on all
|
||||||
|
systems.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepcnt <COUNT>
|
||||||
|
|
||||||
|
Maximum number of TCP KEEPALIVE probes to send before killing the
|
||||||
|
connection to the Gearman job server.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepidle <SECONDS>
|
||||||
|
|
||||||
|
Seconds of idle time on the Gearman job server connection before
|
||||||
|
sending TCP KEEPALIVE probes.
|
||||||
|
|
||||||
|
.. option:: --gearman_keepintvl <SECONDS>
|
||||||
|
|
||||||
|
Seconds between TCP KEEPALIVE probes.
|
||||||
|
|
||||||
.. option:: --gearman_ssl_ca <FILE>
|
.. option:: --gearman_ssl_ca <FILE>
|
||||||
|
|
||||||
Full path to the file with the CA public key to use when
|
Full path to the file with the CA public key to use when
|
||||||
|
@@ -28,9 +28,8 @@ user = libra
|
|||||||
group = libra
|
group = libra
|
||||||
driver = haproxy
|
driver = haproxy
|
||||||
reconnect_sleep = 60
|
reconnect_sleep = 60
|
||||||
stats_poll = 300
|
|
||||||
gearman_poll = 60
|
gearman_poll = 60
|
||||||
server = 10.0.0.1:8080 10.0.0.2:8080
|
server = 10.0.0.1:4730 10.0.0.2:4730
|
||||||
pid = /var/run/libra/libra_worker.pid
|
pid = /var/run/libra/libra_worker.pid
|
||||||
logfile = /var/log/libra/libra_worker.log
|
logfile = /var/log/libra/libra_worker.log
|
||||||
|
|
||||||
|
@@ -51,7 +51,11 @@ def setup_app(pecan_config, args):
|
|||||||
'server': args.gearman,
|
'server': args.gearman,
|
||||||
'ssl_key': args.gearman_ssl_key,
|
'ssl_key': args.gearman_ssl_key,
|
||||||
'ssl_cert': args.gearman_ssl_cert,
|
'ssl_cert': args.gearman_ssl_cert,
|
||||||
'ssl_ca': args.gearman_ssl_ca
|
'ssl_ca': args.gearman_ssl_ca,
|
||||||
|
'keepalive': args.gearman_keepalive,
|
||||||
|
'keepcnt': args.gearman_keepcnt,
|
||||||
|
'keepidle': args.gearman_keepidle,
|
||||||
|
'keepintvl': args.gearman_keepintvl
|
||||||
}
|
}
|
||||||
config['conffile'] = args.config
|
config['conffile'] = args.config
|
||||||
if args.debug:
|
if args.debug:
|
||||||
@@ -136,6 +140,22 @@ def main():
|
|||||||
'--gearman', action='append', metavar='HOST:PORT', default=[],
|
'--gearman', action='append', metavar='HOST:PORT', default=[],
|
||||||
help='Gearman job servers'
|
help='Gearman job servers'
|
||||||
)
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepalive', action="store_true",
|
||||||
|
help='use KEEPALIVE to Gearman server'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepcnt', type=int, metavar='COUNT',
|
||||||
|
help='max keepalive probes to send before killing connection'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepidle', type=int, metavar='SECONDS',
|
||||||
|
help='seconds of idle time before sending keepalive probes'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepintvl', type=int, metavar='SECONDS',
|
||||||
|
help='seconds between TCP keepalive probes'
|
||||||
|
)
|
||||||
options.parser.add_argument(
|
options.parser.add_argument(
|
||||||
'--gearman_ssl_ca', metavar='FILE',
|
'--gearman_ssl_ca', metavar='FILE',
|
||||||
help='Gearman SSL certificate authority'
|
help='Gearman SSL certificate authority'
|
||||||
|
@@ -56,7 +56,11 @@ def setup_app(pecan_config, args):
|
|||||||
'server': args.gearman,
|
'server': args.gearman,
|
||||||
'ssl_key': args.gearman_ssl_key,
|
'ssl_key': args.gearman_ssl_key,
|
||||||
'ssl_cert': args.gearman_ssl_cert,
|
'ssl_cert': args.gearman_ssl_cert,
|
||||||
'ssl_ca': args.gearman_ssl_ca
|
'ssl_ca': args.gearman_ssl_ca,
|
||||||
|
'keepalive': args.gearman_keepalive,
|
||||||
|
'keepcnt': args.gearman_keepcnt,
|
||||||
|
'keepidle': args.gearman_keepidle,
|
||||||
|
'keepintvl': args.gearman_keepintvl
|
||||||
}
|
}
|
||||||
config['ip_filters'] = args.ip_filters
|
config['ip_filters'] = args.ip_filters
|
||||||
if args.debug:
|
if args.debug:
|
||||||
@@ -115,6 +119,22 @@ def main():
|
|||||||
'--gearman', action='append', metavar='HOST:PORT', default=[],
|
'--gearman', action='append', metavar='HOST:PORT', default=[],
|
||||||
help='Gearman job servers'
|
help='Gearman job servers'
|
||||||
)
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepalive', action="store_true",
|
||||||
|
help='use KEEPALIVE to Gearman server'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepcnt', type=int, metavar='COUNT',
|
||||||
|
help='max keepalive probes to send before killing connection'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepidle', type=int, metavar='SECONDS',
|
||||||
|
help='seconds of idle time before sending keepalive probes'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepintvl', type=int, metavar='SECONDS',
|
||||||
|
help='seconds between TCP keepalive probes'
|
||||||
|
)
|
||||||
options.parser.add_argument(
|
options.parser.add_argument(
|
||||||
'--gearman_ssl_ca', metavar='FILE',
|
'--gearman_ssl_ca', metavar='FILE',
|
||||||
help='Gearman SSL certificate authority'
|
help='Gearman SSL certificate authority'
|
||||||
|
@@ -84,20 +84,19 @@ class GearmanClientThread(object):
|
|||||||
self.host = host
|
self.host = host
|
||||||
self.lbid = lbid
|
self.lbid = lbid
|
||||||
|
|
||||||
if all([conf.gearman.ssl_key, conf.gearman.ssl_cert,
|
server_list = []
|
||||||
conf.gearman.ssl_ca]):
|
for server in conf.gearman.server:
|
||||||
# Use SSL connections to each Gearman job server.
|
ghost, gport = server.split(':')
|
||||||
ssl_server_list = []
|
server_list.append({'host': ghost,
|
||||||
for server in conf.gearman.server:
|
'port': int(gport),
|
||||||
ghost, gport = server.split(':')
|
'keyfile': conf.gearman.ssl_key,
|
||||||
ssl_server_list.append({'host': ghost,
|
'certfile': conf.gearman.ssl_cert,
|
||||||
'port': int(gport),
|
'ca_certs': conf.gearman.ssl_ca,
|
||||||
'keyfile': conf.gearman.ssl_key,
|
'keepalive': conf.gearman.keepalive,
|
||||||
'certfile': conf.gearman.ssl_cert,
|
'keepcnt': conf.gearman.keepcnt,
|
||||||
'ca_certs': conf.gearman.ssl_ca})
|
'keepidle': conf.gearman.keepidle,
|
||||||
self.gearman_client = JSONGearmanClient(ssl_server_list)
|
'keepintvl': conf.gearman.keepintvl})
|
||||||
else:
|
self.gearman_client = JSONGearmanClient(server_list)
|
||||||
self.gearman_client = JSONGearmanClient(conf.gearman.server)
|
|
||||||
|
|
||||||
def send_assign(self, data):
|
def send_assign(self, data):
|
||||||
job_data = {
|
job_data = {
|
||||||
|
@@ -34,18 +34,19 @@ def worker_thread(logger, args):
|
|||||||
logger.info("Registering task libra_pool_mgm")
|
logger.info("Registering task libra_pool_mgm")
|
||||||
hostname = socket.gethostname()
|
hostname = socket.gethostname()
|
||||||
|
|
||||||
if all([args.gearman_ssl_key, args.gearman_ssl_cert, args.gearman_ssl_ca]):
|
server_list = []
|
||||||
ssl_server_list = []
|
for host_port in args.gearman:
|
||||||
for host_port in args.gearman:
|
host, port = host_port.split(':')
|
||||||
host, port = host_port.split(':')
|
server_list.append({'host': host,
|
||||||
ssl_server_list.append({'host': host,
|
'port': int(port),
|
||||||
'port': int(port),
|
'keyfile': args.gearman_ssl_key,
|
||||||
'keyfile': args.gearman_ssl_key,
|
'certfile': args.gearman_ssl_cert,
|
||||||
'certfile': args.gearman_ssl_cert,
|
'ca_certs': args.gearman_ssl_ca,
|
||||||
'ca_certs': args.gearman_ssl_ca})
|
'keepalive': args.gearman_keepalive,
|
||||||
worker = JSONGearmanWorker(ssl_server_list)
|
'keepcnt': args.gearman_keepcnt,
|
||||||
else:
|
'keepidle': args.gearman_keepidle,
|
||||||
worker = JSONGearmanWorker(args.gearman)
|
'keepintvl': args.gearman_keepintvl})
|
||||||
|
worker = JSONGearmanWorker(server_list)
|
||||||
|
|
||||||
worker.set_client_id(hostname)
|
worker.set_client_id(hostname)
|
||||||
worker.register_task('libra_pool_mgm', handler)
|
worker.register_task('libra_pool_mgm', handler)
|
||||||
|
@@ -118,6 +118,22 @@ def main():
|
|||||||
'--gearman', action='append', metavar='HOST:PORT', default=[],
|
'--gearman', action='append', metavar='HOST:PORT', default=[],
|
||||||
help='Gearman job servers'
|
help='Gearman job servers'
|
||||||
)
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepalive', action="store_true",
|
||||||
|
help='use KEEPALIVE to Gearman server'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepcnt', type=int, metavar='COUNT',
|
||||||
|
help='max keepalive probes to send before killing connection'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepidle', type=int, metavar='SECONDS',
|
||||||
|
help='seconds of idle time before sending keepalive probes'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepintvl', type=int, metavar='SECONDS',
|
||||||
|
help='seconds between TCP keepalive probes'
|
||||||
|
)
|
||||||
options.parser.add_argument(
|
options.parser.add_argument(
|
||||||
'--gearman_ssl_ca', metavar='FILE',
|
'--gearman_ssl_ca', metavar='FILE',
|
||||||
help='Gearman SSL certificate authority'
|
help='Gearman SSL certificate authority'
|
||||||
@@ -131,8 +147,7 @@ def main():
|
|||||||
help='Gearman SSL key'
|
help='Gearman SSL key'
|
||||||
)
|
)
|
||||||
options.parser.add_argument(
|
options.parser.add_argument(
|
||||||
'--gearman-poll',
|
'--gearman_poll', type=int, metavar='TIME',
|
||||||
dest='gearman_poll', type=int, metavar='TIME',
|
|
||||||
default=1, help='Gearman worker polling timeout'
|
default=1, help='Gearman worker polling timeout'
|
||||||
)
|
)
|
||||||
options.parser.add_argument(
|
options.parser.add_argument(
|
||||||
|
@@ -70,6 +70,22 @@ def main():
|
|||||||
choices=known_drivers.keys(), default='haproxy',
|
choices=known_drivers.keys(), default='haproxy',
|
||||||
help='type of device to use'
|
help='type of device to use'
|
||||||
)
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepalive', action="store_true",
|
||||||
|
help='use KEEPALIVE to Gearman server'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepcnt', type=int, metavar='COUNT',
|
||||||
|
help='max keepalive probes to send before killing connection'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepidle', type=int, metavar='SECONDS',
|
||||||
|
help='seconds of idle time before sending keepalive probes'
|
||||||
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--gearman_keepintvl', type=int, metavar='SECONDS',
|
||||||
|
help='seconds between TCP keepalive probes'
|
||||||
|
)
|
||||||
options.parser.add_argument(
|
options.parser.add_argument(
|
||||||
'--gearman_ssl_ca', dest='gearman_ssl_ca', metavar='FILE',
|
'--gearman_ssl_ca', dest='gearman_ssl_ca', metavar='FILE',
|
||||||
help='Gearman SSL certificate authority'
|
help='Gearman SSL certificate authority'
|
||||||
@@ -83,13 +99,12 @@ def main():
|
|||||||
help='Gearman SSL key'
|
help='Gearman SSL key'
|
||||||
)
|
)
|
||||||
options.parser.add_argument(
|
options.parser.add_argument(
|
||||||
'--haproxy-service', dest='haproxy_service',
|
'--haproxy_service',
|
||||||
choices=haproxy_services.keys(), default='ubuntu',
|
choices=haproxy_services.keys(), default='ubuntu',
|
||||||
help='os services to use with HAProxy driver (when used)'
|
help='os services to use with HAProxy driver (when used)'
|
||||||
)
|
)
|
||||||
options.parser.add_argument(
|
options.parser.add_argument(
|
||||||
'-s', '--reconnect_sleep',
|
'-s', '--reconnect_sleep', type=int, metavar='TIME',
|
||||||
dest='reconnect_sleep', type=int, metavar='TIME',
|
|
||||||
default=60, help='seconds to sleep between job server reconnects'
|
default=60, help='seconds to sleep between job server reconnects'
|
||||||
)
|
)
|
||||||
options.parser.add_argument(
|
options.parser.add_argument(
|
||||||
@@ -98,12 +113,11 @@ def main():
|
|||||||
help='add a Gearman job server to the connection list'
|
help='add a Gearman job server to the connection list'
|
||||||
)
|
)
|
||||||
options.parser.add_argument(
|
options.parser.add_argument(
|
||||||
'--stats-poll', dest='stats_poll', type=int, metavar='TIME',
|
'--stats_poll', type=int, metavar='TIME',
|
||||||
default=300, help='statistics polling interval in seconds'
|
default=300, help='statistics polling interval in seconds'
|
||||||
)
|
)
|
||||||
options.parser.add_argument(
|
options.parser.add_argument(
|
||||||
'--gearman-poll',
|
'--gearman_poll', type=int, metavar='TIME',
|
||||||
dest='gearman_poll', type=int, metavar='TIME',
|
|
||||||
default=1, help='Gearman worker polling timeout'
|
default=1, help='Gearman worker polling timeout'
|
||||||
)
|
)
|
||||||
args = options.run()
|
args = options.run()
|
||||||
|
@@ -63,19 +63,20 @@ def config_thread(logger, driver, args):
|
|||||||
hostname = socket.gethostname()
|
hostname = socket.gethostname()
|
||||||
logger.info("[worker] Registering task %s" % hostname)
|
logger.info("[worker] Registering task %s" % hostname)
|
||||||
|
|
||||||
if all([args.gearman_ssl_key, args.gearman_ssl_cert, args.gearman_ssl_ca]):
|
server_list = []
|
||||||
ssl_server_list = []
|
for host_port in args.server:
|
||||||
for host_port in args.server:
|
host, port = host_port.split(':')
|
||||||
host, port = host_port.split(':')
|
server_list.append({'host': host,
|
||||||
ssl_server_list.append({'host': host,
|
'port': int(port),
|
||||||
'port': int(port),
|
'keyfile': args.gearman_ssl_key,
|
||||||
'keyfile': args.gearman_ssl_key,
|
'certfile': args.gearman_ssl_cert,
|
||||||
'certfile': args.gearman_ssl_cert,
|
'ca_certs': args.gearman_ssl_ca,
|
||||||
'ca_certs': args.gearman_ssl_ca})
|
'keepalive': args.gearman_keepalive,
|
||||||
worker = CustomJSONGearmanWorker(ssl_server_list)
|
'keepcnt': args.gearman_keepcnt,
|
||||||
else:
|
'keepidle': args.gearman_keepidle,
|
||||||
worker = CustomJSONGearmanWorker(args.server)
|
'keepintvl': args.gearman_keepintvl})
|
||||||
|
|
||||||
|
worker = CustomJSONGearmanWorker(server_list)
|
||||||
worker.set_client_id(hostname)
|
worker.set_client_id(hostname)
|
||||||
worker.register_task(hostname, handler)
|
worker.register_task(hostname, handler)
|
||||||
worker.logger = logger
|
worker.logger = logger
|
||||||
|
Reference in New Issue
Block a user