From 5717bd79525e2eb5b14c2d365dd59b63d7a63066 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Mon, 15 Jun 2020 11:05:06 +0200 Subject: [PATCH] Use leastconn and socket-level TCP keep-alives for Heat API According to the HAProxy docs, when the protocol involves very long sessions with long idle periods (e.g. querying Heat API for large resources), there is a risk that one of the intermediate components decides to expire a session which has remained idle for too long. In some NFV cases with hundreds of VM/port resources, multiple API requests are sent in parallel towards the Heat API service to retrieve the OS::Nova::Server resources from a big Heat stack, which causes the Heat API backends to become unavailable and requests to fail. This also ends up with all of its backends considered down by HAProxy, leaving the system in a cascading failure scenario: xx:12:09 overcloud-ctrl-0 haproxy[13]: Server heat_api/overcloud-ctrl-1.internalapi is DOWN, reason: Layer7 timeout, check duration: 10001ms. 2 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. xx:12:09 overcloud-ctrl-0 haproxy[13]: Server heat_api/overcloud-ctrl-2.internalapi is DOWN, reason: Layer7 timeout, check duration: 10001ms. 1 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. xx:12:09 overcloud-ctrl-0 haproxy[13]: Server heat_api/overcloud-ctrl-0.internalapi is DOWN, reason: Layer7 timeout, check duration: 10001ms. 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. xx:12:09 overcloud-ctrl-0 haproxy[13]: proxy heat_api has no server available! xx:13:55 overcloud-ctrl-0 haproxy[13]: Server heat_api/overcloud-ctrl-1.internalapi is UP, reason: Layer7 check passed, code: 200, info: "OK", check duration: 1ms. 1 active and 0 backup servers online. 0 sessions requeued, 0 total in queue. 
xx:13:55 overcloud-ctrl-0 haproxy[13]: Server heat_api/overcloud-ctrl-2.internalapi is UP, reason: Layer7 check passed, code: 200, info: "OK", check duration: 2ms. 2 active and 0 backup servers online. 0 sessions requeued, 0 total in queue. xx:13:56 overcloud-ctrl-0 haproxy[13]: Server heat_api/overcloud-ctrl-0.internalapi is UP, reason: Layer7 check passed, code: 200, info: "OK", check duration: 1ms. 3 active and 0 backup servers online. 0 sessions requeued, 0 total in queue. Mitigation steps proposed: * Enabling socket-level TCP keep-alives makes the system regularly send packets to the other end of the connection, keeping it active. * tl;dr - round-robin LB does not fit scenarios with cascading failures. Enabling leastconn LB makes a cascading failure less likely, because new client connections are distributed according to the real per-backend connection counts instead of the count-unaware round-robin rotation. * The default balance algorithm for Heat API therefore becomes 'leastconn' instead of 'roundrobin' (this is controlled by a new parameter). Cascading failures (when backends go down one-by-one) result in an unfair distribution of load. Consider the following example: - round-robin 100 connections amongst 3 backends (normal operation) -> 34/33/33, - ... another 100 but among only 2 (a 3-1 failure) -> 84/83/-, - ... another 100 in a cascading failure -> 184/-/-, - ... +100, after one more gets recovered -> 214/33/- - ... 
+100, after all recovered -> 244/63/33 (repeat until everything goes down after the 1st backend takes an enormous number of connections) Partial-Bug: #1882927 Change-Id: I5b85675c97a899b94c78ba9e19865a156e054fcb --- manifests/haproxy.pp | 16 ++++++++++++++-- ...oxy-leastconn-overrides-bdb2068ef794ff1d.yaml | 11 +++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/haproxy-leastconn-overrides-bdb2068ef794ff1d.yaml diff --git a/manifests/haproxy.pp b/manifests/haproxy.pp index 276ac6300..a930ef3bc 100644 --- a/manifests/haproxy.pp +++ b/manifests/haproxy.pp @@ -63,6 +63,12 @@ # If you enter an already existing key, it will override the default. # Defaults to {} # +# [*haproxy_lb_mode_longrunning*] +# HAProxy LB mode to use with the services the clients of which may have the notion +# of the longrunning requests, like RPC or just API requests that take time. +# The HAProxy's default roundrobin balance algorithm can be replaced with it. +# Defaults to "leastconn". +# # [*haproxy_defaults_override*] # HAProxy defaults option we can append to the default base set in this class. # If you enter an already existing key, it will override the default. 
@@ -559,6 +565,7 @@ class tripleo::haproxy ( $activate_httplog = false, $haproxy_globals_override = {}, $haproxy_defaults_override = {}, + $haproxy_lb_mode_longrunning = 'leastconn', $haproxy_daemon = true, $haproxy_socket_access_level = 'user', $haproxy_stats_user = 'admin', @@ -1136,6 +1143,10 @@ class tripleo::haproxy ( 'timeout client' => '10m', 'timeout server' => '10m', } + $heat_durability_options = { + 'option' => [ 'tcpka' ], + 'balance' => $haproxy_lb_mode_longrunning, + } if $service_certificate { $heat_ssl_options = { 'rsprep' => "^Location:\\ http://${public_virtual_ip}(.*) Location:\\ https://${public_virtual_ip}\\1", @@ -1144,6 +1155,7 @@ class tripleo::haproxy ( } else { $heat_options = merge($default_listen_options, $heat_timeout_options) } + $heat_options_real = merge($heat_options, $heat_durability_options) if $heat_api { ::tripleo::haproxy::endpoint { 'heat_api': @@ -1153,7 +1165,7 @@ class tripleo::haproxy ( ip_addresses => $heat_ip_addresses, server_names => hiera('heat_api_node_names', $controller_hosts_names_real), mode => 'http', - listen_options => $heat_options, + listen_options => $heat_options_real, public_ssl_port => $ports[heat_api_ssl_port], service_network => $heat_api_network, member_options => union($haproxy_member_options, $internal_tls_member_options), @@ -1168,7 +1180,7 @@ class tripleo::haproxy ( ip_addresses => $heat_ip_addresses, server_names => hiera('heat_api_node_names', $controller_hosts_names_real), mode => 'http', - listen_options => $heat_options, + listen_options => $heat_options_real, public_ssl_port => $ports[heat_cfn_ssl_port], service_network => $heat_cfn_network, member_options => union($haproxy_member_options, $internal_tls_member_options), diff --git a/releasenotes/notes/haproxy-leastconn-overrides-bdb2068ef794ff1d.yaml b/releasenotes/notes/haproxy-leastconn-overrides-bdb2068ef794ff1d.yaml new file mode 100644 index 000000000..7cce3d861 --- /dev/null +++ 
b/releasenotes/notes/haproxy-leastconn-overrides-bdb2068ef794ff1d.yaml @@ -0,0 +1,11 @@ +--- +features: + - | + Add `haproxy_lb_mode_longrunning` parameter for the use with the services + that may have longrunning API requests. Defaults to 'leastconn' (replaces + the HAProxy default 'roundrobin'). +upgrade: + - | + Since Heat API can be given longrunning API requests its backends will + become load-balanced with the 'leastconn' algorithm and its sessions + will also benefit from the TCP-keepalive feature of HAProxy.