From 1b15e1ef22963324e556e072f4bc58021f96bfec Mon Sep 17 00:00:00 2001 From: Gregory Thiemonge Date: Mon, 23 Aug 2021 19:53:28 +0200 Subject: [PATCH] Preserve haproxy server states during reloads haproxy doesn't keep the state of the servers (UP or DOWN) during reloads, so a member with an ERROR operating_status may be updated to ONLINE after updating a load balancer. This commit adds the load-server-state-from-file option in haproxy configuration files. It also adds an extra step in the haproxy systemd service file to dump the current server-state in a file when reloading the service. Story: 2009142 Task: 43084 Change-Id: Ia8ec48ab858eeecf71ed429e5b7625fd7af9d8f6 --- .../backends/agent/api_server/loadbalancer.py | 2 ++ .../api_server/templates/systemd.conf.j2 | 1 + .../backends/agent/api_server/util.py | 4 +++ .../haproxy/combined_listeners/jinja_cfg.py | 4 +++ .../combined_listeners/templates/base.j2 | 1 + .../combined_listeners/templates/macros.j2 | 1 + .../api_server/test_haproxy_compatibility.py | 2 ++ .../combined_listeners/test_jinja_cfg.py | 34 +++++++++++++++++++ .../sample_configs/sample_configs_combined.py | 5 ++- ...ing-status-on-reload-fe3688603bae8726.yaml | 5 +++ 10 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/fix-member-operating-status-on-reload-fe3688603bae8726.yaml diff --git a/octavia/amphorae/backends/agent/api_server/loadbalancer.py b/octavia/amphorae/backends/agent/api_server/loadbalancer.py index 713842ba1d..57940115e9 100644 --- a/octavia/amphorae/backends/agent/api_server/loadbalancer.py +++ b/octavia/amphorae/backends/agent/api_server/loadbalancer.py @@ -189,6 +189,8 @@ class Loadbalancer(object): haproxy_pid=util.pid_path(lb_id), haproxy_cmd=util.CONF.haproxy_amphora.haproxy_cmd, haproxy_cfg=util.config_path(lb_id), + haproxy_state_file=util.state_file_path(lb_id), + haproxy_socket=util.haproxy_sock_path(lb_id), haproxy_user_group_cfg=consts.HAPROXY_USER_GROUP_CFG, 
respawn_count=util.CONF.haproxy_amphora.respawn_count, respawn_interval=(util.CONF.haproxy_amphora. diff --git a/octavia/amphorae/backends/agent/api_server/templates/systemd.conf.j2 b/octavia/amphorae/backends/agent/api_server/templates/systemd.conf.j2 index 545486e3a7..3ccc69489f 100644 --- a/octavia/amphorae/backends/agent/api_server/templates/systemd.conf.j2 +++ b/octavia/amphorae/backends/agent/api_server/templates/systemd.conf.j2 @@ -13,6 +13,7 @@ Environment="CONFIG={{ haproxy_cfg }}" "USERCONFIG={{ haproxy_user_group_cfg }}" ExecStartPre={{ haproxy_cmd }} -f $CONFIG -f $USERCONFIG -c -q -L {{ peer_name }} +ExecReload=/bin/sh -c "echo 'show servers state' | socat stdio unix-connect:{{ haproxy_socket }} > {{ haproxy_state_file }}" ExecReload={{ haproxy_cmd }} -c -f $CONFIG -f $USERCONFIG -L {{ peer_name }} ExecReload=/bin/kill -USR2 $MAINPID diff --git a/octavia/amphorae/backends/agent/api_server/util.py b/octavia/amphorae/backends/agent/api_server/util.py index 35382b2c9a..c648ee6488 100644 --- a/octavia/amphorae/backends/agent/api_server/util.py +++ b/octavia/amphorae/backends/agent/api_server/util.py @@ -114,6 +114,10 @@ def config_path(lb_id): return os.path.join(haproxy_dir(lb_id), 'haproxy.cfg') +def state_file_path(lb_id): + return os.path.join(haproxy_dir(lb_id), 'servers-state') + + def get_haproxy_pid(lb_id): with open(pid_path(lb_id), 'r', encoding='utf-8') as f: return f.readline().rstrip() diff --git a/octavia/common/jinja/haproxy/combined_listeners/jinja_cfg.py b/octavia/common/jinja/haproxy/combined_listeners/jinja_cfg.py index 6dfb72ba96..f746fc67ef 100644 --- a/octavia/common/jinja/haproxy/combined_listeners/jinja_cfg.py +++ b/octavia/common/jinja/haproxy/combined_listeners/jinja_cfg.py @@ -162,11 +162,15 @@ class JinjaTemplater(object): if not socket_path: socket_path = '%s/%s.sock' % (self.base_amp_path, listeners[0].load_balancer.id) + state_file_path = '%s/%s/servers-state' % ( + self.base_amp_path, + listeners[0].load_balancer.id) return 
self._get_template().render( {'loadbalancer': loadbalancer, 'stats_sock': socket_path, 'log_http': self.log_http, 'log_server': self.log_server, + 'state_file': state_file_path, 'administrative_log_facility': CONF.amphora_agent.administrative_log_facility, 'user_log_facility': CONF.amphora_agent.user_log_facility, diff --git a/octavia/common/jinja/haproxy/combined_listeners/templates/base.j2 b/octavia/common/jinja/haproxy/combined_listeners/templates/base.j2 index 17fadb5b7d..41f812a3c0 100644 --- a/octavia/common/jinja/haproxy/combined_listeners/templates/base.j2 +++ b/octavia/common/jinja/haproxy/combined_listeners/templates/base.j2 @@ -20,6 +20,7 @@ global log {{ log_http | default('/run/rsyslog/octavia/log', true)}} local{{ user_log_facility }} log {{ log_server | default('/run/rsyslog/octavia/log', true)}} local{{ administrative_log_facility }} notice stats socket {{ sock_path }} mode 0666 level user + server-state-file {{ state_file }} {% if loadbalancer.global_connection_limit is defined %} maxconn {{ loadbalancer.global_connection_limit }} {% endif %} diff --git a/octavia/common/jinja/haproxy/combined_listeners/templates/macros.j2 b/octavia/common/jinja/haproxy/combined_listeners/templates/macros.j2 index 69cb78ba42..2778d34e63 100644 --- a/octavia/common/jinja/haproxy/combined_listeners/templates/macros.j2 +++ b/octavia/common/jinja/haproxy/combined_listeners/templates/macros.j2 @@ -340,6 +340,7 @@ backend {{ pool.id }}:{{ listener.id }} {% endif %} {% endif %} {% if pool.health_monitor and pool.health_monitor.enabled %} + load-server-state-from-file global timeout check {{ pool.health_monitor.timeout }}s {% if (pool.health_monitor.type == constants.HEALTH_MONITOR_HTTP or pool.health_monitor.type == diff --git a/octavia/tests/unit/amphorae/backends/agent/api_server/test_haproxy_compatibility.py b/octavia/tests/unit/amphorae/backends/agent/api_server/test_haproxy_compatibility.py index 80c872e832..3e19c646eb 100644 --- 
a/octavia/tests/unit/amphorae/backends/agent/api_server/test_haproxy_compatibility.py +++ b/octavia/tests/unit/amphorae/backends/agent/api_server/test_haproxy_compatibility.py @@ -31,6 +31,8 @@ class HAProxyCompatTestCase(base.TestCase): " log /run/rsyslog/octavia/log local1 notice\n" " stats socket /var/lib/octavia/sample_loadbalancer_id_1.sock" " mode 0666 level user\n" + " server-state-file /var/lib/octavia/sample_loadbalancer_id_1" + "/servers-state\n" " maxconn {maxconn}\n\n" "defaults\n" " log global\n" diff --git a/octavia/tests/unit/common/jinja/haproxy/combined_listeners/test_jinja_cfg.py b/octavia/tests/unit/common/jinja/haproxy/combined_listeners/test_jinja_cfg.py index 65ea92511c..1ba699f16b 100644 --- a/octavia/tests/unit/common/jinja/haproxy/combined_listeners/test_jinja_cfg.py +++ b/octavia/tests/unit/common/jinja/haproxy/combined_listeners/test_jinja_cfg.py @@ -64,6 +64,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -118,6 +119,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -168,6 +170,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -223,6 +226,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " 
http-check expect rstatus 418\n" @@ -276,6 +280,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -329,6 +334,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -380,6 +386,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -415,6 +422,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -441,6 +449,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -477,6 +486,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -512,6 +522,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html 
HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -540,6 +551,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -580,6 +592,7 @@ class TestHaproxyCfg(base.TestCase): " mode tcp\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -616,6 +629,7 @@ class TestHaproxyCfg(base.TestCase): " mode tcp\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option ssl-hello-chk\n" " fullconn {maxconn}\n" @@ -683,6 +697,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option external-check\n" " external-check command /var/lib/octavia/ping-wrapper.sh\n" @@ -743,6 +758,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.1\\r\\nHost:\\ " "testlab.com\n" @@ -820,6 +836,7 @@ class TestHaproxyCfg(base.TestCase): " balance roundrobin\n" " stick-table type ip size 10k\n" " stick on src\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -847,6 +864,7 @@ class TestHaproxyCfg(base.TestCase): " stick-table type string len 64 size 10k\n" " stick store-response res.cook(JSESSIONID)\n" " stick match req.cook(JSESSIONID)\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET 
/index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -970,6 +988,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -986,6 +1005,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /healthmon.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1007,6 +1027,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1035,6 +1056,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1065,6 +1087,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " fullconn {maxconn}\n" " option allbackups\n" @@ -1092,6 +1115,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1132,6 +1156,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html 
HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1171,6 +1196,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1211,6 +1237,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1248,6 +1275,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1281,6 +1309,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1311,6 +1340,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1527,6 +1557,7 @@ class TestHaproxyCfg(base.TestCase): " http-reuse safe\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " fullconn {maxconn}\n" " option allbackups\n" @@ -1555,6 +1586,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " fullconn {maxconn}\n" " option 
allbackups\n" @@ -1628,6 +1660,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1643,6 +1676,7 @@ class TestHaproxyCfg(base.TestCase): " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /healthmon.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" diff --git a/octavia/tests/unit/common/sample_configs/sample_configs_combined.py b/octavia/tests/unit/common/sample_configs/sample_configs_combined.py index fc35cc79e2..d608f6c972 100644 --- a/octavia/tests/unit/common/sample_configs/sample_configs_combined.py +++ b/octavia/tests/unit/common/sample_configs/sample_configs_combined.py @@ -1213,6 +1213,7 @@ def sample_base_expected_config(frontend=None, logging=None, backend=None, " mode http\n" " balance roundrobin\n" " cookie SRV insert indirect nocache\n" + " load-server-state-from-file global\n" " timeout check 31s\n" " option httpchk GET /index.html HTTP/1.0\\r\\n\n" " http-check expect rstatus 418\n" @@ -1246,5 +1247,7 @@ def sample_base_expected_config(frontend=None, logging=None, backend=None, " log /run/rsyslog/octavia/log local0\n" " log /run/rsyslog/octavia/log local1 notice\n" " stats socket /var/lib/octavia/sample_loadbalancer_id_1.sock" - " mode 0666 level user\n" + + " mode 0666 level user\n" + " server-state-file /var/lib/octavia/sample_loadbalancer_id_1" + "/servers-state\n" + global_opts + defaults + peers + frontend + logging + backend) diff --git a/releasenotes/notes/fix-member-operating-status-on-reload-fe3688603bae8726.yaml b/releasenotes/notes/fix-member-operating-status-on-reload-fe3688603bae8726.yaml new file mode 100644 index 0000000000..1dab5c48d9 --- /dev/null +++ 
b/releasenotes/notes/fix-member-operating-status-on-reload-fe3688603bae8726.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixed an issue where members in ERROR operating status could be briefly + reported as ONLINE during a load balancer configuration change.