From 8c0ce1c62f73f880ed255b20ea932852288d23e9 Mon Sep 17 00:00:00 2001
From: Kevin Carter <kevin.carter@rackspace.com>
Date: Thu, 16 Nov 2017 11:59:21 -0600
Subject: [PATCH] Change the galera health check for better cluster health

The current galera cluster health check simply logs into a cluster node
but does not check whether the node is synced. This can lead to an issue
where a node is placed back into the pool before it is ready. If this
happens, it can lead to a broken OpenStack environment until the wsrep
received queue is processed, which is especially true if the out-of-sync
node happens to be the primary.

Combined backport of:
- https://review.openstack.org/520673
- https://review.openstack.org/523854
- https://review.openstack.org/524107

Closes-Bug: #1665667
Change-Id: I49e371a2743618a0b5544a23e892aa28bb8567eb
Depends-On: I81c924464aa4b19c2a62f37b5bf26c3c0453786a
Depends-On: Ie1b3b9724dd33de1d90634166e585ecceb1f4c96
Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
---
 ansible-role-requirements.yml | 4 ++--
 group_vars/all/haproxy.yml    | 3 ++-
 group_vars/galera_all.yml     | 5 +++++
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/ansible-role-requirements.yml b/ansible-role-requirements.yml
index a2c540974c..31a35dee10 100644
--- a/ansible-role-requirements.yml
+++ b/ansible-role-requirements.yml
@@ -17,7 +17,7 @@
 - name: galera_server
   scm: git
   src: https://git.openstack.org/openstack/openstack-ansible-galera_server
-  version: b124e06872ebeca7d81cb22fb80ae97a995b07a8
+  version: ed739a5243b59596455b3488471c8cd81c15acf5
 - name: ceph_client
   scm: git
   src: https://git.openstack.org/openstack/openstack-ansible-ceph_client
@@ -25,7 +25,7 @@
 - name: haproxy_server
   scm: git
   src: https://git.openstack.org/openstack/openstack-ansible-haproxy_server
-  version: a905aaed8627f59d9dc10b9bc031589a7c65828f
+  version: 9b0ec183547a663f8a386375eaeecfad55fc7d73
 - name: keepalived
   scm: git
   src: https://github.com/evrardjp/ansible-keepalived
diff --git a/group_vars/all/haproxy.yml b/group_vars/all/haproxy.yml
index 1cc446e5b3..29edd71b98 100644
--- a/group_vars/all/haproxy.yml
+++ b/group_vars/all/haproxy.yml
@@ -19,11 +19,12 @@ haproxy_default_services:
       haproxy_backup_nodes: "{{ groups['galera_all'][1:] | default([]) }}"
       haproxy_bind: "{{ [internal_lb_vip_address] }}"
       haproxy_port: 3306
+      haproxy_check_port: 9200
       haproxy_balance_type: tcp
       haproxy_timeout_client: 5000s
       haproxy_timeout_server: 5000s
       haproxy_backend_options:
-        - "mysql-check user {{ galera_monitoring_user }}"
+        - "httpchk HEAD /"
       haproxy_whitelist_networks: "{{ haproxy_galera_whitelist_networks }}"
   - service:
       haproxy_service_name: repo_git
diff --git a/group_vars/galera_all.yml b/group_vars/galera_all.yml
index 73c15b7bb5..ca5bc910eb 100644
--- a/group_vars/galera_all.yml
+++ b/group_vars/galera_all.yml
@@ -26,3 +26,8 @@ galera_container_bind_mounts:
 # Disable PrivateDevices for MariaDB on CentOS 7
 # See https://bugs.launchpad.net/openstack-ansible/+bug/1697531 for details.
 galera_disable_privatedevices: "{{ ((properties.is_metal | default(false)) | bool) | ternary('false', 'true') }}"
+
+# By default, the galera_monitoring xinetd app is open to 0.0.0.0/0.
+# This restricts monitoring access to only the necessary nodes:
+# the load balancers and the galera nodes.
+galera_monitoring_allowed_source: "{% for node in groups['galera_all'] + groups['haproxy_all'] %}{{ hostvars[node]['ansible_host'] }} {% endfor %} 127.0.0.1"