122 lines
3.9 KiB
Smarty
Raw Normal View History

Patroni inclusion work for HA Postgres This patchset aims to add HA Clustering support for Postgres. HA Clustering provides automatic failover in the event of the database going down in addition to keeping replicas of the database for rebuilding in the event of a node going down. To achieve this clustering we use [Patroni](https://github.com/zalando/patroni) which offers HA clustering support for Postgres. Patroni is a daemon that runs in the background and keeps track of which node in your cluster is currently the leader node and routes all traffic on the Postgresql endpoint to that node. If the leader node goes down, Patroni holds an election to choose a new leader and updates the endpoint to route traffic accordingly. All communication between nodes is done by a Patroni-created endpoint, separate from the externally facing Postgres endpoint. Note that, although the postgresql helm chart can be upgraded from non-patroni to patroni clustering, the previous `postgresql` endpoints object (which is not directly managed by helm) must be deleted via an out-of-band mechanism so that it may be replaced by the patroni-managed endpoints. If Postgres itself is leveraged for the deployment process, this must be done with careful timing. Note that the old endpoints had a port named "db", and the new endpoints have a port named "postgresql". - Picking up patchset: https://review.openstack.org/#/c/591663 Co-authored-by: Tony Sorrentino <as1413@att.com> Co-authored-by: Randeep Jalli <rj2083@att.com> Co-authored-by: Pete Birley <pete@port.direct> Co-authored-by: Matt McEuen <mm9745@att.com> Change-Id: I721b745017dc1ea7ae05dfd9f8d5dd08d0965985
2019-03-18 18:08:17 +00:00
#!/bin/bash
{{/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
# This script creates the patroni replication user if it doesn't exist.
# This is only needed for brownfield upgrade scenarios, on top of sites that
# were greenfield-deployed with a pre-patroni version of postgres.
#
# For greenfield deployments, the patroni-enabled postgresql chart will
# create this user automatically.
#
# If any additional conversion steps are found to be needed, they can go here.
# -e: abort as soon as any command fails; -x: trace each command as it runs.
set -ex
Patroni inclusion work for HA Postgres This patchset aims to add HA Clustering support for Postgres. HA Clustering provides automatic failover in the event of the database going down in addition to keeping replicas of the database for rebuilding in the event of a node going down. To achieve this clustering we use [Patroni](https://github.com/zalando/patroni) which offers HA clustering support for Postgres. Patroni is a daemon that runs in the background and keeps track of which node in your cluster is currently the leader node and routes all traffic on the Postgresql endpoint to that node. If the leader node goes down, Patroni holds an election to choose a new leader and updates the endpoint to route traffic accordingly. All communication between nodes is done by a Patroni-created endpoint, separate from the externally facing Postgres endpoint. Note that, although the postgresql helm chart can be upgraded from non-patroni to patroni clustering, the previous `postgresql` endpoints object (which is not directly managed by helm) must be deleted via an out-of-band mechanism so that it may be replaced by the patroni-managed endpoints. If Postgres itself is leveraged for the deployment process, this must be done with careful timing. Note that the old endpoints had a port named "db", and the new endpoints have a port named "postgresql". - Picking up patchset: https://review.openstack.org/#/c/591663 Co-authored-by: Tony Sorrentino <as1413@att.com> Co-authored-by: Randeep Jalli <rj2083@att.com> Co-authored-by: Pete Birley <pete@port.direct> Co-authored-by: Matt McEuen <mm9745@att.com> Change-Id: I721b745017dc1ea7ae05dfd9f8d5dd08d0965985
2019-03-18 18:08:17 +00:00
#######################################
# Report whether the HTTP endpoint at HOST:PORT answers "running".
#
# Opens a TCP connection through bash's /dev/tcp, sends a minimal HTTP GET
# for "/", and looks for the word "running" on the last line of the reply.
# The whole probe is capped at 10 seconds by timeout(1).
#
# Arguments:
#   $1 - host name or IP address to connect to
#   $2 - TCP port to connect to
# Outputs:
#   Nothing on stdout; connection errors from the inner shell go to stderr.
# Returns:
#   0 if the last line of the response contains "running" exactly once,
#   non-zero otherwise (including connection failure or timeout).
#######################################
function patroni_started() {
  # Keep the working variables out of the caller's global scope.
  local host=$1
  local port=$2
  local status

  # Host and port are handed to the inner shell as positional parameters
  # instead of being spliced into its source text, so unusual characters in
  # them cannot alter the command that runs. The request carries the Host
  # header that HTTP/1.1 requires and ends with a proper blank CRLF line.
  # A failed probe is an ordinary "not started" answer, hence the || true.
  status=$(timeout 10 bash -c '
    exec 3<>"/dev/tcp/$1/$2" || exit 1
    printf "GET / HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n" "$1" >&3
    tail -n1 <&3 | grep -o "running"
  ' _ "${host}" "${port}") || true

  [[ "${status}" == "running" ]]
}
# Connection settings for the local postgres instance. PGDATABASE and PGHOST
# keep a value already present in the environment and otherwise fall back to
# the defaults given here.
PGDATABASE=${PGDATABASE:-'postgres'}
PGHOST=${PGHOST:-'127.0.0.1'}
# Filled in at chart render time by the helm-toolkit endpoint port lookup for
# the internal "postgresql" endpoint.
PGPORT={{- tuple "postgresql" "internal" "postgresql" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
# psql command line shared by the queries below. It is stored as one string
# and expanded unquoted so that it splits into the command and its arguments.
PSQL="psql -h ${PGHOST} -p ${PGPORT} -d ${PGDATABASE}"
# Mount path of the data volume, taken from the chart values at render time.
PVC_MNT={{- .Values.storage.mount.path }}
# Marker paths under the data directory: the first is expected from a plain
# postgres deployment, the second only once patroni has been in charge.
FILE_MADE_BY_POSTGRES=${PVC_MNT}/pgdata/pg_xlog
FILE_MADE_BY_PATRONI=${PVC_MNT}/pgdata/patroni.dynamic.json
# Retry counter reused by each of the wait loops below.
TIMEOUT=0
# The replication user has to be created only once, so every pod other than
# the first replica leaves immediately with a success status.
case "${POD_NAME}" in
  postgresql-0)
    ;;
  *)
    echo "Nothing to do on ${POD_NAME}"
    exit 0
    ;;
esac
# Look for a file-based clue that we're migrating from vanilla pg to patroni.
# This is lighter-weight than checking in the database for the user, since
# we have to fire up the database at this point to do the check.
if [[ -e "${FILE_MADE_BY_POSTGRES}" && ! -e "${FILE_MADE_BY_PATRONI}" ]]; then
  echo "We are upgrading to Patroni -- checking for replication user"

  # Fire up a temporary postgres and remember its PID so that exactly this
  # process can be signalled when it is time to shut down.
  # NOTE(review): assumes /docker-entrypoint.sh exec's postgres, so that the
  # recorded PID is the postgres server itself -- confirm against the image.
  /docker-entrypoint.sh postgres &
  POSTGRES_PID=$!

  # Poll once a second until the server accepts connections.
  # ${PSQL} is deliberately unquoted: it holds a whole command line.
  while ! ${PSQL} -c "select 1;"; do
    sleep 1
    if [[ ${TIMEOUT} -gt 120 ]]; then
      echo "A timeout occurred waiting for the temporary postgres to accept connections."
      exit 1
    fi
    TIMEOUT=$((TIMEOUT+1))
  done
  TIMEOUT=0

  # Add the replication user if it doesn't exist
  USER_COUNT=$(${PSQL} -qt -c \
    "SELECT COUNT(*) FROM pg_roles \
     WHERE rolname='${PATRONI_REPLICATION_USERNAME}'")
  # psql pads the value with whitespace; strip it before comparing.
  if [[ "${USER_COUNT//[[:space:]]/}" == "0" ]]; then
    echo "The patroni replication user ${PATRONI_REPLICATION_USERNAME} doesn't exist yet; creating:"
    # CREATE ROLE defaults to NOLOGIN not to allow password based login.
    # Replication user uses SSL Cert to connect.
    ${PSQL} -c "CREATE ROLE ${PATRONI_REPLICATION_USERNAME} \
      WITH REPLICATION;"
    echo "done."
  else
    echo "The patroni replication user ${PATRONI_REPLICATION_USERNAME} already exists: nothing to do."
  fi

  # Start Patroni to assimilate the postgres. The pod IP placeholder is
  # substituted into a rendered copy of the config, and Patroni is started
  # from that rendered copy rather than from the raw template.
  sed "s/POD_IP_PATTERN/${PATRONI_KUBERNETES_POD_IP}/g" \
    /tmp/patroni-templated.yaml > /tmp/patroni.yaml

  READY_FLAG="i am the leader with the lock"
  PATRONI_LOG=/tmp/patroni_conversion.log
  /usr/bin/python3 /usr/local/bin/patroni /tmp/patroni.yaml &> "${PATRONI_LOG}" &
  PATRONI_PID=$!

  # Sleep until patroni is running, i.e. until the ready message shows up in
  # its log; give up after the retry budget is spent.
  while ! grep -q "${READY_FLAG}" "${PATRONI_LOG}"; do
    sleep 5
    if [[ ${TIMEOUT} -gt 24 ]]; then
      echo "A timeout occurred. Patroni logs:"
      cat "${PATRONI_LOG}"
      exit 1
    fi
    TIMEOUT=$((TIMEOUT+1))
  done
  TIMEOUT=0

  # Gracefully stop postgres and patroni: send SIGINT once to the two
  # processes started above, then poll until both have gone away.
  # Signalling by PID rather than by owning user keeps the signal away from
  # this script's own shell, should it be running as the same user.
  kill -INT "${PATRONI_PID}" "${POSTGRES_PID}" 2>/dev/null || true
  while kill -0 "${PATRONI_PID}" 2>/dev/null \
    || kill -0 "${POSTGRES_PID}" 2>/dev/null; do
    sleep 5
    if [[ ${TIMEOUT} -gt 24 ]]; then
      echo "A timeout occurred. Patroni logs:"
      cat "${PATRONI_LOG}"
      exit 1
    fi
    TIMEOUT=$((TIMEOUT+1))
  done
else
  echo "Patroni is already in place: nothing to do."
fi