From e965239d60b6e2b76aeca0c0aa8ef8998029bbdd Mon Sep 17 00:00:00 2001 From: Damien Ciabrini Date: Thu, 11 Jun 2020 13:45:55 +0200 Subject: [PATCH] Ensure redis_tls_proxy starts after all redis instances When converting a HA control plane to TLS-e, 1) the bootstrap node tells pacemaker to restart all redis instances to take into account the new TLS-e config; 2) a new container redis_tls_proxy is started on every controller to encapsulate redis traffic in TLS tunnels. This happens during step 2. Redis servers have to be restarted everywhere for redis_tls_proxy to be able to start tunnels properly. Since we can't guarantee that across several nodes during the same step, tweak the startup of redis_tls_proxy instead; make sure to only create the tunnels once the targeted host:port can be bound (i.e. redis was restarted). Change-Id: I70560f80775dacddd82262e8079c13f86b0eb0e6 Closes-Bug: #1883096 (cherry picked from commit b91a1a09cb25f042adb7094947eb3416c80af88f) --- container_config_scripts/wait-port-and-run.sh | 18 ++++++++++++++++++ deployment/containers-common.yaml | 3 +++ .../database/redis-pacemaker-puppet.yaml | 10 +++++++++- tools/yaml-validate.py | 4 ++++ 4 files changed, 34 insertions(+), 1 deletion(-) create mode 100755 container_config_scripts/wait-port-and-run.sh diff --git a/container_config_scripts/wait-port-and-run.sh b/container_config_scripts/wait-port-and-run.sh new file mode 100755 index 0000000000..817c64ad41 --- /dev/null +++ b/container_config_scripts/wait-port-and-run.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -eu + +HOST=$1 +PORT=$2 + +echo "$(date -u): Checking whether we can bind to ${HOST}:${PORT}" +while (ss -Htnl src "${HOST}" "sport = :${PORT}" | grep -wq "${PORT}"); do + echo "$(date -u): ${HOST}:${PORT} still in use, waiting..."; + sleep 10; +done + +shift 2 +COMMAND="$*" +if [ -z "${COMMAND}" ]; then + COMMAND="true" +fi +exec $COMMAND diff --git a/deployment/containers-common.yaml b/deployment/containers-common.yaml index 725b82b3e7..7aa9faaaca 100644 --- a/deployment/containers-common.yaml +++ b/deployment/containers-common.yaml @@ -118,6 +118,9 @@ outputs: template: { get_file: ../container_config_scripts/pacemaker_wait_bundle.sh } params: __PCMKTIMEOUT__: {get_param: PcmkConfigRestartTimeout} + wait-port-and-run.sh: + mode: "0755" + content: { get_file: ../container_config_scripts/wait-port-and-run.sh } volumes_base: description: Base volume list diff --git a/deployment/database/redis-pacemaker-puppet.yaml b/deployment/database/redis-pacemaker-puppet.yaml index e38be5b4d5..744ae6bd7a 100644 --- a/deployment/database/redis-pacemaker-puppet.yaml +++ b/deployment/database/redis-pacemaker-puppet.yaml @@ -193,7 +193,14 @@ outputs: owner: redis:redis recurse: true /var/lib/kolla/config_files/redis_tls_proxy.json: - command: stunnel /etc/stunnel/stunnel.conf + command: + # Note: kolla doesn't process string arguments as expected, + # so use a bash idiom to achieve the same result + str_replace: + template: + bash -c $* -- eval /wait-port-and-run.sh $(hiera fqdn_$NETWORK) 6379 stunnel /etc/stunnel/stunnel.conf + params: + $NETWORK: {get_param: [ServiceNetMap, RedisNetwork]} config_files: - source: "/var/lib/kolla/config_files/src/*" dest: "/" @@ -277,6 +284,7 @@ outputs: - /var/lib/config-data/puppet-generated/redis:/var/lib/kolla/config_files/src:ro - /etc/pki/tls/certs/redis.crt:/var/lib/kolla/config_files/src-tls/etc/pki/tls/certs/redis.crt:ro - /etc/pki/tls/private/redis.key:/var/lib/kolla/config_files/src-tls/etc/pki/tls/private/redis.key:ro + - /var/lib/container-config-scripts/wait-port-and-run.sh:/wait-port-and-run.sh:ro environment: KOLLA_CONFIG_STRATEGY: COPY_ALWAYS - {} diff --git a/tools/yaml-validate.py b/tools/yaml-validate.py index 44629e82aa..d2cb144b2b 100755 --- a/tools/yaml-validate.py +++ b/tools/yaml-validate.py @@ -963,6 +963,10 @@ def validate_service_hiera_interpol(f, tpl): if enter_lists and path[-1] != 0 and path[-2] != 'get_param': continue + # Omit if it is not a hiera config setting + if path[1] in ['kolla_config']: + continue + path_str = ';'.join(str(x) for x in path) # NOTE(bogdando): Omit foo_network keys looking like a network # name. The only exception is allow anything under