From 0799782ce83d1057f262b44c979a15f9a1b05c72 Mon Sep 17 00:00:00 2001 From: Michal Nasiadka Date: Thu, 12 Dec 2019 13:19:48 +0100 Subject: [PATCH] Fix keystone fernet bootstrap There are cases when a multinode deployment ends up in unusable keystone public wsgi on some nodes. The root cause is that keystone public wsgi doesn't find fernet keys on startup - and then persists on sending 500 errors to any requests - due to a race condition between fernet_setup/fernet-push.sh and keystone startup. Depends-On: https://review.opendev.org/703742/ Change-Id: I63709c2e3f6a893db82a05640da78f492bf8440f Closes-Bug: #1846789 --- .../keystone/tasks/bootstrap_service.yml | 38 ++++++++++++++++++- ansible/roles/keystone/tasks/deploy.yml | 2 +- .../keystone/tasks/distribute_fernet.yml | 19 ++++++++++ ansible/roles/keystone/tasks/init_fernet.yml | 27 ------------- .../keystone/templates/fernet-node-sync.sh.j2 | 20 +++++++++- .../keystone/templates/fernet-push.sh.j2 | 3 ++ .../keystone/templates/fernet-rotate.sh.j2 | 3 ++ ...fix-fernet-bootstrap-36f87e36e4dc6ec9.yaml | 6 +++ 8 files changed, 87 insertions(+), 31 deletions(-) create mode 100644 ansible/roles/keystone/tasks/distribute_fernet.yml delete mode 100644 ansible/roles/keystone/tasks/init_fernet.yml create mode 100644 releasenotes/notes/bug-1846789-fix-fernet-bootstrap-36f87e36e4dc6ec9.yaml diff --git a/ansible/roles/keystone/tasks/bootstrap_service.yml b/ansible/roles/keystone/tasks/bootstrap_service.yml index 0a296fb2b8..8a20685edd 100644 --- a/ansible/roles/keystone/tasks/bootstrap_service.yml +++ b/ansible/roles/keystone/tasks/bootstrap_service.yml @@ -1,4 +1,15 @@ --- +- name: Checking for any running keystone_fernet containers + become: true + kolla_container_facts: + name: + - keystone_fernet + register: container_facts + +- name: Group nodes where keystone_fernet is running + group_by: + key: keystone_fernet_{{ container_facts['keystone_fernet'].State | default('bootstrap') }} + - name: Running Keystone bootstrap 
container vars: keystone: "{{ keystone_services.keystone }}" @@ -17,4 +28,29 @@ restart_policy: no volumes: "{{ keystone.volumes|reject('equalto', '')|list }}" run_once: True - delegate_to: "{{ groups['keystone'][0] }}" + +- name: Running Keystone fernet bootstrap container + vars: + keystone_fernet: "{{ keystone_services['keystone-fernet'] }}" + become: true + kolla_docker: + action: "start_container" + common_options: "{{ docker_common_options }}" + detach: False + environment: + KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}" + image: "{{ keystone_fernet.image }}" + labels: + BOOTSTRAP: + command: > + bash -c 'sudo -E kolla_set_configs && + keystone-manage --config-file /etc/keystone/keystone.conf + fernet_setup --keystone-user {{ keystone_username }} --keystone-group {{ keystone_groupname }} && ls -l /etc/keystone/fernet-keys/' + name: "bootstrap_keystone_fernet" + restart_policy: no + volumes: "{{ keystone_fernet.volumes|reject('equalto', '')|list }}" + run_once: True + delegate_to: "{{ groups['keystone_fernet_bootstrap'][0] }}" + when: + - keystone_token_provider == 'fernet' + - groups['keystone_fernet_running'] is not defined diff --git a/ansible/roles/keystone/tasks/deploy.yml b/ansible/roles/keystone/tasks/deploy.yml index cf24dfb30b..7a73af2b53 100644 --- a/ansible/roles/keystone/tasks/deploy.yml +++ b/ansible/roles/keystone/tasks/deploy.yml @@ -10,7 +10,7 @@ - name: Flush handlers meta: flush_handlers -- include_tasks: init_fernet.yml +- include_tasks: distribute_fernet.yml when: - keystone_token_provider == 'fernet' diff --git a/ansible/roles/keystone/tasks/distribute_fernet.yml b/ansible/roles/keystone/tasks/distribute_fernet.yml new file mode 100644 index 0000000000..184acac840 --- /dev/null +++ b/ansible/roles/keystone/tasks/distribute_fernet.yml @@ -0,0 +1,19 @@ +--- +- name: Waiting for Keystone SSH port to be UP + wait_for: + host: "{{ api_interface_address }}" + port: "{{ keystone_ssh_port }}" + connect_timeout: 1 + register: 
check_keystone_ssh_port + until: check_keystone_ssh_port is success + retries: 10 + delay: 5 + +- name: Run key distribution + become: true + command: docker exec -t keystone_fernet /usr/bin/fernet-push.sh + run_once: True + delegate_to: >- + {% if groups['keystone_fernet_running'] is defined -%} + {{ groups['keystone_fernet_running'][0] }} + {%- else -%}{{ groups['keystone'][0] }}{%- endif %} diff --git a/ansible/roles/keystone/tasks/init_fernet.yml b/ansible/roles/keystone/tasks/init_fernet.yml deleted file mode 100644 index 9fa0769468..0000000000 --- a/ansible/roles/keystone/tasks/init_fernet.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- -- name: Waiting for Keystone SSH port to be UP - wait_for: - host: "{{ api_interface_address }}" - port: "{{ keystone_ssh_port }}" - connect_timeout: 1 - register: check_keystone_ssh_port - until: check_keystone_ssh_port is success - retries: 10 - delay: 5 - -- name: Initialise fernet key authentication - become: true - command: "docker exec -t keystone_fernet kolla_keystone_bootstrap {{ keystone_username }} {{ keystone_groupname }}" - register: fernet_create - changed_when: fernet_create.stdout.find('localhost | SUCCESS => ') != -1 and (fernet_create.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed - until: fernet_create.stdout.split()[2] == 'SUCCESS' or fernet_create.stdout.find('Key repository is already initialized') != -1 - retries: 10 - delay: 5 - run_once: True - delegate_to: "{{ groups['keystone'][0] }}" - -- name: Run key distribution - become: true - command: docker exec -t keystone_fernet /usr/bin/fernet-push.sh - run_once: True - delegate_to: "{{ groups['keystone'][0] }}" diff --git a/ansible/roles/keystone/templates/fernet-node-sync.sh.j2 b/ansible/roles/keystone/templates/fernet-node-sync.sh.j2 index 050198f0d8..7556eed518 100644 --- a/ansible/roles/keystone/templates/fernet-node-sync.sh.j2 +++ b/ansible/roles/keystone/templates/fernet-node-sync.sh.j2 @@ -4,10 +4,26 @@ set -o errexit set -o pipefail # Get 
data on the fernet tokens -TOKEN_CHECK=$(/usr/bin/python{{ distro_python_version }} /usr/bin/fetch_fernet_tokens.py -t {{ fernet_token_expiry }} -n {{ (groups['keystone'] | length) + 1 }}) +# NOTE(mnasiadka): Check for existence of at least two tokens (should exist after bootstrap) +TOKEN_CHECK=$(/usr/bin/python{{ distro_python_version }} /usr/bin/fetch_fernet_tokens.py -t {{ fernet_token_expiry }} -n 2) + +# Ensure tokens are populated +n=0 +while /usr/bin/python{{ distro_python_version }} /usr/bin/fetch_fernet_tokens.py -t 86400 -n 1 | grep -q '"populated": false'; do + if [ $n -lt 10 ]; then + n=$(( n + 1 )) + echo "ERROR: Fernet tokens have not been populated, rechecking in 1 minute" + echo "DEBUG: /etc/keystone/fernet-keys contents:" + ls -l /etc/keystone/fernet-keys/ + sleep 60 + else + echo "CRITICAL: Waited for 10 minutes - failing" + exit 1 + fi +done # Ensure the primary token exists and is not stale -if $(echo "$TOKEN_CHECK" | grep -q '"update_required":"false"'); then +if $(echo "$TOKEN_CHECK" | grep -q '"update_required": false'); then exit 0; fi diff --git a/ansible/roles/keystone/templates/fernet-push.sh.j2 b/ansible/roles/keystone/templates/fernet-push.sh.j2 index 6179cb2a81..5aeda018fc 100644 --- a/ansible/roles/keystone/templates/fernet-push.sh.j2 +++ b/ansible/roles/keystone/templates/fernet-push.sh.j2 @@ -1,5 +1,8 @@ #!/bin/bash +set -o errexit +set -o pipefail + {% for host in groups['keystone'] %} {% if inventory_hostname != host %} /usr/bin/rsync -az -e 'ssh -i /var/lib/keystone/.ssh/id_rsa -p {{ hostvars[host]['keystone_ssh_port'] }} -F /var/lib/keystone/.ssh/config' --delete /etc/keystone/fernet-keys/ keystone@{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:/etc/keystone/fernet-keys diff --git a/ansible/roles/keystone/templates/fernet-rotate.sh.j2 b/ansible/roles/keystone/templates/fernet-rotate.sh.j2 index 3ef7a0e63c..a67aa19e26 100644 --- a/ansible/roles/keystone/templates/fernet-rotate.sh.j2 +++ 
b/ansible/roles/keystone/templates/fernet-rotate.sh.j2 @@ -1,5 +1,8 @@ #!/bin/bash +set -o errexit +set -o pipefail + keystone-manage --config-file /etc/keystone/keystone.conf fernet_rotate --keystone-user {{ keystone_username }} --keystone-group {{ keystone_groupname }} /usr/bin/fernet-push.sh diff --git a/releasenotes/notes/bug-1846789-fix-fernet-bootstrap-36f87e36e4dc6ec9.yaml b/releasenotes/notes/bug-1846789-fix-fernet-bootstrap-36f87e36e4dc6ec9.yaml new file mode 100644 index 0000000000..c60bd36344 --- /dev/null +++ b/releasenotes/notes/bug-1846789-fix-fernet-bootstrap-36f87e36e4dc6ec9.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Rework keystone fernet bootstrap which had tendencies to fail on multinode + setups. See `bug 1846789 <https://bugs.launchpad.net/kolla-ansible/+bug/1846789>`__ for + details.