diff --git a/tripleo_ansible/roles/tripleo-container-manage/defaults/main.yml b/tripleo_ansible/roles/tripleo-container-manage/defaults/main.yml
index ef12548ff..151be69ca 100644
--- a/tripleo_ansible/roles/tripleo-container-manage/defaults/main.yml
+++ b/tripleo_ansible/roles/tripleo-container-manage/defaults/main.yml
@@ -26,3 +26,4 @@ tripleo_container_manage_config_patterns: 'hashed-*.json'
 tripleo_container_manage_debug: false
 tripleo_container_manage_healthcheck_disabled: false
 tripleo_container_manage_log_path: '/var/log/containers/stdouts'
+tripleo_container_manage_systemd_order: false
diff --git a/tripleo_ansible/roles/tripleo-container-manage/files/91-netns-placeholder-preset b/tripleo_ansible/roles/tripleo-container-manage/files/91-netns-placeholder-preset
new file mode 100644
index 000000000..8bf2ba2b0
--- /dev/null
+++ b/tripleo_ansible/roles/tripleo-container-manage/files/91-netns-placeholder-preset
@@ -0,0 +1 @@
+enable netns-placeholder.service
diff --git a/tripleo_ansible/roles/tripleo-container-manage/files/91-tripleo-container-shutdown-preset b/tripleo_ansible/roles/tripleo-container-manage/files/91-tripleo-container-shutdown-preset
new file mode 100644
index 000000000..675a9c7cb
--- /dev/null
+++ b/tripleo_ansible/roles/tripleo-container-manage/files/91-tripleo-container-shutdown-preset
@@ -0,0 +1 @@
+enable tripleo-container-shutdown.service
diff --git a/tripleo_ansible/roles/tripleo-container-manage/files/netns-placeholder-service b/tripleo_ansible/roles/tripleo-container-manage/files/netns-placeholder-service
new file mode 100644
index 000000000..57326edaa
--- /dev/null
+++ b/tripleo_ansible/roles/tripleo-container-manage/files/netns-placeholder-service
@@ -0,0 +1,11 @@
+[Unit]
+Description=Create netns directory
+Before=tripleo-container-shutdown.service
+Wants=network.target
+[Service]
+Type=oneshot
+ExecStart=/sbin/ip netns add placeholder
+ExecStop=/sbin/ip netns delete placeholder
+KillMode=process
+[Install]
+WantedBy=multi-user.target
diff --git a/tripleo_ansible/roles/tripleo-container-manage/files/tripleo-container-shutdown b/tripleo_ansible/roles/tripleo-container-manage/files/tripleo-container-shutdown
new file mode 100644
index 000000000..cba9ecaec
--- /dev/null
+++ b/tripleo_ansible/roles/tripleo-container-manage/files/tripleo-container-shutdown
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+TIMEOUT=${1:-90}
+PARALLEL=${2:-10}
+
+if command -v dnf >/dev/null;then
+    if command -v podman >/dev/null; then
+        containers=$(podman ps --filter label=managed_by=tripleo_ansible --format {{.Names}})
+        for c in $containers; do
+            logger -p warning "WARNING ($c) Container $c managed by tripleo-ansible is not stopped yet"
+            logger -p warning "WARNING ($c) Check systemd logs: journalctl -u tripleo_$c"
+        done
+    fi
+else
+    if command -v docker >/dev/null; then
+        /usr/bin/docker ps --format \"{{.Names}}\" --filter "label=managed_by=tripleo_ansible" | \
+        /usr/bin/xargs -n 1 -P $PARALLEL /usr/bin/docker stop --time=$TIMEOUT
+    fi
+fi
diff --git a/tripleo_ansible/roles/tripleo-container-manage/files/tripleo-container-shutdown-service b/tripleo_ansible/roles/tripleo-container-manage/files/tripleo-container-shutdown-service
new file mode 100644
index 000000000..e30c9b1a1
--- /dev/null
+++ b/tripleo_ansible/roles/tripleo-container-manage/files/tripleo-container-shutdown-service
@@ -0,0 +1,21 @@
+[Unit]
+Description=TripleO Container Shutdown
+Documentation=https://docs.openstack.org/tripleo-docs/
+# Note: docker.service will be removed once CentOS8 / RHEL8 will be the default
+# platform, but for now we keep it for Pacemaker testing.
+# pacemaker.service is needed here, to make sure that all non-Pacemaker managed
+# containers are stopped before Pacemaker.
+After=pacemaker.service docker.service network-online.target iptables.service ip6tables.service
+Before=shutdown.target
+RefuseManualStop=yes
+
+[Service]
+Type=oneshot
+ExecStart=/bin/true
+RemainAfterExit=yes
+ExecStop=/usr/libexec/tripleo-container-shutdown
+# Wait at most 900 seconds for all containers to shutdown
+TimeoutStopSec=900
+
+[Install]
+WantedBy=multi-user.target
diff --git a/tripleo_ansible/roles/tripleo-container-manage/files/tripleo-start-podman-container b/tripleo_ansible/roles/tripleo-container-manage/files/tripleo-start-podman-container
new file mode 100644
index 000000000..3423b5df6
--- /dev/null
+++ b/tripleo_ansible/roles/tripleo-container-manage/files/tripleo-start-podman-container
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+
+PODMAN=/usr/bin/podman
+
+NAME=$1
+
+if [ -z "$NAME" ]; then
+    echo "No name provided, cannot start container. Aborting" >&2
+    exit 1
+fi
+
+# Start container. Podman does not fail if container is already started
+$PODMAN start "$NAME"
+rc=$?
+
+if [ $rc -ne 0 ]; then
+    echo "Error starting podman container $NAME: $rc" >&2
+    exit $rc
+fi
+
+# The environment can be configured to create additional drop-in
+# dependencies for the scopes associated with the container. This is
+# done to prevent systemd from stopping the scopes early and break the
+# configured dependencies in tripleo_*.services
+# Stop here otherwise.
+if [ ! -f "/etc/sysconfig/podman_drop_in" ]; then
+    exit 0
+fi
+
+# Retrieve the container's ID
+# Note: currently the only API to retrieve the CID is either
+#   1) via "podman inspect" but we don't want to use it because it can be
+#      very slow under IO load.
+#   2) by running "podman start $NAME" but that command only returns the CID
+#      if the container is already running. Otherwise it returns the container
+#      name, which would break us.
+# The only other means is via "podman ps". ps option "--filter" cannot
+# enforce full name matches, so use grep instead and stop at first match.
+CID=$($PODMAN ps --no-trunc --format '{{.ID}} {{.Names}}' | grep -F -w -m1 "$NAME" | cut -d' ' -f1)
+
+if [ -z "$CID" ]; then
+    echo "Container ID not found for \"$NAME\". Not creating drop-in dependency" >&2
+    exit 1
+else
+    echo "Creating additional drop-in dependency for \"$NAME\" ($CID)"
+fi
+
+# Note: a tripleo-ansible container has three systemd files associated with it:
+# 1. tripleo_*.service - the regular systemd service generated by tripleo-ansible
+# 2. libpod-conmon*.scope - created dynamically by podman. runs a conmon
+#    process that creates a pidfile for tripleo_*.service and monitors it.
+# 3. libpod-*.scope - created dynamically by runc. for cgroups accounting
+#
+# tripleo-ansible can only set start/stop dependencies on 1., not 2. and 3.
+# On reboot, systemd is allowed to stop 2. or 3. at any time, which can
+# cause 1. to stop before its deps as set up by tripleo-ansible.
+#
+# To prevent an unexpected stop of 1. from happening, inject a dependency
+# in 2. and 3. so that systemd is forbidden to stop those scopes
+# automatically until tripleo-container-shutdown.service is stopped.
+# That way, when systemd stops 1., the two scopes 2. and 3. will
+# finish in sequence and tripleo-ansible dependencies will be respected.
+
+for scope in "libpod-$CID.scope.d" "libpod-conmon-$CID.scope.d"; do
+    if [ $rc -eq 0 ] && [ ! -d /run/systemd/transient/"$scope" ]; then
+        mkdir -p /run/systemd/transient/"$scope" && \
+        echo -e "[Unit]\nBefore=tripleo-container-shutdown.service" > /run/systemd/transient/"$scope"/dep.conf && \
+        chmod ago+r /run/systemd/transient/"$scope" /run/systemd/transient/"$scope"/dep.conf
+        rc=$?
+    fi
+done
+
+if [ $rc -ne 0 ]; then
+    echo "Could not create drop-in dependency for \"$NAME\" ($CID)" >&2
+    exit 1
+fi
+
+systemctl daemon-reload
+rc=$?
+if [ $rc -ne 0 ]; then
+    echo "Could not refresh service definition after creating drop-in for \"$NAME\": $rc" >&2
+    exit 1
+fi
diff --git a/tripleo_ansible/roles/tripleo-container-manage/molecule/default/playbook.yml b/tripleo_ansible/roles/tripleo-container-manage/molecule/default/playbook.yml
index 54ce00057..8728e6f69 100644
--- a/tripleo_ansible/roles/tripleo-container-manage/molecule/default/playbook.yml
+++ b/tripleo_ansible/roles/tripleo-container-manage/molecule/default/playbook.yml
@@ -22,6 +22,7 @@
     tripleo_container_manage_config: '/tmp/container-configs'
     tripleo_container_manage_debug: true
     tripleo_container_manage_config_patterns: '*.json'
+    tripleo_container_manage_systemd_order: true
   tasks:
     - include_role:
         name: tripleo-container-manage
diff --git a/tripleo_ansible/roles/tripleo-container-manage/tasks/main.yml b/tripleo_ansible/roles/tripleo-container-manage/tasks/main.yml
index cc00b4cb0..acc335af7 100644
--- a/tripleo_ansible/roles/tripleo-container-manage/tasks/main.yml
+++ b/tripleo_ansible/roles/tripleo-container-manage/tasks/main.yml
@@ -67,7 +67,17 @@
   set_fact:
     all_containers_hash: "{{ container_hashes.results | map(attribute='ansible_facts.container_hash') | list | singledict() }}"
 
+- name: "Manage systemd shutdown files"
+  become: true
+  when:
+    - tripleo_container_manage_systemd_order
+  block:
+    - name: Include tasks for systemd shutdown service
+      include_tasks: shutdown.yml
+
 - name: "Manage containers from {{ tripleo_container_manage_config }}"
+  when:
+    - tripleo_container_manage_cli == 'podman'
   become: true
   block:
     - name: "Delete containers from {{ tripleo_container_manage_config }}"
diff --git a/tripleo_ansible/roles/tripleo-container-manage/tasks/shutdown.yml b/tripleo_ansible/roles/tripleo-container-manage/tasks/shutdown.yml
new file mode 100644
index 000000000..ed3ebbc89
--- /dev/null
+++ b/tripleo_ansible/roles/tripleo-container-manage/tasks/shutdown.yml
@@ -0,0 +1,103 @@
+---
+# Copyright 2019 Red Hat, Inc.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+- name: Cleanup Paunch services and files
+  block:
+    - name: Check if /usr/lib/systemd/system/paunch-container-shutdown.service exists
+      stat:
+        path: /usr/lib/systemd/system/paunch-container-shutdown.service
+      register: paunch_shutdown
+    - name: Tear-down paunch-container-shutdown
+      when:
+        - paunch_shutdown.stat.exists
+      block:
+        - name: Allow paunch-container-shutdown to be stopped
+          lineinfile:
+            path: /usr/lib/systemd/system/paunch-container-shutdown.service
+            regexp: '^RefuseManualStop'
+            line: 'RefuseManualStop=no'
+        - name: Force systemd to reread configs
+          systemd:
+            daemon_reload: true
+        - name: Disable and stop paunch-container-shutdown service
+          systemd:
+            name: paunch-container-shutdown
+            state: stopped
+            enabled: false
+    # TODO(emilien): this task can be removed later when paunch isn't a
+    # dependency of python-tripleoclient. It'll be replaced by an rpm removal.
+    - name: "Remove paunch files for systemd"
+      file:
+        path: "{{ item }}"
+        state: absent
+      loop:
+        - /usr/libexec/paunch-container-shutdown
+        - /usr/libexec/paunch-start-podman-container
+        - /usr/lib/systemd/system/paunch-container-shutdown.service
+        - /usr/lib/systemd/system-preset/91-paunch-container-shutdown.preset
+
+- name: Create TripleO Container systemd service
+  block:
+    - name: "Create tripleo helper scripts in /usr/libexec"
+      copy:
+        src: "{{ role_path }}/files/{{ item }}"
+        dest: "/usr/libexec/{{ item }}"
+        mode: '0700'  # helper scripts are executed, root only
+        owner: root
+        group: root
+      loop:
+        - 'tripleo-container-shutdown'
+        - 'tripleo-start-podman-container'
+    - name: "Create /usr/lib/systemd/system/tripleo-container-shutdown.service"
+      copy:
+        src: "{{ role_path }}/files/tripleo-container-shutdown-service"
+        dest: "/usr/lib/systemd/system/tripleo-container-shutdown.service"
+        mode: '0644'  # unit files are only read by systemd, no exec bit
+        owner: root
+        group: root
+    - name: "Create /usr/lib/systemd/system-preset/91-tripleo-container-shutdown.preset"
+      copy:
+        src: "{{ role_path }}/files/91-tripleo-container-shutdown-preset"
+        dest: "/usr/lib/systemd/system-preset/91-tripleo-container-shutdown.preset"
+        mode: '0644'  # preset files are plain configuration, no exec bit
+        owner: root
+        group: root
+    - name: Enable and start tripleo-container-shutdown
+      systemd:
+        name: tripleo-container-shutdown
+        state: started
+        enabled: true
+        daemon_reload: true
+    - name: "Create /usr/lib/systemd/system/netns-placeholder.service"
+      copy:
+        src: "{{ role_path }}/files/netns-placeholder-service"
+        dest: "/usr/lib/systemd/system/netns-placeholder.service"
+        mode: '0644'  # unit files are only read by systemd, no exec bit
+        owner: root
+        group: root
+    - name: "Create /usr/lib/systemd/system-preset/91-netns-placeholder.preset"
+      copy:
+        src: "{{ role_path }}/files/91-netns-placeholder-preset"
+        dest: "/usr/lib/systemd/system-preset/91-netns-placeholder.preset"
+        mode: '0644'  # preset files are plain configuration, no exec bit
+        owner: root
+        group: root
+    - name: Enable and start netns-placeholder
+      systemd:
+        name: netns-placeholder
+        state: started
+        enabled: true
+        daemon_reload: true
diff --git a/tripleo_ansible/roles/tripleo-container-manage/templates/systemd-healthcheck.j2 b/tripleo_ansible/roles/tripleo-container-manage/templates/systemd-healthcheck.j2
index 876d04038..4e40d487d 100644
--- a/tripleo_ansible/roles/tripleo-container-manage/templates/systemd-healthcheck.j2
+++ b/tripleo_ansible/roles/tripleo-container-manage/templates/systemd-healthcheck.j2
@@ -1,6 +1,6 @@
 [Unit]
 Description=tripleo_{{ container_name }} healthcheck
-After=paunch-container-shutdown.service tripleo_{{ container_name }}.service
+After=tripleo-container-shutdown.service tripleo_{{ container_name }}.service
 Requisite=tripleo_{{ container_name }}.service
 [Service]
 Type=oneshot
diff --git a/tripleo_ansible/roles/tripleo-container-manage/templates/systemd-service.j2 b/tripleo_ansible/roles/tripleo-container-manage/templates/systemd-service.j2
index e7f110600..aa3c4817b 100644
--- a/tripleo_ansible/roles/tripleo-container-manage/templates/systemd-service.j2
+++ b/tripleo_ansible/roles/tripleo-container-manage/templates/systemd-service.j2
@@ -1,11 +1,11 @@
 [Unit]
 Description={{ container_name }} container
-After=paunch-container-shutdown.service
-Wants={{ container_data.depends_on | default([]) | join(',') }}
+After=tripleo-container-shutdown.service
+Wants={{ container_data.depends_on | default([]) | join(' ') }}
 [Service]
 Restart=always
 {% if container_data.depends_on is defined and (container_data.depends_on | length > 0) and podman_drop_in | default('false') %}
-ExecStart=/usr/libexec/paunch-start-podman-container {{ container_name }}
+ExecStart=/usr/libexec/tripleo-start-podman-container {{ container_name }}
 {% else %}
 ExecStart=/usr/bin/podman start {{ container_name }}
 {% endif %}