tripleo-container-manage: port paunch-services

paunch-services used to be useful for container start/stop ordering,
when on the host some containers are managed by Pacemaker and some
others by Paunch.

We need to keep that feature so we are now porting these scripts and
services into tripleo-ansible.

These files were managed by Paunch:
%{_libexecdir}/paunch-container-shutdown
%{_libexecdir}/paunch-start-podman-container
%{_unitdir}/paunch-container-shutdown.service
%{_presetdir}/91-paunch-container-shutdown.preset
%{_unitdir}/netns-placeholder.service
%{_presetdir}/91-netns-placeholder.preset

We now handle them via Ansible and clean up the Paunch versions.
It creates the exact same files from:
https://github.com/rdo-packages/paunch-distgit

This feature is disabled by default and will be explicitly enabled by
THT later. Molecule tests enable it though for testing coverage.

Story: 2006732
Task: 37382
Change-Id: I4f79429baab50bc0199fb65fe84641908d83935d
This commit is contained in:
Emilien Macchi 2019-11-07 19:58:02 +01:00
parent 3279244ff9
commit 6e76f444df
12 changed files with 254 additions and 3 deletions

View File

@ -26,3 +26,4 @@ tripleo_container_manage_config_patterns: 'hashed-*.json'
tripleo_container_manage_debug: false
tripleo_container_manage_healthcheck_disabled: false
tripleo_container_manage_log_path: '/var/log/containers/stdouts'
# When true, the role includes shutdown.yml, which installs the systemd units
# and helper scripts used to order container shutdown. Disabled by default.
tripleo_container_manage_systemd_order: false

View File

@ -0,0 +1 @@
# systemd preset: mark netns-placeholder.service as enabled.
enable netns-placeholder.service

View File

@ -0,0 +1 @@
# systemd preset: mark tripleo-container-shutdown.service as enabled.
enable tripleo-container-shutdown.service

View File

@ -0,0 +1,11 @@
# One-shot unit that adds a network namespace named "placeholder" with
# "ip netns add" and deletes it again with "ip netns delete" on stop.
[Unit]
Description=Create netns directory
# Ordered before tripleo-container-shutdown.service.
Before=tripleo-container-shutdown.service
Wants=network.target
[Service]
# NOTE(review): RemainAfterExit= is not set, so this oneshot unit does not stay
# active once ExecStart has finished - confirm that only the side effect of
# "ip netns add" (see Description) is wanted, not a long-lived namespace.
Type=oneshot
ExecStart=/sbin/ip netns add placeholder
ExecStop=/sbin/ip netns delete placeholder
KillMode=process
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,19 @@
#!/usr/bin/env bash
#
# tripleo-container-shutdown: configured as ExecStop= of
# tripleo-container-shutdown.service.
#
# Usage: tripleo-container-shutdown [TIMEOUT [PARALLEL]]
#   TIMEOUT   seconds given to "docker stop --time" (default: 90)
#   PARALLEL  number of "docker stop" processes run concurrently (default: 10)
#
# When dnf is available the podman code path is used: containers labelled
# managed_by=tripleo_ansible that are still listed are only reported through
# logger, they are not stopped here. Without dnf the docker code path stops
# the labelled containers.

# Log two warnings for every labelled container that "podman ps" still lists.
warn_running_podman_containers() {
  local name
  while IFS= read -r name; do
    [[ -n "$name" ]] || continue
    logger -p warning "WARNING ($name) Container $name managed by tripleo-ansible is not stopped yet"
    logger -p warning "WARNING ($name) Check systemd logs: journalctl -u tripleo_$name"
  done < <(podman ps --filter label=managed_by=tripleo_ansible --format '{{.Names}}')
}

# Stop every labelled docker container.
# Arguments: $1 - stop timeout in seconds, $2 - number of parallel stops
stop_docker_containers() {
  local timeout=$1
  local parallel=$2
  # -r: without it GNU xargs runs "docker stop" once with no container name
  # when nothing is listed, which makes the command (and ExecStop) fail.
  /usr/bin/docker ps --format '{{.Names}}' --filter "label=managed_by=tripleo_ansible" \
    | /usr/bin/xargs -r -n 1 -P "$parallel" /usr/bin/docker stop --time="$timeout"
}

main() {
  local timeout=${1:-90}
  local parallel=${2:-10}

  if command -v dnf >/dev/null; then
    if command -v podman >/dev/null; then
      warn_running_podman_containers
    fi
  elif command -v docker >/dev/null; then
    stop_docker_containers "$timeout" "$parallel"
  fi
}

main "$@"

View File

@ -0,0 +1,21 @@
# Unit that does nothing at start and runs
# /usr/libexec/tripleo-container-shutdown when it is stopped.
[Unit]
Description=TripleO Container Shutdown
Documentation=https://docs.openstack.org/tripleo-docs/
# Note: docker.service will be removed once CentOS8 / RHEL8 will be the default
# platform, but for now we keep it for Pacemaker testing.
# pacemaker.service is needed here, to make sure that all non-Pacemaker managed
# containers are stopped before Pacemaker.
After=pacemaker.service docker.service network-online.target iptables.service ip6tables.service
Before=shutdown.target
# Manual "systemctl stop" requests for this unit are refused.
RefuseManualStop=yes
[Service]
Type=oneshot
# No start action; RemainAfterExit keeps the unit active after /bin/true
# returns, so ExecStop only runs when the unit is stopped.
ExecStart=/bin/true
RemainAfterExit=yes
ExecStop=/usr/libexec/tripleo-container-shutdown
# Wait at most 900 seconds for all containers to shutdown
TimeoutStopSec=900
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,83 @@
#!/usr/bin/env bash
#
# tripleo-start-podman-container: start a podman container and, when
# /etc/sysconfig/podman_drop_in exists, create systemd drop-ins that order the
# container's scopes before tripleo-container-shutdown.service.
#
# Usage: tripleo-start-podman-container NAME
#   NAME  name of the podman container to start
#
# Exit status: 0 on success (also when drop-ins are not enabled), the status
# of "podman start" when the start fails, 1 on any other error.

PODMAN=/usr/bin/podman
NAME=$1

if [ -z "$NAME" ]; then
  echo "No name provided, cannot start container. Aborting" >&2
  exit 1
fi

# Start container. Podman does not fail if container is already started
"$PODMAN" start "$NAME"
rc=$?
if [ "$rc" -ne 0 ]; then
  echo "Error starting podman container $NAME: $rc" >&2
  exit "$rc"
fi

# The environment can be configured to create additional drop-in
# dependencies for the scopes associated with the container. This is
# done to prevent systemd from stopping the scopes early and break the
# configured dependencies in tripleo_*.services
# Stop here otherwise.
if [ ! -f "/etc/sysconfig/podman_drop_in" ]; then
  exit 0
fi

# Retrieve the container's ID
# Note: currently the only API to retrieve the CID is either
# 1) via "podman inspect" but we don't want to use it because it can be
#    very slow under IO load.
# 2) by running "podman start $NAME" but that command only returns the CID
#    if the container is already running. Otherwise it returns the container
#    name, which would break us.
# The only other means is via "podman ps". ps option "--filter" cannot
# enforce full name matches, so compare the name column exactly with awk and
# stop at the first match. A word-based grep is not enough here: "foo" would
# also match "foo-bar" or a substring of the ID column. The name column is
# split on "," in case several names are listed for one container.
CID=$("$PODMAN" ps --no-trunc --format '{{.ID}} {{.Names}}' \
  | awk -v name="$NAME" '{
      count = split($2, names, ",")
      for (i = 1; i <= count; i++) {
        if (names[i] == name) { print $1; exit }
      }
    }')
if [ -z "$CID" ]; then
  # Diagnostics go to stderr, like every other error in this script.
  echo "Container ID not found for \"$NAME\". Not creating drop-in dependency" >&2
  exit 1
else
  echo "Creating additional drop-in dependency for \"$NAME\" ($CID)"
fi

# Note: a tripleo-ansible container has three systemd files associated with it:
# 1. tripleo_*.service - the regular systemd service generated by tripleo-ansible
# 2. libpod-conmon*.scope - created dynamically by podman. runs a conmon
#    process that creates a pidfile for tripleo_*.service and monitor it.
# 3. libpod-*.scope - created dynamically by runc. for cgroups accounting
#
# tripleo-ansible can only set start/stop dependencies on 1., not 2. and 3.
# On reboot, systemd is allowed to stop 2. or 3. at any time, which can
# cause 1. to stop before its deps as set up by tripleo-ansible.
#
# To prevent an unexpected stop of 1. from happening, inject a dependency
# in 2. and 3. so that systemd is forbidden to stop those scopes
# automatically until tripleo-container-shutdown.service is stopped.
# That way, when systemd stops 1., the two scopes 2. and 3. will
# finish in sequence and tripleo-ansible dependencies will be respected.
dropin_root=/run/systemd/transient
for scope in "libpod-$CID.scope.d" "libpod-conmon-$CID.scope.d"; do
  # Existing drop-in directories are left untouched; after the first failure
  # the remaining scopes are skipped.
  if [ "$rc" -eq 0 ] && [ ! -d "$dropin_root/$scope" ]; then
    mkdir -p "$dropin_root/$scope" \
      && printf '[Unit]\nBefore=tripleo-container-shutdown.service\n' > "$dropin_root/$scope/dep.conf" \
      && chmod a+r "$dropin_root/$scope" "$dropin_root/$scope/dep.conf"
    rc=$?
  fi
done

if [ "$rc" -ne 0 ]; then
  echo "Could not create drop-in dependency for \"$NAME\" ($CID)" >&2
  exit 1
fi

# Make systemd pick up the new drop-ins.
systemctl daemon-reload
rc=$?
if [ "$rc" -ne 0 ]; then
  echo "Could not refresh service definition after creating drop-in for \"$NAME\": $rc" >&2
  exit 1
fi

View File

@ -22,6 +22,7 @@
tripleo_container_manage_config: '/tmp/container-configs'
tripleo_container_manage_debug: true
tripleo_container_manage_config_patterns: '*.json'
tripleo_container_manage_systemd_order: true
tasks:
- include_role:
name: tripleo-container-manage

View File

@ -67,7 +67,17 @@
set_fact:
all_containers_hash: "{{ container_hashes.results | map(attribute='ansible_facts.container_hash') | list | singledict() }}"
# Optional step: only when tripleo_container_manage_systemd_order is true,
# include shutdown.yml (with privilege escalation via become).
- name: "Manage systemd shutdown files"
become: true
when:
- tripleo_container_manage_systemd_order
block:
- name: Include tasks for systemd shutdown service
include_tasks: shutdown.yml
- name: "Manage containers from {{ tripleo_container_manage_config }}"
when:
- tripleo_container_manage_cli == 'podman'
become: true
block:
- name: "Delete containers from {{ tripleo_container_manage_config }}"

View File

@ -0,0 +1,103 @@
---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Part 1: remove the Paunch-managed shutdown service and helper files.
# The Paunch unit sets RefuseManualStop, so it is rewritten to allow a stop
# before the service is disabled and stopped.
- name: Cleanup Paunch services and files
  block:
    - name: Check if /usr/lib/systemd/system/paunch-container-shutdown.service exists
      stat:
        path: /usr/lib/systemd/system/paunch-container-shutdown.service
      register: paunch_shutdown
    - name: Tear-down paunch-container-shutdown
      when:
        - paunch_shutdown.stat.exists
      block:
        - name: Allow paunch-container-shutdown to be stopped
          lineinfile:
            path: /usr/lib/systemd/system/paunch-container-shutdown.service
            regexp: '^RefuseManualStop'
            line: 'RefuseManualStop=no'
        - name: Force systemd to reread configs
          systemd:
            daemon_reload: true
        - name: Disable and stop paunch-container-shutdown service
          systemd:
            name: paunch-container-shutdown
            state: stopped
            enabled: false
    # TODO(emilien): this task can be removed later when paunch isn't a
    # dependency of python-tripleoclient. It'll be replaced by an rpm removal.
    # Removing absent files is a no-op, so this runs unconditionally.
    - name: "Remove paunch files for systemd"
      file:
        path: "{{ item }}"
        state: absent
      loop:
        - /usr/libexec/paunch-container-shutdown
        - /usr/libexec/paunch-start-podman-container
        - /usr/lib/systemd/system/paunch-container-shutdown.service
        - /usr/lib/systemd/system-preset/91-paunch-container-shutdown.preset

# Part 2: install the tripleo-ansible replacements from the role's files/
# directory and enable/start the two services.
- name: Create TripleO Container systemd service
  block:
    # Helper scripts are executed by systemd as root, hence 0700.
    - name: "Create /usr/libexec/{{ item }}"
      copy:
        src: "{{ role_path }}/files/{{ item }}"
        dest: "/usr/libexec/{{ item }}"
        mode: '0700'
        owner: root
        group: root
      loop:
        - 'tripleo-container-shutdown'
        - 'tripleo-start-podman-container'
    # Unit and preset files are configuration, not executables: systemd logs
    # a warning for unit files carrying executable bits, so install them 0644.
    - name: "Create /usr/lib/systemd/system/tripleo-container-shutdown.service"
      copy:
        src: "{{ role_path }}/files/tripleo-container-shutdown-service"
        dest: "/usr/lib/systemd/system/tripleo-container-shutdown.service"
        mode: '0644'
        owner: root
        group: root
    - name: "Create /usr/lib/systemd/system-preset/91-tripleo-container-shutdown.preset"
      copy:
        src: "{{ role_path }}/files/91-tripleo-container-shutdown-preset"
        dest: "/usr/lib/systemd/system-preset/91-tripleo-container-shutdown.preset"
        mode: '0644'
        owner: root
        group: root
    - name: Enable and start tripleo-container-shutdown
      systemd:
        name: tripleo-container-shutdown
        state: started
        enabled: true
        daemon_reload: true
    - name: "Create /usr/lib/systemd/system/netns-placeholder.service"
      copy:
        src: "{{ role_path }}/files/netns-placeholder-service"
        dest: "/usr/lib/systemd/system/netns-placeholder.service"
        mode: '0644'
        owner: root
        group: root
    - name: "Create /usr/lib/systemd/system-preset/91-netns-placeholder.preset"
      copy:
        src: "{{ role_path }}/files/91-netns-placeholder-preset"
        dest: "/usr/lib/systemd/system-preset/91-netns-placeholder.preset"
        mode: '0644'
        owner: root
        group: root
    - name: Enable and start netns-placeholder
      systemd:
        name: netns-placeholder
        state: started
        enabled: true
        daemon_reload: true

View File

@ -1,6 +1,6 @@
[Unit]
Description=tripleo_{{ container_name }} healthcheck
After=paunch-container-shutdown.service tripleo_{{ container_name }}.service
After=tripleo-container-shutdown.service tripleo_{{ container_name }}.service
Requisite=tripleo_{{ container_name }}.service
[Service]
Type=oneshot

View File

@ -1,11 +1,11 @@
{# Unit template for one container. ExecStart is either the start wrapper under /usr/libexec or a plain "podman start", depending on depends_on and podman_drop_in. #}
[Unit]
Description={{ container_name }} container
After=paunch-container-shutdown.service
After=tripleo-container-shutdown.service
{# NOTE(review): systemd dependency lists such as Wants= are whitespace-separated; join(',') yields a single token when depends_on has several entries - confirm the intended separator. #}
Wants={{ container_data.depends_on | default([]) | join(',') }}
[Service]
Restart=always
{# NOTE(review): default('false') is a non-empty string, which Jinja evaluates as true, so an undefined podman_drop_in does not disable this branch - confirm whether default(false) was intended. #}
{% if container_data.depends_on is defined and (container_data.depends_on | length > 0) and podman_drop_in | default('false') %}
ExecStart=/usr/libexec/paunch-start-podman-container {{ container_name }}
ExecStart=/usr/libexec/tripleo-start-podman-container {{ container_name }}
{% else %}
ExecStart=/usr/bin/podman start {{ container_name }}
{% endif %}