Introduce an Action Plugin to manage systemd services for containers

Instead of running a bunch of tasks to manage systemd resources, move
that logic into an action plugin, which should make the execution faster
and easier to debug as well.

Example of task:
- name: Manage container systemd services
  container_systemd:
    container_config:
      - keystone:
          image: quay.io/tripleo/keystone
          restart: always
      - mysql:
          image: quay.io/tripleo/mysql
          stop_grace_period: 25
          restart: always

The plugin returns a "restarted" key holding the list of services that
were actually restarted in systemd.

Note on testing: since that module is consumed by the
tripleo_container_manage role, there is no need to create dedicated
molecule tests; that role's molecule tests already cover containers
with a restart policy, so we'll re-use them.

Co-Authored-By: Alex Schultz <aschultz@redhat.com>
Co-Authored-By: Kevin Carter <kecarter@redhat.com>

Change-Id: I614766bd9b111bda9ddfea0a60b032e1dee09abc
This commit is contained in:
Emilien Macchi 2020-06-25 18:02:17 -04:00
parent 19774d0be4
commit af7f083066
7 changed files with 376 additions and 140 deletions

View File

@ -0,0 +1,363 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2020 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import, division, print_function
__metaclass__ = type
import copy
import os
import tenacity
import yaml
from ansible.errors import AnsibleActionFail
from ansible.plugins.action import ActionBase
from ansible.utils.display import Display
DISPLAY = Display()
# NOTE: this YAML is not only documentation. ActionModule parses it with
# yaml.safe_load() and uses the "options" mapping as its argument
# specification: an option without a "default" key is treated as required.
DOCUMENTATION = """
module: container_systemd
author:
  - "TripleO team"
version_added: '2.9'
short_description: Create systemd files and manage services to run containers
notes: []
description:
  - Manage the systemd unit files for containers with a restart policy and
    then make sure the services are started so the containers are running.
    It takes the container config data in entry to figure out how the unit
    files will be configured. It returns a list of services that were
    restarted.
requirements:
  - None
options:
  container_config:
    description:
      - List of container configurations
    type: list
    elements: dict
    required: true
  systemd_healthchecks:
    default: true
    description:
      - Whether or not we cleanup the old healthchecks with SystemD.
    type: bool
  restart_containers:
    description:
      - List of container names to be restarted
    default: []
    type: list
    elements: str
  debug:
    default: false
    description:
      - Whether or not debug is enabled.
    type: bool
"""
# Usage example shown in the plugin documentation.
EXAMPLES = """
- name: Manage container systemd services
  container_systemd:
    container_config:
      - keystone:
          image: quay.io/tripleo/keystone
          restart: always
      - mysql:
          image: quay.io/tripleo/mysql
          stop_grace_period: 25
          restart: always
"""
# Documentation of the "restarted" key that the plugin adds to its result.
RETURN = """
restarted:
  description: List of services that were restarted
  returned: always
  type: list
  sample:
    - tripleo_keystone.service
    - tripleo_mysql.service
"""
class ActionModule(ActionBase):
    """Action plugin managing the systemd units of containers.

    For every container found in ``container_config`` the plugin removes the
    ``tripleo_<name>.requires`` path, optionally cleans up the systemd
    healthcheck units, renders ``tripleo_<name>.service`` from the
    ``systemd-service.j2`` template and finally restarts the services whose
    unit file changed or which were listed in ``restart_containers``.
    """

    # Option specification parsed from the DOCUMENTATION YAML. An option that
    # has no "default" key is treated as required by _get_args().
    _VALID_ARGS = yaml.safe_load(DOCUMENTATION)['options']

    def _get_args(self):
        """Collect the task arguments, applying the documented defaults.

        :returns args: Dictionary mapping every documented option to the
                       value given in the task or to its default.
        :raises AnsibleActionFail: If an option without default is missing
                                   or set to None.
        """
        missing = []
        args = {}
        for option, vals in self._VALID_ARGS.items():
            if 'default' not in vals:
                value = self._task.args.get(option, None)
                if value is None:
                    missing.append(option)
                    continue
                args[option] = value
            else:
                # Hand out a copy so that a mutable default (e.g. the empty
                # list of restart_containers) stored on the class is never
                # shared between tasks.
                args[option] = self._task.args.get(
                    option, copy.deepcopy(vals['default']))
        if missing:
            raise AnsibleActionFail('Missing required parameters: {}'.format(
                ', '.join(missing)))
        return args

    def _cleanup_requires(self, container_names, task_vars):
        """Cleanup systemd requires files.

        Sets ``self.changed`` when at least one path was removed.

        :param container_names: List of container names.
        :param task_vars: Dictionary of Ansible task variables.
        """
        for name in container_names:
            path = "/etc/systemd/system/tripleo_{}.requires".format(name)
            if self.debug:
                DISPLAY.display('Removing {} file'.format(path))
            results = self._execute_module(
                module_name='file',
                module_args=dict(path=path, state='absent'),
                task_vars=task_vars
            )
            if results.get('changed', False):
                self.changed = True

    def _delete_service(self, name, task_vars):
        """Stop and disable the systemd healthcheck timer of a container.

        :param name: String for the container name; the unit acted upon is
                     ``tripleo_<name>_healthcheck.timer``.
        :param task_vars: Dictionary of Ansible task variables.
        :returns results: Result dictionary of the systemd module.
        """
        tvars = copy.deepcopy(task_vars)
        results = self._execute_module(
            module_name='systemd',
            module_args=dict(state='stopped',
                             name='tripleo_{}_healthcheck.timer'.format(name),
                             enabled=False,
                             daemon_reload=False),
            task_vars=tvars
        )
        return results

    def _cleanup_healthchecks(self, container_names, task_vars):
        """Cleanup systemd healthcheck files.

        For each container that still has a healthcheck timer unit file, the
        timer is stopped and disabled, then the healthcheck service and
        timer files are removed. Systemd is reloaded once at the end if any
        file was actually removed.

        :param container_names: List of container names.
        :param task_vars: Dictionary of Ansible task variables.
        """
        sysd_base = '/etc/systemd/system'
        files_ext = ('service', 'timer')
        systemd_reload = False
        for cname in container_names:
            h_path = os.path.join(
                sysd_base, 'tripleo_{}_healthcheck.timer'.format(cname))
            healthcheck_stat = self._execute_module(
                module_name='stat',
                module_args=dict(path=h_path),
                task_vars=task_vars
            )
            if not healthcheck_stat.get('stat', {}).get('exists', False):
                continue
            if self.debug:
                DISPLAY.display('Cleaning-up systemd healthcheck for '
                                '{}'.format(cname))
            self._delete_service(cname, task_vars)
            for ext in files_ext:
                file_path = 'tripleo_{}_healthcheck.{}'.format(cname, ext)
                full_path = os.path.join(sysd_base, file_path)
                results = self._execute_module(
                    module_name='file',
                    module_args=dict(path=full_path, state='absent'),
                    task_vars=task_vars
                )
                if results.get('changed', False):
                    self.changed = True
                    systemd_reload = True
        if systemd_reload:
            self._systemd_reload(task_vars)

    def _get_unit_template(self):
        """Return systemd unit template data

        The template is looked up relative to the role of the task when
        there is one, otherwise relative to the loader base directory.

        :returns data: Template data.
        :raises AnsibleActionFail: If the template file does not exist.
        """
        if self._task._role:
            file_path = self._task._role._role_path
        else:
            file_path = self._loader.get_basedir()
        # NOTE: if templates doesn't exist, it'll always return
        # file_path/systemd-service.j2
        # This file is required to exist from the
        # tripleo_container_manage role, as there is no
        # parameter to override it now.
        source = self._loader.path_dwim_relative(
            file_path,
            'templates',
            'systemd-service.j2'
        )
        if not os.path.exists(source):
            raise AnsibleActionFail('Template {} was '
                                    'not found'.format(source))
        with open(source) as template_file:
            data = template_file.read()
        return data

    def _create_units(self, container_config, task_vars):
        """Create system units and get list of changed services

        :param container_config: List of dictionaries for container configs.
        :param task_vars: Dictionary of Ansible task variables.
        :returns changed_containers: List of containers which has a new unit.
        :raises AnsibleActionFail: If a unit file could not be written.
        """
        try:
            remote_user = self._get_remote_user()
        except Exception:
            remote_user = task_vars.get('ansible_user')
            if not remote_user:
                remote_user = self._play_context.remote_user
        tmp = self._make_tmp_path(remote_user)
        unit_template = self._get_unit_template()
        changed_containers = []
        for container in container_config:
            for name in container:
                dest = '/etc/systemd/system/tripleo_{}.service'.format(name)
                # The template reads the container through the
                # "container_data_unit" variable.
                # NOTE(review): this relies on the templar resolving
                # variables from this very task_vars mapping - confirm.
                task_vars['container_data_unit'] = container
                try:
                    unit = (self._templar.template(
                        unit_template,
                        preserve_trailing_newlines=True,
                        escape_backslashes=False,
                        convert_data=False))
                finally:
                    # Never leak the temporary variable, even when the
                    # template fails to render.
                    task_vars.pop('container_data_unit', None)
                remote_data = self._transfer_data(
                    self._connection._shell.join_path(tmp, 'source'), unit)
                results = self._execute_module(
                    module_name='copy',
                    module_args=dict(src=remote_data,
                                     dest=dest,
                                     mode='0644',
                                     owner='root',
                                     group='root'),
                    task_vars=task_vars)
                # A failed copy reports no change; without this check the
                # plugin would carry on and report success although the
                # unit file was not written.
                if results.get('failed', False):
                    raise AnsibleActionFail(
                        'Failed to write {}: {}'.format(
                            dest, results.get('msg', 'unknown error')))
                if results.get('changed', False):
                    changed_containers.append(name)
        if self.debug:
            DISPLAY.display('Systemd unit files were created or updated for: '
                            '{}'.format(changed_containers))
        return changed_containers

    def _systemd_reload(self, task_vars):
        """Reload systemd to load new units.

        :param task_vars: Dictionary of Ansible task variables.
        """
        if self.debug:
            DISPLAY.display('Running systemd daemon reload')
        results = self._execute_module(
            module_name='systemd',
            module_args=dict(daemon_reload=True),
            task_vars=task_vars
        )
        if results.get('changed', False):
            self.changed = True

    @tenacity.retry(
        reraise=True,
        stop=tenacity.stop_after_attempt(5),
        wait=tenacity.wait_fixed(5)
    )
    def _restart_service(self, name, task_vars):
        """Restart a systemd service with retries and delay.

        The call is attempted up to 5 times with a fixed 5 seconds wait
        between attempts; the last error is re-raised.

        :param name: String for service name to restart.
        :param task_vars: Dictionary of Ansible task variables.
        :raises AnsibleActionFail: If systemd does not report a successful
                                   result for the service.
        """
        tvars = copy.deepcopy(task_vars)
        unit = 'tripleo_{}.service'.format(name)
        results = self._execute_module(
            module_name='systemd',
            module_args=dict(state='restarted',
                             name=unit,
                             enabled=True,
                             daemon_reload=False),
            task_vars=tvars
        )
        # A failed module run may carry no "status" key at all; treat that
        # as "not started yet" so the retry logic applies instead of
        # surfacing a KeyError.
        status = results.get('status', {})
        if isinstance(status, dict) and status.get('Result') == 'success':
            if results.get('changed', False):
                self.changed = True
                self.restarted.append(unit)
            return
        raise AnsibleActionFail('Service {} has not started yet'.format(name))

    def _restart_services(self, service_names, task_vars):
        """Restart systemd services.

        :param service_names: List of services to restart.
        :param task_vars: Dictionary of Ansible task variables.
        """
        for name in service_names:
            if self.debug:
                DISPLAY.display('Restarting systemd service for '
                                '{}'.format(name))
            self._restart_service(name, task_vars)

    def run(self, tmp=None, task_vars=None):
        """Entry point of the action plugin.

        :param tmp: Unused, only forwarded to the parent class.
        :param task_vars: Dictionary of Ansible task variables.
        :returns result: Result dictionary holding "changed" and the
                         "restarted" list of service names.
        """
        self.changed = False
        self.restarted = []

        if task_vars is None:
            task_vars = dict()
        result = super(ActionModule, self).run(tmp, task_vars)
        del tmp

        # parse args
        args = self._get_args()

        container_config = args['container_config']
        systemd_healthchecks = args['systemd_healthchecks']
        restart_containers = args['restart_containers']
        self.debug = args['debug']

        # Containers explicitly requested for restart are only restarted
        # when a unit file already exists for them.
        extra_restarts = []
        for c in restart_containers:
            s_path = os.path.join('/etc/systemd/system',
                                  'tripleo_{}.service'.format(c))
            service_stat = self._execute_module(
                module_name='stat',
                module_args=dict(path=s_path),
                task_vars=task_vars
            )
            if service_stat.get('stat', {}).get('exists', False):
                if self.debug:
                    DISPLAY.display('Systemd unit file found for {}, the '
                                    'container will be restarted'.format(c))
                extra_restarts.append(c)

        container_names = []
        for container in container_config:
            container_names.extend(container)

        self._cleanup_requires(container_names, task_vars)

        if systemd_healthchecks:
            self._cleanup_healthchecks(container_names, task_vars)

        changed_services = self._create_units(container_config, task_vars)
        if len(changed_services) > 0:
            self._systemd_reload(task_vars)

        # De-duplicate while keeping the order of the inputs, so services
        # are restarted in a deterministic order (a set would not
        # guarantee any).
        service_names = []
        for name in changed_services + extra_restarts:
            if name not in service_names:
                service_names.append(name)
        self._restart_services(service_names, task_vars)

        result['changed'] = self.changed
        result['restarted'] = self.restarted
        return result

View File

@ -100,6 +100,11 @@
- "'healthy' in fedora_infos.containers.0.State.Healthcheck.Status"
fail_msg: 'fedora container healthcheck is not healthy'
success_msg: 'fedora container healthcheck is healthy'
- name: Verify that Fedora systemd healthcheck container was removed correctly
command: systemctl is-active --quiet tripleo_fedora_healthcheck.timer
register: tripleo_fedora_healthcheck_active_result
failed_when:
- tripleo_fedora_healthcheck_active_result.rc == 0
- name: Verify that Fedora bis container was created correctly
block:
- name: Check for fedora_bis container

View File

@ -20,8 +20,13 @@
include: podman/start_order.yml order="{{ item.key }}" data="{{ item.value }}"
loop: "{{ all_containers_hash | subsort(attribute='start_order', null_value=0) | dict2items | list }}"
- name: "Manage container systemd services and healthchecks for {{ tripleo_container_manage_config }}"
include_tasks: podman/systemd.yml
- name: "Manage container systemd services and cleanup old systemd healthchecks for {{ tripleo_container_manage_config }}"
become: true
container_systemd:
container_config: "{{ container_config }}"
debug: "{{ tripleo_container_manage_debug | bool }}"
restart_containers: "{{ containers_changed | default([]) }}"
systemd_healthchecks: "{{ (not tripleo_container_manage_healthcheck_disabled | bool) }}"
vars:
container_config: "{{ all_containers_hash | dict_to_list | haskey(attribute='restart', value=['always','unless-stopped'], any=True) | default([]) }}"
when:

View File

@ -1,48 +0,0 @@
---
# Copyright 2020 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Stop the healthcheck timer of one container, remove its unit files and
# verify the resulting state. The container name is expected in
# container_systemd_healthcheck_name.
- name: "Stop and disable systemd timer for {{ container_systemd_healthcheck_name }}"
  systemd:
    state: stopped
    name: "tripleo_{{ container_systemd_healthcheck_name }}_healthcheck.timer"
    enabled: false
    daemon_reload: false

- name: "Remove systemd healthcheck files for {{ container_systemd_healthcheck_name }}"
  file:
    path: "{{ container_systemd_healthcheck_file }}"
    state: absent
  loop:
    - "/etc/systemd/system/tripleo_{{ container_systemd_healthcheck_name }}_healthcheck.service"
    - "/etc/systemd/system/tripleo_{{ container_systemd_healthcheck_name }}_healthcheck.timer"
  loop_control:
    loop_var: container_systemd_healthcheck_file

- name: Force systemd to re-read config after healthcheck removals
  systemd:
    daemon_reload: true

# The two checks below only read state, so they never report a change.
# The timer must NOT be active any more (rc == 0 means it still is).
- name: "Check if {{ container_systemd_healthcheck_name }} healthcheck is not running"
  command: "systemctl is-active --quiet tripleo_{{ container_systemd_healthcheck_name }}_healthcheck.timer"
  register: tripleo_healthcheck_result
  changed_when: false
  failed_when:
    - tripleo_healthcheck_result.rc == 0

# The container service itself must still be active (rc == 0).
- name: "Check if {{ container_systemd_healthcheck_name }} service is running and healthy"
  command: "systemctl is-active --quiet tripleo_{{ container_systemd_healthcheck_name }}.service"
  register: tripleo_service_result
  changed_when: false
  failed_when:
    - tripleo_service_result.rc != 0

View File

@ -1,25 +0,0 @@
---
# Copyright 2020 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Run the healthcheck cleanup only for containers that still have a
# systemd healthcheck service file on disk.
- name: "Check if systemd healthcheck exists for {{ container_systemd_healthcheck_name }}"
  stat:
    path: "/etc/systemd/system/tripleo_{{ container_systemd_healthcheck_name }}_healthcheck.service"
  register: container_systemd_healthcheck_stat

- name: "Cleanup systemd healthcheck for {{ container_systemd_healthcheck_name }}"
  when:
    - (container_systemd_healthcheck_stat.stat.exists|bool)
  include_tasks: podman/cleanup_healthcheck.yml

View File

@ -1,65 +0,0 @@
---
# Copyright 2019 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Each item of container_config is a single-key mapping whose key is the
# container name, hence the lookup('dict', ...).key expressions below.
- name: "Remove trailing .requires"
  no_log: "{{ not tripleo_container_manage_debug }}"
  file:
    path: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_requires).key }}.requires"
    state: absent
  loop: "{{ container_config }}"
  loop_control:
    loop_var: container_data_requires

# NOTE(review): "include" with inline key=value arguments is kept as is;
# switching to include_tasks would change how no_log/when are inherited by
# the included tasks - confirm before modernizing.
- name: "Cleanup systemd healthchecks"
  no_log: "{{ not tripleo_container_manage_debug }}"
  when:
    - not tripleo_container_manage_healthcheck_disabled
  include: podman/stat_healthcheck.yml container_systemd_healthcheck_name="{{ lookup('dict', item).key }}"
  loop: "{{ container_config }}"

- name: "Create systemd services files"
  no_log: "{{ not tripleo_container_manage_debug }}"
  template:
    src: systemd-service.j2
    dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_unit).key }}.service"
    mode: '0644'
    owner: root
    group: root
  register: systemd_file
  loop: "{{ container_config }}"
  loop_control:
    loop_var: container_data_unit

- name: "Force systemd daemon reload if a systemd file changed"
  systemd:
    daemon_reload: true
  when:
    - (systemd_file|get_changed_async_task_names|length) > 0

# Restart the services whose unit file changed, plus the containers listed
# in containers_changed; retry until systemd reports a successful result.
- name: "Start or restart systemd services"
  systemd:
    # Restart the service if it was already running
    state: restarted
    name: "tripleo_{{ container_sysd_name }}.service"
    enabled: true
    daemon_reload: false
  loop: "{{ (systemd_file|get_changed_async_task_names(extra=containers_changed|default([]))) }}"
  loop_control:
    loop_var: container_sysd_name
  register: systemd_service_enable
  until: (systemd_service_enable.status is defined) and (systemd_service_enable.status.Result == "success")
  retries: 5
  delay: 5

View File

@ -246,6 +246,7 @@
- ^tripleo_ansible/roles/tripleo_container_manage/.*
- ^tripleo_ansible/roles/tripleo_container_rm/.*
- ^tripleo_ansible/ansible_plugins/action/container_status.py$
- ^tripleo_ansible/ansible_plugins/action/container_systemd.py$
- ^tripleo_ansible/ansible_plugins/filter/helpers.py$
- ^tripleo_ansible/ansible_plugins/modules/container_config_data.py$
- ^tripleo_ansible/ansible_plugins/modules/container_puppet_config.py$