From 7f56c0c559e9e0db84aa4064ba04c50edef255c4 Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Thu, 25 Jun 2020 18:02:17 -0400 Subject: [PATCH] Introduce an Action Plugin to manage systemd services for containers Instead of running a bunch of tasks to manage systemd resources, move it into an action plugin which should make the execution faster and easier to debug as well. Example of task: - name: Manage container systemd services container_systemd: container_config: - keystone: image: quay.io/tripleo/keystone restart: always - mysql: image: quay.io/tripleo/mysql stop_grace_period: 25 restart: always The output is "restarted" for the list of services that were actually restarted in systemd. Note on testing: since that module is consumed by tripleo_container_manage role, there is no need to create dedicated molecule tests; we already cover containers with restart policy in that role's molecule tests. So we'll re-use it. Co-Authored-By: Alex Schultz Co-Authored-By: Kevin Carter Change-Id: I614766bd9b111bda9ddfea0a60b032e1dee09abc (cherry picked from commit af7f083066574dcefe637b3aae5026de4573d954) --- .../action/container_systemd.py | 363 ++++++++++++++++++ .../molecule/default/converge.yml | 5 + .../tripleo_container_manage/tasks/create.yml | 9 +- .../tasks/podman/cleanup_healthcheck.yml | 48 --- .../tasks/podman/stat_healthcheck.yml | 25 -- .../tasks/podman/systemd.yml | 65 ---- zuul.d/molecule.yaml | 1 + 7 files changed, 376 insertions(+), 140 deletions(-) create mode 100644 tripleo_ansible/ansible_plugins/action/container_systemd.py delete mode 100644 tripleo_ansible/roles/tripleo_container_manage/tasks/podman/cleanup_healthcheck.yml delete mode 100644 tripleo_ansible/roles/tripleo_container_manage/tasks/podman/stat_healthcheck.yml delete mode 100644 tripleo_ansible/roles/tripleo_container_manage/tasks/podman/systemd.yml diff --git a/tripleo_ansible/ansible_plugins/action/container_systemd.py b/tripleo_ansible/ansible_plugins/action/container_systemd.py new
file mode 100644 index 000000000..e35581c48 --- /dev/null +++ b/tripleo_ansible/ansible_plugins/action/container_systemd.py @@ -0,0 +1,363 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2020 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from __future__ import absolute_import, division, print_function +__metaclass__ = type + +import copy +import os + +import tenacity +import yaml + +from ansible.errors import AnsibleActionFail +from ansible.plugins.action import ActionBase +from ansible.utils.display import Display + + +DISPLAY = Display() + +DOCUMENTATION = """ +module: container_systemd +author: + - "TripleO team" +version_added: '2.9' +short_description: Create systemd files and manage services to run containers +notes: [] +description: + - Manage the systemd unit files for containers with a restart policy and + then make sure the services are started so the containers are running. + It takes the container config data in entry to figure out how the unit + files will be configured. It returns a list of services that were + restarted. +requirements: + - None +options: + container_config: + description: + - List of container configurations + type: list + elements: dict + systemd_healthchecks: + default: true + description: + - Whether or not we cleanup the old healthchecks with SystemD. 
+ type: boolean + restart_containers: + description: + - List of container names to be restarted + default: [] + type: list + debug: + default: false + description: + - Whether or not debug is enabled. + type: boolean +""" +EXAMPLES = """ +- name: Manage container systemd services + container_systemd: + container_config: + - keystone: + image: quay.io/tripleo/keystone + restart: always + - mysql: + image: quay.io/tripleo/mysql + stop_grace_period: 25 + restart: always +""" +RETURN = """ +restarted: + description: List of services that were restarted + returned: always + type: list + sample: + - tripleo_keystone.service + - tripleo_mysql.service +""" + + +class ActionModule(ActionBase): + """Class for the container_systemd action plugin. + """ + + _VALID_ARGS = yaml.safe_load(DOCUMENTATION)['options'] + + def _get_args(self): + missing = [] + args = {} + + for option, vals in self._VALID_ARGS.items(): + if 'default' not in vals: + if self._task.args.get(option, None) is None: + missing.append(option) + continue + args[option] = self._task.args.get(option) + else: + args[option] = self._task.args.get(option, vals['default']) + + if missing: + raise AnsibleActionFail('Missing required parameters: {}'.format( + ', '.join(missing))) + return args + + def _cleanup_requires(self, container_names, task_vars): + """Cleanup systemd requires files. + + :param container_names: List of container names. + :param task_vars: Dictionary of Ansible task variables. + """ + for name in container_names: + path = "/etc/systemd/system/tripleo_{}.requires".format(name) + if self.debug: + DISPLAY.display('Removing {} file'.format(path)) + results = self._execute_module( + module_name='file', + module_args=dict(path=path, state='absent'), + task_vars=task_vars + ) + if results.get('changed', False): + self.changed = True + + def _delete_service(self, name, task_vars): + """Stop and disable a systemd service. + + :param name: String for service name to stop and disable. 
+ :param task_vars: Dictionary of Ansible task variables. + """ + tvars = copy.deepcopy(task_vars) + results = self._execute_module( + module_name='systemd', + module_args=dict(state='stopped', + name='tripleo_{}_healthcheck.timer'.format(name), + enabled=False, + daemon_reload=False), + task_vars=tvars + ) + return results + + def _cleanup_healthchecks(self, container_names, task_vars): + """Cleanup systemd healthcheck files. + + :param container_names: List of container names. + :param task_vars: Dictionary of Ansible task variables. + """ + systemd_reload = False + for cname in container_names: + h_path = os.path.join('/etc/systemd/system', + 'tripleo_{}_healthcheck.timer'.format(cname)) + healthcheck_stat = self._execute_module( + module_name='stat', + module_args=dict(path=h_path), + task_vars=task_vars + ) + if healthcheck_stat.get('stat', {}).get('exists', False): + if self.debug: + DISPLAY.display('Cleaning-up systemd healthcheck for ' + '{}'.format(cname)) + self._delete_service(cname, task_vars) + files_ext = ['service', 'timer'] + for ext in files_ext: + sysd_base = '/etc/systemd/system' + file_path = 'tripleo_{}_healthcheck.{}'.format(cname, ext) + full_path = os.path.join(sysd_base, file_path) + results = self._execute_module( + module_name='file', + module_args=dict(path=full_path, state='absent'), + task_vars=task_vars + ) + if results.get('changed', False): + self.changed = True + systemd_reload = True + if systemd_reload: + self._systemd_reload(task_vars) + + def _get_unit_template(self): + """Return systemd unit template data + + :returns data: Template data. + """ + if self._task._role: + file_path = self._task._role._role_path + else: + file_path = self._loader.get_basedir() + # NOTE: if templates doesn't exist, it'll always return + # file_path/systemd-service.j2 + # This file is required to exist from the + # tripleo_container_manage role, as there is no + # parameter to override it now. 
+ source = self._loader.path_dwim_relative( + file_path, + 'templates', + 'systemd-service.j2' + ) + if not os.path.exists(source): + raise AnsibleActionFail('Template {} was ' + 'not found'.format(source)) + with open(source) as template_file: + data = template_file.read() + return data + + def _create_units(self, container_config, task_vars): + """Create system units and get list of changed services + + :param container_config: List of dictionaries for container configs. + :param task_vars: Dictionary of Ansible task variables. + :returns changed_containers: List of containers which has a new unit. + """ + try: + remote_user = self._get_remote_user() + except Exception: + remote_user = task_vars.get('ansible_user') + if not remote_user: + remote_user = self._play_context.remote_user + tmp = self._make_tmp_path(remote_user) + unit_template = self._get_unit_template() + changed_containers = [] + for container in container_config: + for name, config in container.items(): + dest = '/etc/systemd/system/tripleo_{}.service'.format(name) + task_vars['container_data_unit'] = container + unit = (self._templar.template(unit_template, + preserve_trailing_newlines=True, + escape_backslashes=False, + convert_data=False)) + del task_vars['container_data_unit'] + remote_data = self._transfer_data( + self._connection._shell.join_path(tmp, 'source'), unit) + + results = self._execute_module( + module_name='copy', + module_args=dict(src=remote_data, + dest=dest, + mode='0644', + owner='root', + group='root'), + task_vars=task_vars) + if results.get('changed', False): + changed_containers.append(name) + if self.debug: + DISPLAY.display('Systemd unit files were created or updated for: ' + '{}'.format(changed_containers)) + return changed_containers + + def _systemd_reload(self, task_vars): + """Reload systemd to load new units. + + :param task_vars: Dictionary of Ansible task variables. 
+ """ + if self.debug: + DISPLAY.display('Running systemd daemon reload') + results = self._execute_module( + module_name='systemd', + module_args=dict(daemon_reload=True), + task_vars=task_vars + ) + if results.get('changed', False): + self.changed = True + + @tenacity.retry( + reraise=True, + stop=tenacity.stop_after_attempt(5), + wait=tenacity.wait_fixed(5) + ) + def _restart_service(self, name, task_vars): + """Restart a systemd service with retries and delay. + + :param name: String for service name to restart. + :param task_vars: Dictionary of Ansible task variables. + """ + tvars = copy.deepcopy(task_vars) + results = self._execute_module( + module_name='systemd', + module_args=dict(state='restarted', + name='tripleo_{}.service'.format(name), + enabled=True, + daemon_reload=False), + task_vars=tvars + ) + if 'Result' in results['status']: + if results['status']['Result'] == 'success': + if results.get('changed', False): + self.changed = True + self.restarted.append('tripleo_{}.service'.format(name)) + return + raise AnsibleActionFail('Service {} has not started yet'.format(name)) + + def _restart_services(self, service_names, task_vars): + """Restart systemd services. + + :param service_names: List of services to restart. + :param task_vars: Dictionary of Ansible task variables. 
+ """ + for name in service_names: + if self.debug: + DISPLAY.display('Restarting systemd service for ' + '{}'.format(name)) + self._restart_service(name, task_vars) + + def run(self, tmp=None, task_vars=None): + self.changed = False + self.restarted = [] + + if task_vars is None: + task_vars = dict() + result = super(ActionModule, self).run(tmp, task_vars) + del tmp + + # parse args + args = self._get_args() + + container_config = args['container_config'] + systemd_healthchecks = args['systemd_healthchecks'] + restart_containers = args['restart_containers'] + self.debug = args['debug'] + + extra_restarts = [] + for c in restart_containers: + s_path = os.path.join('/etc/systemd/system', + 'tripleo_{}.service'.format(c)) + service_stat = self._execute_module( + module_name='stat', + module_args=dict(path=s_path), + task_vars=task_vars + ) + if service_stat.get('stat', {}).get('exists', False): + if self.debug: + DISPLAY.display('Systemd unit file found for {}, the ' + 'container will be restarted'.format(c)) + extra_restarts.append(c) + + container_names = [] + for container in container_config: + for name, config in container.items(): + container_names.append(name) + + self._cleanup_requires(container_names, task_vars) + + if systemd_healthchecks: + self._cleanup_healthchecks(container_names, task_vars) + + changed_services = self._create_units(container_config, task_vars) + if len(changed_services) > 0: + self._systemd_reload(task_vars) + service_names = set(changed_services + extra_restarts) + self._restart_services(service_names, task_vars) + + result['changed'] = self.changed + result['restarted'] = self.restarted + return result diff --git a/tripleo_ansible/roles/tripleo_container_manage/molecule/default/converge.yml b/tripleo_ansible/roles/tripleo_container_manage/molecule/default/converge.yml index e003be1f9..8e5710f66 100644 --- a/tripleo_ansible/roles/tripleo_container_manage/molecule/default/converge.yml +++ 
b/tripleo_ansible/roles/tripleo_container_manage/molecule/default/converge.yml @@ -100,6 +100,11 @@ - "'healthy' in fedora_infos.containers.0.State.Healthcheck.Status" fail_msg: 'fedora container healthcheck is not healthy' success_msg: 'fedora container healthcheck is healthy' + - name: Verify that Fedora systemd healthcheck container was removed correctly + command: systemctl is-active --quiet tripleo_fedora_healthcheck.timer + register: tripleo_fedora_healthcheck_active_result + failed_when: + - tripleo_fedora_healthcheck_active_result.rc == 0 - name: Verify that Fedora bis container was created correctly block: - name: Check for fedora_bis container diff --git a/tripleo_ansible/roles/tripleo_container_manage/tasks/create.yml b/tripleo_ansible/roles/tripleo_container_manage/tasks/create.yml index 5889d4bee..ae5af5918 100644 --- a/tripleo_ansible/roles/tripleo_container_manage/tasks/create.yml +++ b/tripleo_ansible/roles/tripleo_container_manage/tasks/create.yml @@ -20,8 +20,13 @@ include: podman/start_order.yml order="{{ item.key }}" data="{{ item.value }}" loop: "{{ all_containers_hash | subsort(attribute='start_order', null_value=0) | dict2items | list }}" -- name: "Manage container systemd services and healthchecks for {{ tripleo_container_manage_config }}" - include_tasks: podman/systemd.yml +- name: "Manage container systemd services and cleanup old systemd healthchecks for {{ tripleo_container_manage_config }}" + become: true + container_systemd: + container_config: "{{ container_config }}" + debug: "{{ tripleo_container_manage_debug | bool }}" + restart_containers: "{{ containers_changed | default([]) }}" + systemd_healthchecks: "{{ (not tripleo_container_manage_healthcheck_disabled | bool) }}" vars: container_config: "{{ all_containers_hash | dict_to_list | haskey(attribute='restart', value=['always','unless-stopped'], any=True) | default([]) }}" when: diff --git a/tripleo_ansible/roles/tripleo_container_manage/tasks/podman/cleanup_healthcheck.yml 
b/tripleo_ansible/roles/tripleo_container_manage/tasks/podman/cleanup_healthcheck.yml deleted file mode 100644 index ad32d5533..000000000 --- a/tripleo_ansible/roles/tripleo_container_manage/tasks/podman/cleanup_healthcheck.yml +++ /dev/null @@ -1,48 +0,0 @@ ---- -# Copyright 2020 Red Hat, Inc. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -- name: "Stop and disable systemd timer for {{ container_systemd_healthcheck_name }}" - systemd: - state: stopped - name: "tripleo_{{ container_systemd_healthcheck_name }}_healthcheck.timer" - enabled: false - daemon_reload: false - -- name: "Remove systemd healthcheck files for {{ container_systemd_healthcheck_name }}" - file: - path: "{{ container_systemd_healthcheck_file }}" - state: absent - loop: - - "/etc/systemd/system/tripleo_{{ container_systemd_healthcheck_name }}_healthcheck.service" - - "/etc/systemd/system/tripleo_{{ container_systemd_healthcheck_name }}_healthcheck.timer" - loop_control: - loop_var: container_systemd_healthcheck_file - -- name: Force systemd to re-read config after healthcheck removals - systemd: - daemon_reload: true - -- name: "Check if {{ container_systemd_healthcheck_name }} healthcheck is not running" - command: "systemctl is-active --quiet tripleo_{{ container_systemd_healthcheck_name }}_healthcheck.timer" - register: tripleo_healthcheck_result - failed_when: - - tripleo_healthcheck_result.rc == 0 - -- name: "Check if {{ container_systemd_healthcheck_name }} service is 
running and healthy" - command: "systemctl is-active --quiet tripleo_{{ container_systemd_healthcheck_name }}.service" - register: tripleo_service_result - failed_when: - - tripleo_service_result.rc != 0 diff --git a/tripleo_ansible/roles/tripleo_container_manage/tasks/podman/stat_healthcheck.yml b/tripleo_ansible/roles/tripleo_container_manage/tasks/podman/stat_healthcheck.yml deleted file mode 100644 index 85e9d2e84..000000000 --- a/tripleo_ansible/roles/tripleo_container_manage/tasks/podman/stat_healthcheck.yml +++ /dev/null @@ -1,25 +0,0 @@ ---- -# Copyright 2020 Red Hat, Inc. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -- name: "Check if systemd healthcheck exists for {{ container_systemd_healthcheck_name }}" - stat: - path: "/etc/systemd/system/tripleo_{{ container_systemd_healthcheck_name }}_healthcheck.service" - register: container_systemd_healthcheck_stat - -- name: "Cleanup systemd healthcheck for {{ container_systemd_healthcheck_name }}" - when: - - (container_systemd_healthcheck_stat.stat.exists|bool) - include_tasks: podman/cleanup_healthcheck.yml diff --git a/tripleo_ansible/roles/tripleo_container_manage/tasks/podman/systemd.yml b/tripleo_ansible/roles/tripleo_container_manage/tasks/podman/systemd.yml deleted file mode 100644 index 2b597e8bd..000000000 --- a/tripleo_ansible/roles/tripleo_container_manage/tasks/podman/systemd.yml +++ /dev/null @@ -1,65 +0,0 @@ ---- -# Copyright 2019 Red Hat, Inc. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -- name: "Remove trailing .requires" - no_log: "{{ not tripleo_container_manage_debug }}" - file: - path: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_requires).key }}.requires" - state: absent - loop: "{{ container_config }}" - loop_control: - loop_var: container_data_requires - -- name: "Cleanup systemd healthchecks" - no_log: "{{ not tripleo_container_manage_debug }}" - when: - - not tripleo_container_manage_healthcheck_disabled - include: podman/stat_healthcheck.yml container_systemd_healthcheck_name="{{ lookup('dict', item).key }}" - loop: "{{ container_config }}" - -- name: "Create systemd services files" - no_log: "{{ not tripleo_container_manage_debug }}" - template: - src: systemd-service.j2 - dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_unit).key }}.service" - mode: '0644' - owner: root - group: root - register: systemd_file - loop: "{{ container_config }}" - loop_control: - loop_var: container_data_unit - -- name: "Force systemd daemon reload if a systemd file changed" - systemd: - daemon_reload: true - when: - - (systemd_file|get_changed_async_task_names|length) > 0 - -- name: "Start or restart systemd services" - systemd: - # Restart the service if it was already running - state: restarted - name: "tripleo_{{ container_sysd_name }}.service" - enabled: true - daemon_reload: false - loop: "{{ 
(systemd_file|get_changed_async_task_names(extra=containers_changed|default([]))) }}" - loop_control: - loop_var: container_sysd_name - register: systemd_service_enable - until: (systemd_service_enable.status is defined) and (systemd_service_enable.status.Result == "success") - retries: 5 - delay: 5 diff --git a/zuul.d/molecule.yaml b/zuul.d/molecule.yaml index 29074862c..dd9753e43 100644 --- a/zuul.d/molecule.yaml +++ b/zuul.d/molecule.yaml @@ -246,6 +246,7 @@ - ^tripleo_ansible/roles/tripleo_container_manage/.* - ^tripleo_ansible/roles/tripleo_container_rm/.* - ^tripleo_ansible/ansible_plugins/action/container_status.py$ + - ^tripleo_ansible/ansible_plugins/action/container_systemd.py$ - ^tripleo_ansible/ansible_plugins/filter/helpers.py$ - ^tripleo_ansible/ansible_plugins/modules/container_config_data.py$ - ^tripleo_ansible/ansible_plugins/modules/container_puppet_config.py$