tripleo_container_manage: optimize systemd services/healthcheck bootstrap

Separate the creation of systemd files from the service restarts so that we
don't call systemd too many times, which makes the deployment faster.

It also uses a new filter that reads registered task data to figure out
which systemd files changed, and therefore which containers need a restart.

Change-Id: I16596a5b262642a678a8b8b123384fc387f69c70
This commit is contained in:
Emilien Macchi 2020-04-08 16:10:23 -04:00
parent 17e4139476
commit 761e5cbdd5
8 changed files with 163 additions and 72 deletions

View File

@ -43,7 +43,8 @@ class FilterModule(object):
'get_role_assignments': self.get_role_assignments,
'get_domain_id': self.get_domain_id,
'get_changed_containers': self.get_changed_containers,
'get_failed_containers': self.get_failed_containers
'get_failed_containers': self.get_failed_containers,
'get_changed_async_task_names': self.get_changed_async_task_names
}
def subsort(self, dict_to_sort, attribute, null_value=0):
@ -408,3 +409,19 @@ class FilterModule(object):
# not contain failed or finished keys.
continue
return failed
def get_changed_async_task_names(self, data, extra=None):
    """Return the names of looped Ansible resources that changed.

    ``data`` is the registered result of a looped task: a dictionary
    whose ``results`` key holds one dictionary per loop iteration. Each
    iteration stores its loop item under the key named by
    ``ansible_loop_var`` (``item`` when that key is absent), and the
    item itself is expected to be a dictionary keyed by resource name.

    :param data: registered task result containing a ``results`` list.
    :param extra: optional list of names that are always returned when
                  they appear in the results, whether or not their
                  iteration reported a change.
    :returns: list of resource names, in the order of ``results``.
    :raises KeyError: if ``data`` has no ``results`` key or an iteration
                      lacks its loop variable key.
    """
    # Avoid a mutable default argument shared between calls.
    extra = [] if extra is None else extra
    return_list = []
    for result in data['results']:
        loop_var = result.get('ansible_loop_var', 'item')
        # A missing "changed" key is treated the same as "not changed".
        changed = bool(result.get('changed'))
        # Only the keys (resource names) matter; the values are unused.
        for name in result[loop_var]:
            if changed or name in extra:
                return_list.append(name)
    return return_list

View File

@ -19,15 +19,3 @@
- name: Manage containers asynchronously
include_tasks: podman/create.yml
# We don't want to use async for the systemd tasks or we can have startup
# errors when systemd has to deal with multiple services trying to start
# at the same time. It is more reliable to start them in serial.
- name: Manage container systemd services and healthchecks in serial
include_tasks: podman/systemd.yml
# systemd doesn't have the equivalent of docker unless-stopped.
# Let's force 'always' so containers aren't restarted when stopped by
# systemd, but restarted when in failure.
loop: "{{ batched_container_data | haskey(attribute='restart', value=['always','unless-stopped'], any=True) }}"
loop_control:
loop_var: container_config

View File

@ -26,3 +26,6 @@
loop: "{{ data | batch(tripleo_container_manage_concurrency) | list }}"
loop_control:
loop_var: batched_container_data
# The systemd unit/healthcheck files are managed by podman/systemd.yml.
# The task name is fully quoted: the original had a stray trailing double
# quote that ended up as part of the displayed name.
- name: "Manage container systemd services and healthchecks for start_order {{ order }}"
  include_tasks: podman/systemd.yml

View File

@ -16,68 +16,103 @@
- name: Set container_name and container_sysd facts
set_fact:
container_sysd_name: "{{ lookup('dict', container_config).key }}"
container_sysd_data: "{{ lookup('dict', container_config).value }}"
container_config: "{{ data | list | haskey(attribute='restart', value=['always','unless-stopped'], any=True) }}"
container_config_healthcheck: "{{ data | list | haskey(attribute='healthcheck') }}"
- name: "Start systemd service for {{ container_sysd_name }}"
- name: "Manage systemd files"
no_log: "{{ not tripleo_container_manage_debug }}"
block:
- name: "Remove trailing .requires for {{ container_sysd_name }}"
- name: "Remove trailing .requires"
file:
path: "/etc/systemd/system/tripleo_{{ container_sysd_name }}.requires"
path: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_requires).key }}.requires"
state: absent
- name: "Create systemd unit file for {{ container_sysd_name }} service"
loop: "{{ container_config }}"
loop_control:
loop_var: container_data_requires
- name: "Create systemd services files"
template:
src: systemd-service.j2
dest: "/etc/systemd/system/tripleo_{{ container_sysd_name }}.service"
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_unit).key }}.service"
mode: '0644'
owner: root
group: root
register: systemd_file
- name: "Enable and start systemd service for {{ container_sysd_name }}"
systemd:
# Restart the service if it was already running
state: restarted
name: "tripleo_{{ container_sysd_name }}.service"
enabled: true
daemon_reload: true
when:
- systemd_file is changed or container_sysd_name in containers_changed
- name: "Manage systemd healthcheck for {{ container_sysd_name }}"
loop: "{{ container_config }}"
loop_control:
loop_var: container_data_unit
- name: "Create systemd healthcheck files"
when:
- not tripleo_container_manage_healthcheck_disabled
- container_sysd_data.healthcheck is defined
- (container_config_healthcheck | length) > 0
block:
- name: "Create systemd unit file for {{ container_sysd_name }} healthcheck"
- name: "Create systemd unit files healthchecks"
template:
src: systemd-healthcheck.j2
dest: "/etc/systemd/system/tripleo_{{ container_sysd_name }}_healthcheck.service"
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_healthcheck).key }}_healthcheck.service"
mode: '0644'
owner: root
group: root
register: systemd_healthcheck
- name: "Create systemd timer for {{ container_sysd_name }} healthcheck"
loop: "{{ container_config_healthcheck }}"
loop_control:
loop_var: container_data_healthcheck
- name: "Create systemd timers for healthchecks"
template:
src: systemd-timer.j2
dest: "/etc/systemd/system/tripleo_{{ container_sysd_name }}_healthcheck.timer"
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_timer).key }}_healthcheck.timer"
mode: '0644'
owner: root
group: root
register: systemd_timer
- name: "Enable and start systemd timer for {{ container_sysd_name }}"
systemd:
# Restart the timer if it was already running
state: restarted
name: "tripleo_{{ container_sysd_name }}_healthcheck.timer"
enabled: true
daemon_reload: true
when:
- systemd_healthcheck.changed or systemd_timer.changed
- name: "Add systemd require for {{ container_sysd_name }} healthcheck"
command: "systemctl add-requires tripleo_{{ container_sysd_name }}.service tripleo_{{ container_sysd_name }}_healthcheck.timer"
when:
- systemd_healthcheck.changed or systemd_timer.changed
- name: "Force systemd daemon reload"
systemd:
daemon_reload: true
when:
- systemd_healthcheck.changed or systemd_timer.changed
loop: "{{ container_config_healthcheck }}"
loop_control:
loop_var: container_data_timer
# Merge the names reported as changed by the unit, healthcheck and timer
# template tasks into one de-duplicated list.
- name: Create fact for container_systemd_changes
  set_fact:
    # Jinja2 filters bind tighter than "+", so the whole concatenation is
    # parenthesised to make default/unique apply to the merged list rather
    # than to the last operand only.
    container_systemd_changes: >-
      {{ ((systemd_file|get_changed_async_task_names) + (systemd_healthcheck|get_changed_async_task_names) +
      (systemd_timer|get_changed_async_task_names)) | default([]) | unique }}
# Reload systemd a single time, and only when container_systemd_changes
# (built from the systemd_file, systemd_healthcheck and systemd_timer
# registers) is not empty.
- name: "Force systemd daemon reload if a systemd file changed"
systemd:
daemon_reload: true
when:
- (container_systemd_changes | length) > 0
# Build the lists of service and timer names to (re)start: every entry whose
# template task reported a change, plus every entry whose name is listed in
# containers_changed (an empty list is used when that variable is undefined).
- name: Create fact for container_systemd_restart_list
set_fact:
container_systemd_services_restart_list: "{{ (systemd_file|get_changed_async_task_names(extra=containers_changed|default([]))) }}"
container_systemd_timers_restart_list: "{{ (systemd_timer|get_changed_async_task_names(extra=containers_changed|default([]))) }}"
# Enable and restart one tripleo_<name>.service per entry of
# container_systemd_services_restart_list.
- name: "Start or restart systemd services"
systemd:
# Restart the service if it was already running
state: restarted
name: "tripleo_{{ container_sysd_name }}.service"
enabled: true
# No reload here: it is handled by the dedicated
# "Force systemd daemon reload if a systemd file changed" task.
daemon_reload: false
loop: "{{ container_systemd_services_restart_list }}"
loop_control:
loop_var: container_sysd_name
# Enable and restart one tripleo_<name>_healthcheck.timer per entry of
# container_systemd_timers_restart_list.
- name: "Enable and start systemd timers"
systemd:
# Restart the timer if it was already running
state: restarted
name: "tripleo_{{ container_timer_name }}_healthcheck.timer"
enabled: true
# No reload here: it is handled by the dedicated
# "Force systemd daemon reload if a systemd file changed" task.
daemon_reload: false
loop: "{{ container_systemd_timers_restart_list }}"
loop_control:
loop_var: container_timer_name
# Nothing is done when healthchecks are disabled.
when:
- not tripleo_container_manage_healthcheck_disabled
# For each entry of container_systemd_timers_restart_list, run
# "systemctl add-requires" so that tripleo_<name>.service requires
# tripleo_<name>_healthcheck.timer.
- name: "Add systemd requires for healthchecks"
command: "systemctl add-requires tripleo_{{ container_requires_timer_name }}.service tripleo_{{ container_requires_timer_name }}_healthcheck.timer"
loop: "{{ container_systemd_timers_restart_list }}"
loop_control:
loop_var: container_requires_timer_name
# Nothing is done when healthchecks are disabled.
when:
- not tripleo_container_manage_healthcheck_disabled

View File

@ -1,10 +1,10 @@
[Unit]
Description=tripleo_{{ container_sysd_name }} healthcheck
After=tripleo-container-shutdown.service tripleo_{{ container_sysd_name }}.service
Requisite=tripleo_{{ container_sysd_name }}.service
Description=tripleo_{{ lookup('dict', container_data_healthcheck).key }} healthcheck
After=tripleo-container-shutdown.service tripleo_{{ lookup('dict', container_data_healthcheck).key }}.service
Requisite=tripleo_{{ lookup('dict', container_data_healthcheck).key }}.service
[Service]
Type=oneshot
ExecStart=/usr/bin/podman exec --user root {{ container_sysd_name }} {{ container_sysd_data.healthcheck.test }}
SyslogIdentifier=healthcheck_{{ container_sysd_name }}
ExecStart=/usr/bin/podman exec --user root {{ lookup('dict', container_data_healthcheck).key }} {{ lookup('dict', container_data_healthcheck).value.healthcheck.test }}
SyslogIdentifier=healthcheck_{{ lookup('dict', container_data_healthcheck).key }}
[Install]
WantedBy=multi-user.target

View File

@ -1,21 +1,21 @@
[Unit]
Description={{ container_sysd_name }} container
Description={{ lookup('dict', container_data_unit).key }} container
After=tripleo-container-shutdown.service
Wants={{ container_sysd_data.depends_on | default([]) | join(',') }}
Wants={{ lookup('dict', container_data_unit).value.depends_on | default([]) | join(',') }}
[Service]
Restart=always
{% if container_sysd_data.depends_on is defined and (container_sysd_data.depends_on | length > 0) and podman_drop_in | default('false') %}
ExecStart=/usr/libexec/tripleo-start-podman-container {{ container_sysd_name }}
{% if lookup('dict', container_data_unit).value.depends_on is defined and (lookup('dict', container_data_unit).value.depends_on | length > 0) and podman_drop_in | default('false') %}
ExecStart=/usr/libexec/tripleo-start-podman-container {{ lookup('dict', container_data_unit).key }}
{% else %}
ExecStart=/usr/bin/podman start {{ container_sysd_name }}
ExecStart=/usr/bin/podman start {{ lookup('dict', container_data_unit).key }}
{% endif %}
ExecReload=/usr/bin/podman kill --signal HUP {{ container_sysd_name }}
ExecStop=/usr/bin/podman stop -t {{ container_sysd_data.stop_grace_period | default(10) | int }} {{ container_sysd_name }}
ExecReload=/usr/bin/podman kill --signal HUP {{ lookup('dict', container_data_unit).key }}
ExecStop=/usr/bin/podman stop -t {{ lookup('dict', container_data_unit).value.stop_grace_period | default(10) | int }} {{ lookup('dict', container_data_unit).key }}
KillMode=none
Type=forking
PIDFile=/var/run/{{ container_sysd_name }}.pid
{% if container_sysd_data.systemd_exec_flags is defined %}
{% for s_flag, s_value in container_sysd_data.systemd_exec_flags.items() %}
PIDFile=/var/run/{{ lookup('dict', container_data_unit).key }}.pid
{% if lookup('dict', container_data_unit).value.systemd_exec_flags is defined %}
{% for s_flag, s_value in lookup('dict', container_data_unit).value.systemd_exec_flags.items() %}
{{ s_flag }}={{ s_value }}
{% endfor %}
{% endif %}

View File

@ -1,9 +1,9 @@
[Unit]
Description=tripleo_{{ container_sysd_name }} container healthcheck
PartOf=tripleo_{{ container_sysd_name }}.service
Description=tripleo_{{ lookup('dict', container_data_timer).key }} container healthcheck
PartOf=tripleo_{{ lookup('dict', container_data_timer).key }}.service
[Timer]
OnActiveSec=120
OnUnitActiveSec={{ container_sysd_data.check_interval | default(60) }}
RandomizedDelaySec={{ 45 if container_sysd_data.check_interval is not defined else (container_sysd_data.check_interval * 3 / 4) | int | abs }}
OnUnitActiveSec={{ lookup('dict', container_data_timer).value.check_interval | default(60) }}
RandomizedDelaySec={{ 45 if lookup('dict', container_data_timer).value.check_interval is not defined else (lookup('dict', container_data_timer).value.check_interval * 3 / 4) | int | abs }}
[Install]
WantedBy=timers.target

View File

@ -884,3 +884,51 @@ class TestHelperFilters(tests_base.TestCase):
expected_list = ['memcached', 'mysql']
result = self.filters.get_failed_containers(data)
self.assertEqual(result, expected_list)
def test_get_changed_async_task_names(self):
    """Changed items, and items named in ``extra``, are returned in order."""

    def looped(name, changed):
        # One iteration result of a task that uses a custom loop variable.
        return {
            "ansible_loop_var": "systemd_loop",
            "changed": changed,
            "failed": False,
            "systemd_loop": {
                name: {
                    "config": "foo"
                }
            },
        }

    data = {
        'results': [
            looped('keystone', False),
            looped('mysql', False),
            looped('haproxy', True),
            # No "ansible_loop_var" key: the item is stored under "item".
            {
                "changed": True,
                "failed": False,
                "item": {
                    'memcached': {
                        "config": "foo"
                    }
                },
            },
        ]
    }
    # 'mysql' did not change but is requested through ``extra``;
    # 'keystone' neither changed nor is in ``extra``, so it is left out.
    expected_list = ['mysql', 'haproxy', 'memcached']
    result = self.filters.get_changed_async_task_names(
        data=data, extra=['mysql'])
    self.assertEqual(result, expected_list)