tripleo_container_manage: optimize systemd services/healthcheck bootstrap
Separate the creation of systemd files from the service restarts so that we don't call systemd too many times, which makes the deployment faster. It also uses a new filter that reads registered task data to figure out which systemd files changed and therefore which containers need a restart. Change-Id: I16596a5b262642a678a8b8b123384fc387f69c70
This commit is contained in:
parent
17e4139476
commit
761e5cbdd5
|
@ -43,7 +43,8 @@ class FilterModule(object):
|
|||
'get_role_assignments': self.get_role_assignments,
|
||||
'get_domain_id': self.get_domain_id,
|
||||
'get_changed_containers': self.get_changed_containers,
|
||||
'get_failed_containers': self.get_failed_containers
|
||||
'get_failed_containers': self.get_failed_containers,
|
||||
'get_changed_async_task_names': self.get_changed_async_task_names
|
||||
}
|
||||
|
||||
def subsort(self, dict_to_sort, attribute, null_value=0):
|
||||
|
@ -408,3 +409,19 @@ class FilterModule(object):
|
|||
# not contain failed or finished keys.
|
||||
continue
|
||||
return failed
|
||||
|
||||
def get_changed_async_task_names(self, data, extra=None):
    """Return the names of the looped resources that changed.

    :param data: registered result of a looped task. It is read as a
        dict with a ``results`` list; each entry stores its loop item
        under the key named by ``ansible_loop_var`` (``item`` when that
        key is absent), and the loop item is a dict keyed by resource
        name.
    :param extra: optional list of names that are always reported when
        they appear in a loop item, whether or not the entry changed.
    :returns: list of resource names, in the order they appear in
        ``results``. Empty when ``data`` has no ``results`` key.
    """
    # ``None`` replaces the former mutable ``[]`` default; omitting
    # ``extra`` still means "force nothing".
    forced = extra or ()
    names = []
    for result in data.get('results', []):
        loop_var = result.get('ansible_loop_var', 'item')
        # The changed flag belongs to the whole entry, so read it once
        # instead of once per key.
        changed = bool(result.get('changed'))
        # The loop item is expected to be a dict; iterating it yields
        # the resource names (its keys).
        for name in result[loop_var]:
            if changed or name in forced:
                names.append(name)
    return names
|
||||
|
|
|
@ -19,15 +19,3 @@
|
|||
|
||||
- name: Manage containers asynchronously
|
||||
include_tasks: podman/create.yml
|
||||
|
||||
# We don't want to use async for the systemd tasks or we can have startup
|
||||
# errors when systemd has to deal with multiple services trying to start
|
||||
# at the same time. It is more reliable to start them in serial.
|
||||
- name: Manage container systemd services and healthchecks in serial
|
||||
include_tasks: podman/systemd.yml
|
||||
# systemd doesn't have the equivalent of docker unless-stopped.
|
||||
# Let's force 'always' so containers aren't restarted when stopped by
|
||||
# systemd, but restarted when in failure.
|
||||
loop: "{{ batched_container_data | haskey(attribute='restart', value=['always','unless-stopped'], any=True) }}"
|
||||
loop_control:
|
||||
loop_var: container_config
|
||||
|
|
|
@ -26,3 +26,6 @@
|
|||
loop: "{{ data | batch(tripleo_container_manage_concurrency) | list }}"
|
||||
loop_control:
|
||||
loop_var: batched_container_data
|
||||
|
||||
# Include the systemd/healthcheck tasks for the current start_order.
# The name is fully quoted so the templated {{ order }} renders without a
# dangling quote character at the end of the task title.
- name: "Manage container systemd services and healthchecks for start_order {{ order }}"
  include_tasks: podman/systemd.yml
|
||||
|
|
|
@ -16,68 +16,103 @@
|
|||
|
||||
- name: Set container_name and container_sysd facts
|
||||
set_fact:
|
||||
container_sysd_name: "{{ lookup('dict', container_config).key }}"
|
||||
container_sysd_data: "{{ lookup('dict', container_config).value }}"
|
||||
container_config: "{{ data | list | haskey(attribute='restart', value=['always','unless-stopped'], any=True) }}"
|
||||
container_config_healthcheck: "{{ data | list | haskey(attribute='healthcheck') }}"
|
||||
|
||||
- name: "Start systemd service for {{ container_sysd_name }}"
|
||||
- name: "Manage systemd files"
|
||||
no_log: "{{ not tripleo_container_manage_debug }}"
|
||||
block:
|
||||
- name: "Remove trailing .requires for {{ container_sysd_name }}"
|
||||
- name: "Remove trailing .requires"
|
||||
file:
|
||||
path: "/etc/systemd/system/tripleo_{{ container_sysd_name }}.requires"
|
||||
path: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_requires).key }}.requires"
|
||||
state: absent
|
||||
- name: "Create systemd unit file for {{ container_sysd_name }} service"
|
||||
loop: "{{ container_config }}"
|
||||
loop_control:
|
||||
loop_var: container_data_requires
|
||||
- name: "Create systemd services files"
|
||||
template:
|
||||
src: systemd-service.j2
|
||||
dest: "/etc/systemd/system/tripleo_{{ container_sysd_name }}.service"
|
||||
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_unit).key }}.service"
|
||||
mode: '0644'
|
||||
owner: root
|
||||
group: root
|
||||
register: systemd_file
|
||||
- name: "Enable and start systemd service for {{ container_sysd_name }}"
|
||||
systemd:
|
||||
# Restart the service if it was already running
|
||||
state: restarted
|
||||
name: "tripleo_{{ container_sysd_name }}.service"
|
||||
enabled: true
|
||||
daemon_reload: true
|
||||
when:
|
||||
- systemd_file is changed or container_sysd_name in containers_changed
|
||||
- name: "Manage systemd healthcheck for {{ container_sysd_name }}"
|
||||
loop: "{{ container_config }}"
|
||||
loop_control:
|
||||
loop_var: container_data_unit
|
||||
- name: "Create systemd healthcheck files"
|
||||
when:
|
||||
- not tripleo_container_manage_healthcheck_disabled
|
||||
- container_sysd_data.healthcheck is defined
|
||||
- (container_config_healthcheck | length) > 0
|
||||
block:
|
||||
- name: "Create systemd unit file for {{ container_sysd_name }} healthcheck"
|
||||
- name: "Create systemd unit files healthchecks"
|
||||
template:
|
||||
src: systemd-healthcheck.j2
|
||||
dest: "/etc/systemd/system/tripleo_{{ container_sysd_name }}_healthcheck.service"
|
||||
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_healthcheck).key }}_healthcheck.service"
|
||||
mode: '0644'
|
||||
owner: root
|
||||
group: root
|
||||
register: systemd_healthcheck
|
||||
- name: "Create systemd timer for {{ container_sysd_name }} healthcheck"
|
||||
loop: "{{ container_config_healthcheck }}"
|
||||
loop_control:
|
||||
loop_var: container_data_healthcheck
|
||||
- name: "Create systemd timers for healthchecks"
|
||||
template:
|
||||
src: systemd-timer.j2
|
||||
dest: "/etc/systemd/system/tripleo_{{ container_sysd_name }}_healthcheck.timer"
|
||||
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_timer).key }}_healthcheck.timer"
|
||||
mode: '0644'
|
||||
owner: root
|
||||
group: root
|
||||
register: systemd_timer
|
||||
- name: "Enable and start systemd timer for {{ container_sysd_name }}"
|
||||
systemd:
|
||||
# Restart the timer if it was already running
|
||||
state: restarted
|
||||
name: "tripleo_{{ container_sysd_name }}_healthcheck.timer"
|
||||
enabled: true
|
||||
daemon_reload: true
|
||||
when:
|
||||
- systemd_healthcheck.changed or systemd_timer.changed
|
||||
- name: "Add systemd require for {{ container_sysd_name }} healthcheck"
|
||||
command: "systemctl add-requires tripleo_{{ container_sysd_name }}.service tripleo_{{ container_sysd_name }}_healthcheck.timer"
|
||||
when:
|
||||
- systemd_healthcheck.changed or systemd_timer.changed
|
||||
- name: "Force systemd daemon reload"
|
||||
systemd:
|
||||
daemon_reload: true
|
||||
when:
|
||||
- systemd_healthcheck.changed or systemd_timer.changed
|
||||
loop: "{{ container_config_healthcheck }}"
|
||||
loop_control:
|
||||
loop_var: container_data_timer
|
||||
|
||||
# Collect the names reported as changed by the three registered template
# tasks (service unit, healthcheck unit, healthcheck timer).
# The lists are concatenated inside one parenthesised group and only then
# passed to `unique`: Jinja filters bind tighter than `+`, so without the
# group only the last list would be de-duplicated.
- name: Create fact for container_systemd_changes
  set_fact:
    container_systemd_changes: >-
      {{ ((systemd_file | get_changed_async_task_names) +
      (systemd_healthcheck | get_changed_async_task_names) +
      (systemd_timer | get_changed_async_task_names)) | unique }}
|
||||
|
||||
# Single daemon-reload for the whole run: it only fires when
# container_systemd_changes lists at least one name.
- name: "Force systemd daemon reload if a systemd file changed"
  systemd:
    daemon_reload: true
  when:
    - (container_systemd_changes | length) > 0
|
||||
|
||||
# Build the lists of names whose service / healthcheck timer must be
# (re)started. A name is listed when its registered template result changed,
# or when it is part of containers_changed (an empty list is used when that
# variable is undefined).
- name: Create fact for container_systemd_restart_list
  set_fact:
    container_systemd_services_restart_list: "{{ (systemd_file|get_changed_async_task_names(extra=containers_changed|default([]))) }}"
    container_systemd_timers_restart_list: "{{ (systemd_timer|get_changed_async_task_names(extra=containers_changed|default([]))) }}"
|
||||
|
||||
# Enable and restart one tripleo_<name>.service per entry of
# container_systemd_services_restart_list.
- name: "Start or restart systemd services"
  systemd:
    # Restart the service if it was already running
    state: restarted
    name: "tripleo_{{ container_sysd_name }}.service"
    enabled: true
    # No reload per item; a separate task in this file performs the
    # daemon-reload once when systemd files changed.
    daemon_reload: false
  loop: "{{ container_systemd_services_restart_list }}"
  loop_control:
    loop_var: container_sysd_name
|
||||
|
||||
# Enable and restart one tripleo_<name>_healthcheck.timer per entry of
# container_systemd_timers_restart_list. Skipped entirely when healthchecks
# are disabled.
- name: "Enable and start systemd timers"
  systemd:
    # Restart the timer if it was already running
    state: restarted
    name: "tripleo_{{ container_timer_name }}_healthcheck.timer"
    enabled: true
    # No reload per item; a separate task in this file performs the
    # daemon-reload once when systemd files changed.
    daemon_reload: false
  loop: "{{ container_systemd_timers_restart_list }}"
  loop_control:
    loop_var: container_timer_name
  when:
    - not tripleo_container_manage_healthcheck_disabled
|
||||
|
||||
# For every name in container_systemd_timers_restart_list, run
# `systemctl add-requires` so that tripleo_<name>.service requires
# tripleo_<name>_healthcheck.timer. Skipped when healthchecks are disabled.
# NOTE(review): `command` reports "changed" on every run — confirm this is
# acceptable for idempotence reporting.
- name: "Add systemd requires for healthchecks"
  command: "systemctl add-requires tripleo_{{ container_requires_timer_name }}.service tripleo_{{ container_requires_timer_name }}_healthcheck.timer"
  loop: "{{ container_systemd_timers_restart_list }}"
  loop_control:
    loop_var: container_requires_timer_name
  when:
    - not tripleo_container_manage_healthcheck_disabled
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
[Unit]
|
||||
Description=tripleo_{{ container_sysd_name }} healthcheck
|
||||
After=tripleo-container-shutdown.service tripleo_{{ container_sysd_name }}.service
|
||||
Requisite=tripleo_{{ container_sysd_name }}.service
|
||||
Description=tripleo_{{ lookup('dict', container_data_healthcheck).key }} healthcheck
|
||||
After=tripleo-container-shutdown.service tripleo_{{ lookup('dict', container_data_healthcheck).key }}.service
|
||||
Requisite=tripleo_{{ lookup('dict', container_data_healthcheck).key }}.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/bin/podman exec --user root {{ container_sysd_name }} {{ container_sysd_data.healthcheck.test }}
|
||||
SyslogIdentifier=healthcheck_{{ container_sysd_name }}
|
||||
ExecStart=/usr/bin/podman exec --user root {{ lookup('dict', container_data_healthcheck).key }} {{ lookup('dict', container_data_healthcheck).value.healthcheck.test }}
|
||||
SyslogIdentifier=healthcheck_{{ lookup('dict', container_data_healthcheck).key }}
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
[Unit]
|
||||
Description={{ container_sysd_name }} container
|
||||
Description={{ lookup('dict', container_data_unit).key }} container
|
||||
After=tripleo-container-shutdown.service
|
||||
Wants={{ container_sysd_data.depends_on | default([]) | join(',') }}
|
||||
Wants={{ lookup('dict', container_data_unit).value.depends_on | default([]) | join(',') }}
|
||||
[Service]
|
||||
Restart=always
|
||||
{% if container_sysd_data.depends_on is defined and (container_sysd_data.depends_on | length > 0) and podman_drop_in | default('false') %}
|
||||
ExecStart=/usr/libexec/tripleo-start-podman-container {{ container_sysd_name }}
|
||||
{% if lookup('dict', container_data_unit).value.depends_on is defined and (lookup('dict', container_data_unit).value.depends_on | length > 0) and podman_drop_in | default('false') %}
|
||||
ExecStart=/usr/libexec/tripleo-start-podman-container {{ lookup('dict', container_data_unit).key }}
|
||||
{% else %}
|
||||
ExecStart=/usr/bin/podman start {{ container_sysd_name }}
|
||||
ExecStart=/usr/bin/podman start {{ lookup('dict', container_data_unit).key }}
|
||||
{% endif %}
|
||||
ExecReload=/usr/bin/podman kill --signal HUP {{ container_sysd_name }}
|
||||
ExecStop=/usr/bin/podman stop -t {{ container_sysd_data.stop_grace_period | default(10) | int }} {{ container_sysd_name }}
|
||||
ExecReload=/usr/bin/podman kill --signal HUP {{ lookup('dict', container_data_unit).key }}
|
||||
ExecStop=/usr/bin/podman stop -t {{ lookup('dict', container_data_unit).value.stop_grace_period | default(10) | int }} {{ lookup('dict', container_data_unit).key }}
|
||||
KillMode=none
|
||||
Type=forking
|
||||
PIDFile=/var/run/{{ container_sysd_name }}.pid
|
||||
{% if container_sysd_data.systemd_exec_flags is defined %}
|
||||
{% for s_flag, s_value in container_sysd_data.systemd_exec_flags.items() %}
|
||||
PIDFile=/var/run/{{ lookup('dict', container_data_unit).key }}.pid
|
||||
{% if lookup('dict', container_data_unit).value.systemd_exec_flags is defined %}
|
||||
{% for s_flag, s_value in lookup('dict', container_data_unit).value.systemd_exec_flags.items() %}
|
||||
{{ s_flag }}={{ s_value }}
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
[Unit]
|
||||
Description=tripleo_{{ container_sysd_name }} container healthcheck
|
||||
PartOf=tripleo_{{ container_sysd_name }}.service
|
||||
Description=tripleo_{{ lookup('dict', container_data_timer).key }} container healthcheck
|
||||
PartOf=tripleo_{{ lookup('dict', container_data_timer).key }}.service
|
||||
[Timer]
|
||||
OnActiveSec=120
|
||||
OnUnitActiveSec={{ container_sysd_data.check_interval | default(60) }}
|
||||
RandomizedDelaySec={{ 45 if container_sysd_data.check_interval is not defined else (container_sysd_data.check_interval * 3 / 4) | int | abs }}
|
||||
OnUnitActiveSec={{ lookup('dict', container_data_timer).value.check_interval | default(60) }}
|
||||
RandomizedDelaySec={{ 45 if lookup('dict', container_data_timer).value.check_interval is not defined else (lookup('dict', container_data_timer).value.check_interval * 3 / 4) | int | abs }}
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
|
|
|
@ -884,3 +884,51 @@ class TestHelperFilters(tests_base.TestCase):
|
|||
expected_list = ['memcached', 'mysql']
|
||||
result = self.filters.get_failed_containers(data)
|
||||
self.assertEqual(result, expected_list)
|
||||
|
||||
def test_get_changed_async_task_names(self):
    """Changed entries, plus names forced through ``extra``, are returned."""

    def build_entry(name, changed, loop_var=None):
        # One element of a looped task's registered ``results`` list.
        # Without ``loop_var`` the loop item is stored under ``item`` and
        # no ``ansible_loop_var`` key is present.
        entry = {
            "changed": changed,
            "failed": False,
            (loop_var or "item"): {name: {"config": "foo"}},
        }
        if loop_var is not None:
            entry["ansible_loop_var"] = loop_var
        return entry

    data = {
        'results': [
            build_entry('keystone', False, loop_var="systemd_loop"),
            build_entry('mysql', False, loop_var="systemd_loop"),
            build_entry('haproxy', True, loop_var="systemd_loop"),
            build_entry('memcached', True),
        ],
    }
    # mysql did not change but is forced in through ``extra``.
    expected_list = ['mysql', 'haproxy', 'memcached']
    result = self.filters.get_changed_async_task_names(
        data=data, extra=['mysql'])
    self.assertEqual(result, expected_list)
|
||||
|
|
Loading…
Reference in New Issue