[train-squash] tripleo_container_manage: backport systemd optimizations
This is a combination of 2 commits to avoid regressions. (see https://bugs.launchpad.net/tripleo/+bug/1873249) This is the 1st commit message: tripleo_container_manage: optimize systemd services/healthcheck bootstrap Separate the creation of systemd files & service restarts so we don't call systemd too many times, which makes the deployment faster. It also uses a new filter that reads the registered data to figure out which systemd files changed and therefore which containers need a restart. Change-Id: I16596a5b262642a678a8b8b123384fc387f69c70 (cherry picked from commit 761e5cbdd5
) This is the commit message #2: tripleo_container_manage: add safeguard against wrong healthcheck config If a container config has by mistake a healthcheck but no systemd restart policy, we don't want to manage the healthcheck because it requires its service to be created. To prevent that situation, we'll create the healthchecks only if they are already part of the systemd services list that was created earlier. For that, we're using the intersect() filter, which returns the intersection of 2 lists (systemd services and healthchecks to create). Adding molecule coverage to test this scenario. Closes-Bug: #1873249 Change-Id: Id5cc784bae597def0648f07d28b6463b387d2212 (cherry picked from commit 04f16051cc
)
This commit is contained in:
parent
0019bb66a3
commit
40669aeee3
|
@ -43,7 +43,8 @@ class FilterModule(object):
|
|||
'get_domain_id': self.get_domain_id,
|
||||
'get_changed_containers': self.get_changed_containers,
|
||||
'get_failed_containers': self.get_failed_containers,
|
||||
'recursive_get_key_from_dict': self.recursive_get_key_from_dict
|
||||
'recursive_get_key_from_dict': self.recursive_get_key_from_dict,
|
||||
'get_changed_async_task_names': self.get_changed_async_task_names
|
||||
}
|
||||
|
||||
def subsort(self, dict_to_sort, attribute, null_value=0):
|
||||
|
@ -408,3 +409,19 @@ class FilterModule(object):
|
|||
# not contain failed or finished keys.
|
||||
continue
|
||||
return failed
|
||||
|
||||
def get_changed_async_task_names(self, data, extra=None):
    """Return a list of ansible resources that changed.

    This filter takes the registered output of a looped task (data)
    and returns the names of the resources that changed.

    :param data: dictionary with a 'results' list. Each result holds the
                 looped item under the key named by 'ansible_loop_var'
                 (falling back to 'item'); the keys of that item are the
                 resource names.
    :param extra: optional list of resource names that are always
                  included when present in the results, whether or not
                  they changed.
    :returns: list of resource names, in the order they appear in
              data['results'].
    """
    # Avoid a shared mutable default; None means "no extra names".
    extra = [] if extra is None else extra
    return_list = []
    for result in data['results']:
        loop_var = result.get('ansible_loop_var', 'item')
        # The changed flag is per result, so evaluate it once rather
        # than for every key of the looped item.
        changed = bool(result.get('changed'))
        for name in result[loop_var]:
            if changed or name in extra:
                return_list.append(name)
    return return_list
|
||||
|
|
|
@ -62,6 +62,21 @@
|
|||
- tripleo_fedora_healthcheck_active_result.rc == 0
|
||||
fail_msg: 'tripleo_fedora systemd healthcheck service is not active'
|
||||
success_msg: 'tripleo_fedora systemd healthcheck service is active'
|
||||
- name: Check if tripleo_fedora_bis has systemd service
|
||||
stat:
|
||||
path: /etc/systemd/system/tripleo_fedora_bis.service
|
||||
register: stat_tripleo_fedora_bis_systemd
|
||||
- name: Check if tripleo_fedora_bis has systemd healthcheck timer
|
||||
stat:
|
||||
path: /etc/systemd/system/tripleo_fedora_bis_healthcheck.timer
|
||||
register: stat_tripleo_fedora_bis_systemd_timer
|
||||
- name: Assert that tripleo_fedora_bis has no systemd integration
|
||||
assert:
|
||||
that:
|
||||
- not stat_tripleo_fedora_bis_systemd.stat.exists
|
||||
- not stat_tripleo_fedora_bis_systemd_timer.stat.exists
|
||||
fail_msg: 'tripleo_fedora_bis has systemd service'
|
||||
success_msg: 'tripleo_fedora_bis has no systemd service'
|
||||
- name: Verify that Fedora bis container was created correctly
|
||||
block:
|
||||
- name: Check for fedora_bis container
|
||||
|
|
|
@ -43,7 +43,8 @@
|
|||
{
|
||||
"image": "fedora:latest",
|
||||
"net": "host",
|
||||
"command": "sleep 3600"
|
||||
"command": "sleep 3600",
|
||||
"healthcheck": { "test": "echo test" }
|
||||
}
|
||||
dest: '/tmp/container-configs/fedora_bis.json'
|
||||
- name: Create a third configuration file for a fedora container
|
||||
|
|
|
@ -19,15 +19,3 @@
|
|||
|
||||
- name: Manage containers asynchronously
|
||||
include_tasks: podman/create.yml
|
||||
|
||||
# We don't want to use async for the systemd tasks or we can have startup
|
||||
# errors when systemd has to deal with multiple services trying to start
|
||||
# at the same time. It is more reliable to start them in serial.
|
||||
- name: Manage container systemd services and healthchecks in serial
|
||||
include_tasks: podman/systemd.yml
|
||||
# systemd doesn't have the equivalent of docker unless-stopped.
|
||||
# Let's force 'always' so containers aren't restarted when stopped by
|
||||
# systemd, but restarted when in failure.
|
||||
loop: "{{ batched_container_data | haskey(attribute='restart', value=['always','unless-stopped'], any=True) }}"
|
||||
loop_control:
|
||||
loop_var: container_config
|
||||
|
|
|
@ -26,3 +26,6 @@
|
|||
loop: "{{ data | batch(tripleo_container_manage_concurrency) | list }}"
|
||||
loop_control:
|
||||
loop_var: batched_container_data
|
||||
|
||||
- name: Manage container systemd services and healthchecks for start_order {{ order }}"
|
||||
include_tasks: podman/systemd.yml
|
||||
|
|
|
@ -14,70 +14,111 @@
|
|||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
- name: Set container_name and container_sysd facts
|
||||
- name: Set container_config fact
|
||||
set_fact:
|
||||
container_sysd_name: "{{ lookup('dict', container_config).key }}"
|
||||
container_sysd_data: "{{ lookup('dict', container_config).value }}"
|
||||
container_config: "{{ data | list | haskey(attribute='restart', value=['always','unless-stopped'], any=True) | default([]) }}"
|
||||
|
||||
- name: "Start systemd service for {{ container_sysd_name }}"
|
||||
- name: Set container_config_healthcheck fact
|
||||
set_fact:
|
||||
# Using intersect to prevent a service which isn't controlled by systemd
|
||||
# but has healthcheck in its configuration (by mistake)
|
||||
# See https://bugs.launchpad.net/tripleo/+bug/1873249
|
||||
container_config_healthcheck: "{{ data | list | haskey(attribute='healthcheck') | intersect(container_config) | default([]) }}"
|
||||
|
||||
- name: "Manage systemd files"
|
||||
no_log: "{{ not tripleo_container_manage_debug }}"
|
||||
block:
|
||||
- name: "Remove trailing .requires for {{ container_sysd_name }}"
|
||||
- name: "Remove trailing .requires"
|
||||
file:
|
||||
path: "/etc/systemd/system/tripleo_{{ container_sysd_name }}.requires"
|
||||
path: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_requires).key }}.requires"
|
||||
state: absent
|
||||
- name: "Create systemd unit file for {{ container_sysd_name }} service"
|
||||
loop: "{{ container_config }}"
|
||||
loop_control:
|
||||
loop_var: container_data_requires
|
||||
- name: "Create systemd services files"
|
||||
template:
|
||||
src: systemd-service.j2
|
||||
dest: "/etc/systemd/system/tripleo_{{ container_sysd_name }}.service"
|
||||
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_unit).key }}.service"
|
||||
mode: '0644'
|
||||
owner: root
|
||||
group: root
|
||||
register: systemd_file
|
||||
- name: "Enable and start systemd service for {{ container_sysd_name }}"
|
||||
systemd:
|
||||
# Restart the service if it was already running
|
||||
state: restarted
|
||||
name: "tripleo_{{ container_sysd_name }}.service"
|
||||
enabled: true
|
||||
daemon_reload: true
|
||||
when:
|
||||
- systemd_file is changed or container_sysd_name in containers_changed
|
||||
- name: "Manage systemd healthcheck for {{ container_sysd_name }}"
|
||||
loop: "{{ container_config }}"
|
||||
loop_control:
|
||||
loop_var: container_data_unit
|
||||
- name: "Create systemd healthcheck files"
|
||||
when:
|
||||
- not tripleo_container_manage_healthcheck_disabled
|
||||
- container_sysd_data.healthcheck is defined
|
||||
- (container_config_healthcheck | length) > 0
|
||||
block:
|
||||
- name: "Create systemd unit file for {{ container_sysd_name }} healthcheck"
|
||||
- name: "Create systemd unit files healthchecks"
|
||||
template:
|
||||
src: systemd-healthcheck.j2
|
||||
dest: "/etc/systemd/system/tripleo_{{ container_sysd_name }}_healthcheck.service"
|
||||
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_healthcheck).key }}_healthcheck.service"
|
||||
mode: '0644'
|
||||
owner: root
|
||||
group: root
|
||||
register: systemd_healthcheck
|
||||
- name: "Create systemd timer for {{ container_sysd_name }} healthcheck"
|
||||
loop: "{{ container_config_healthcheck }}"
|
||||
loop_control:
|
||||
loop_var: container_data_healthcheck
|
||||
- name: "Create systemd timers for healthchecks"
|
||||
template:
|
||||
src: systemd-timer.j2
|
||||
dest: "/etc/systemd/system/tripleo_{{ container_sysd_name }}_healthcheck.timer"
|
||||
dest: "/etc/systemd/system/tripleo_{{ lookup('dict', container_data_timer).key }}_healthcheck.timer"
|
||||
mode: '0644'
|
||||
owner: root
|
||||
group: root
|
||||
register: systemd_timer
|
||||
- name: "Enable and start systemd timer for {{ container_sysd_name }}"
|
||||
systemd:
|
||||
# Restart the timer if it was already running
|
||||
state: restarted
|
||||
name: "tripleo_{{ container_sysd_name }}_healthcheck.timer"
|
||||
enabled: true
|
||||
daemon_reload: true
|
||||
when:
|
||||
- systemd_healthcheck.changed or systemd_timer.changed
|
||||
- name: "Add systemd require for {{ container_sysd_name }} healthcheck"
|
||||
command: "systemctl add-requires tripleo_{{ container_sysd_name }}.service tripleo_{{ container_sysd_name }}_healthcheck.timer"
|
||||
when:
|
||||
- systemd_healthcheck.changed or systemd_timer.changed
|
||||
- name: "Force systemd daemon reload"
|
||||
systemd:
|
||||
daemon_reload: true
|
||||
when:
|
||||
- systemd_healthcheck.changed or systemd_timer.changed
|
||||
loop: "{{ container_config_healthcheck }}"
|
||||
loop_control:
|
||||
loop_var: container_data_timer
|
||||
|
||||
- name: Create fact for container_systemd_changes
|
||||
set_fact:
|
||||
container_systemd_changes: >-
|
||||
{{ ((systemd_file|get_changed_async_task_names) + (systemd_healthcheck|get_changed_async_task_names) +
|
||||
(systemd_timer|get_changed_async_task_names)|default([])|unique) }}
|
||||
|
||||
- name: "Force systemd daemon reload if a systemd file changed"
|
||||
systemd:
|
||||
daemon_reload: true
|
||||
when:
|
||||
- (container_systemd_changes | length) > 0
|
||||
|
||||
- name: Create fact for container_systemd_restart_list
|
||||
set_fact:
|
||||
container_systemd_services_restart_list: "{{ (systemd_file|get_changed_async_task_names(extra=containers_changed|default([]))) }}"
|
||||
container_systemd_timers_restart_list: "{{ (systemd_timer|get_changed_async_task_names(extra=containers_changed|default([]))) }}"
|
||||
|
||||
- name: "Start or restart systemd services"
|
||||
systemd:
|
||||
# Restart the service if it was already running
|
||||
state: restarted
|
||||
name: "tripleo_{{ container_sysd_name }}.service"
|
||||
enabled: true
|
||||
daemon_reload: false
|
||||
loop: "{{ container_systemd_services_restart_list }}"
|
||||
loop_control:
|
||||
loop_var: container_sysd_name
|
||||
|
||||
- name: "Enable and start systemd timers"
|
||||
systemd:
|
||||
# Restart the timer if it was already running
|
||||
state: restarted
|
||||
name: "tripleo_{{ container_timer_name }}_healthcheck.timer"
|
||||
enabled: true
|
||||
daemon_reload: false
|
||||
loop: "{{ container_systemd_timers_restart_list }}"
|
||||
loop_control:
|
||||
loop_var: container_timer_name
|
||||
when:
|
||||
- not tripleo_container_manage_healthcheck_disabled
|
||||
|
||||
- name: "Add systemd requires for healthchecks"
|
||||
command: "systemctl add-requires tripleo_{{ container_requires_timer_name }}.service tripleo_{{ container_requires_timer_name }}_healthcheck.timer"
|
||||
loop: "{{ container_systemd_timers_restart_list }}"
|
||||
loop_control:
|
||||
loop_var: container_requires_timer_name
|
||||
when:
|
||||
- not tripleo_container_manage_healthcheck_disabled
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
[Unit]
|
||||
Description=tripleo_{{ container_sysd_name }} healthcheck
|
||||
After=tripleo-container-shutdown.service tripleo_{{ container_sysd_name }}.service
|
||||
Requisite=tripleo_{{ container_sysd_name }}.service
|
||||
Description=tripleo_{{ lookup('dict', container_data_healthcheck).key }} healthcheck
|
||||
After=tripleo-container-shutdown.service tripleo_{{ lookup('dict', container_data_healthcheck).key }}.service
|
||||
Requisite=tripleo_{{ lookup('dict', container_data_healthcheck).key }}.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/bin/podman exec --user root {{ container_sysd_name }} {{ container_sysd_data.healthcheck.test }}
|
||||
SyslogIdentifier=healthcheck_{{ container_sysd_name }}
|
||||
ExecStart=/usr/bin/podman exec --user root {{ lookup('dict', container_data_healthcheck).key }} {{ lookup('dict', container_data_healthcheck).value.healthcheck.test }}
|
||||
SyslogIdentifier=healthcheck_{{ lookup('dict', container_data_healthcheck).key }}
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
[Unit]
|
||||
Description={{ container_sysd_name }} container
|
||||
Description={{ lookup('dict', container_data_unit).key }} container
|
||||
After=tripleo-container-shutdown.service
|
||||
Wants={{ container_sysd_data.depends_on | default([]) | join(',') }}
|
||||
Wants={{ lookup('dict', container_data_unit).value.depends_on | default([]) | join(',') }}
|
||||
[Service]
|
||||
Restart=always
|
||||
{% if container_sysd_data.depends_on is defined and (container_sysd_data.depends_on | length > 0) and podman_drop_in | default('false') %}
|
||||
ExecStart=/usr/libexec/tripleo-start-podman-container {{ container_sysd_name }}
|
||||
{% if lookup('dict', container_data_unit).value.depends_on is defined and (lookup('dict', container_data_unit).value.depends_on | length > 0) and podman_drop_in | default('false') %}
|
||||
ExecStart=/usr/libexec/tripleo-start-podman-container {{ lookup('dict', container_data_unit).key }}
|
||||
{% else %}
|
||||
ExecStart=/usr/bin/podman start {{ container_sysd_name }}
|
||||
ExecStart=/usr/bin/podman start {{ lookup('dict', container_data_unit).key }}
|
||||
{% endif %}
|
||||
ExecReload=/usr/bin/podman kill --signal HUP {{ container_sysd_name }}
|
||||
ExecStop=/usr/bin/podman stop -t {{ container_sysd_data.stop_grace_period | default(10) | int }} {{ container_sysd_name }}
|
||||
ExecReload=/usr/bin/podman kill --signal HUP {{ lookup('dict', container_data_unit).key }}
|
||||
ExecStop=/usr/bin/podman stop -t {{ lookup('dict', container_data_unit).value.stop_grace_period | default(10) | int }} {{ lookup('dict', container_data_unit).key }}
|
||||
KillMode=none
|
||||
Type=forking
|
||||
PIDFile=/var/run/{{ container_sysd_name }}.pid
|
||||
{% if container_sysd_data.systemd_exec_flags is defined %}
|
||||
{% for s_flag, s_value in container_sysd_data.systemd_exec_flags.items() %}
|
||||
PIDFile=/var/run/{{ lookup('dict', container_data_unit).key }}.pid
|
||||
{% if lookup('dict', container_data_unit).value.systemd_exec_flags is defined %}
|
||||
{% for s_flag, s_value in lookup('dict', container_data_unit).value.systemd_exec_flags.items() %}
|
||||
{{ s_flag }}={{ s_value }}
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
[Unit]
|
||||
Description=tripleo_{{ container_sysd_name }} container healthcheck
|
||||
PartOf=tripleo_{{ container_sysd_name }}.service
|
||||
Description=tripleo_{{ lookup('dict', container_data_timer).key }} container healthcheck
|
||||
PartOf=tripleo_{{ lookup('dict', container_data_timer).key }}.service
|
||||
[Timer]
|
||||
OnActiveSec=120
|
||||
OnUnitActiveSec={{ container_sysd_data.check_interval | default(60) }}
|
||||
RandomizedDelaySec={{ 45 if container_sysd_data.check_interval is not defined else (container_sysd_data.check_interval * 3 / 4) | int | abs }}
|
||||
OnUnitActiveSec={{ lookup('dict', container_data_timer).value.check_interval | default(60) }}
|
||||
RandomizedDelaySec={{ 45 if lookup('dict', container_data_timer).value.check_interval is not defined else (lookup('dict', container_data_timer).value.check_interval * 3 / 4) | int | abs }}
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
|
|
|
@ -884,3 +884,51 @@ class TestHelperFilters(tests_base.TestCase):
|
|||
expected_list = ['memcached', 'mysql']
|
||||
result = self.filters.get_failed_containers(data)
|
||||
self.assertEqual(result, expected_list)
|
||||
|
||||
def test_get_changed_async_task_names(self):
    """Changed items and items listed in ``extra`` are both returned.

    keystone is unchanged and not in ``extra``, so it is dropped; mysql
    is unchanged but requested through ``extra``; haproxy and memcached
    changed. memcached has no 'ansible_loop_var', which exercises the
    fallback to the default 'item' key.
    """
    registered = {
        'results': [
            {
                "ansible_loop_var": "systemd_loop",
                "changed": False,
                "failed": False,
                "systemd_loop": {'keystone': {"config": "foo"}},
            },
            {
                "ansible_loop_var": "systemd_loop",
                "changed": False,
                "failed": False,
                "systemd_loop": {'mysql': {"config": "foo"}},
            },
            {
                "ansible_loop_var": "systemd_loop",
                "changed": True,
                "failed": False,
                "systemd_loop": {'haproxy': {"config": "foo"}},
            },
            {
                "changed": True,
                "failed": False,
                "item": {'memcached': {"config": "foo"}},
            },
        ],
    }
    names = self.filters.get_changed_async_task_names(
        data=registered, extra=['mysql'])
    self.assertEqual(names, ['mysql', 'haproxy', 'memcached'])
|
||||
|
|
Loading…
Reference in New Issue