From a640ae8cc9b5b6f290a1a67ab3dd09470a785887 Mon Sep 17 00:00:00 2001
From: Andrew Bonney
Date: Thu, 21 Jan 2021 10:19:05 +0000
Subject: [PATCH] Prevent neutron-l3-agent killing keepalived on restart

Systemd processes use a default KillMode of 'control-group' which causes
all other processes spawned during execution to be killed on service
stop. Neutron expects the keepalived processes it starts to remain
running in order to prevent data-plane interruptions for HA routers.

This change switches the systemd KillMode to process in order to prevent
this issue. In doing so we also have to clean up non-keepalived
processes started by neutron so that upon restart everything is running
from the latest virtualenv which may have changed during an upgrade.

Change-Id: I958fda17e6207553466d8a7512e35c30b122c22c
Closes-Bug: #1846198
Depends-On: https://review.opendev.org/771770
---
Review notes (below the three-dash line, so not part of the commit message):

* This copy had lost its line breaks and leading indentation. Both were
  restored from the hunk headers, the diffstat and the YAML nesting.
  NOTE(review): compare the indentation with the upstream change before
  applying.
* Changed relative to the original payload: the combine() call in
  vars/main.yml now passes recursive=True. combine() merges only the top
  level by default, so an operator override that defines its own
  "Service" section would have replaced the defaults wholesale and
  dropped KillMode=process. The blob id after ".." on the vars/main.yml
  index line still describes the original payload.
* NOTE(review): the cleanup loop reads cgroup.procs from
  /sys/fs/cgroup/pids/..., which looks like the cgroup v1 layout. When
  that file is absent the loop presumably iterates over nothing and the
  task still succeeds; confirm the path on hosts using the unified
  hierarchy.
* NOTE(review): the shell task sets no changed_when, so it presumably
  reports "changed" on every handler run; confirm that is acceptable.
* NOTE(review): the From: header carries no e-mail address in this copy;
  confirm against the original before feeding this to git am.

 handlers/main.yml | 21 +++++++++++++++++++++
 vars/main.yml     | 11 ++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/handlers/main.yml b/handlers/main.yml
index a37e5b90..f840faa4 100644
--- a/handlers/main.yml
+++ b/handlers/main.yml
@@ -49,6 +49,27 @@
     - "Restart neutron services"
     - "venv changed"
 
+# NOTE
+# When restarting neutron-l3-agent, a non-default systemd KillMode of 'process' is used
+# to prevent Keepalived from exiting and causing a data-plane outage. As a result of this
+# some neutron processes remain running. In the case of an upgrade, these remaining
+# processes will be running code from the previous version. This step ensures these
+# orphaned processes are cleaned up correctly.
+- name: Run neutron-l3-agent process cleanup
+  shell: |
+    for ns_pid in $(cat /sys/fs/cgroup/pids/neutron.slice/neutron-l3-agent.service/cgroup.procs); do
+      echo $(readlink -f "/proc/$ns_pid/exe") | grep -qv "keepalived"
+      if [ $? -eq 0 ]; then
+        if kill -9 "$ns_pid"; then
+          logger -s "old neutron-l3-agent pid found and has been cleaned up on: \"$ns_pid\""
+        fi
+      fi
+    done
+  when: "'neutron-l3-agent' in (filtered_neutron_services | map(attribute='service_key') | list)"
+  listen:
+    - "Restart neutron services"
+    - "venv changed"
+
 - name: Perform a DB contract
   command: "{{ neutron_bin }}/neutron-db-manage upgrade --contract"
   become: yes
diff --git a/vars/main.yml b/vars/main.yml
index 01342737..62676786 100644
--- a/vars/main.yml
+++ b/vars/main.yml
@@ -415,7 +415,7 @@ neutron_services:
     execstarts: "{{ neutron_bin }}/neutron-l3-agent --config-file {{ neutron_conf_dir }}/neutron.conf --config-file {{ neutron_conf_dir }}/l3_agent.ini"
     config_overrides: "{{ neutron_l3_agent_ini_overrides }}"
     config_type: "ini"
-    init_config_overrides: "{{ neutron_l3_agent_init_overrides }}"
+    init_config_overrides: "{{ _neutron_l3_agent_init_defaults | combine(neutron_l3_agent_init_overrides, recursive=True) }}"
     start_order: 3
   neutron-bgp-dragent:
     group: neutron_bgp_dragent
@@ -508,6 +508,15 @@ neutron_services:
     init_config_overrides: "{{ neutron_ovn_metadata_agent_init_overrides }}"
     start_order: 3
 
+###
+### Overrides for services where required
+###
+
+# Ensure that keepalived processes are not stopped on exit
+_neutron_l3_agent_init_defaults:
+  Service:
+    KillMode: process
+
 ###
 ### Internals: Drivers mappings
 ###