Modified neutron-netns-cleanup-cron.py script.

We needed to remove qdhcp namespaces that exist on hosts which do not match the DHCP agent hosts recorded in the Neutron DB.
This happens when people interrupt the Neutron agents: after the agents are restarted, they can start namespaces on different hosts,
but nothing tracks the previous hosts. The previous version of the script checked only for IPs inside the namespace and did not validate hosts.

Change-Id: I9968f627ce3ab1596711fe9e8d3345d0a5fc42c8
This commit is contained in:
Alexey Terekhin 2022-05-23 15:38:23 -07:00 committed by Alexey
parent 4520ebdf98
commit 32afc483e5
4 changed files with 129 additions and 6 deletions

View File

@ -14,7 +14,7 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Neutron
name: neutron
version: 0.2.18
version: 0.2.19
home: https://docs.openstack.org/neutron/latest/
icon: https://www.openstack.org/themes/openstack/images/project-mascots/Neutron/OpenStack_Project_Neutron_vertical.png
sources:

View File

@ -1,18 +1,135 @@
#!/usr/bin/env python
import sys
import os
import time
import socket
from neutron.common import config
from oslo_config import cfg
from neutron.agent.linux import dhcp
from neutron.agent.l3 import namespaces
from neutron.agent.l3 import dvr_snat_ns
from neutron.agent.l3 import dvr_fip_ns
from neutron.cmd.netns_cleanup import setup_conf
from neutron.cmd.netns_cleanup import destroy_namespace
from neutron.cmd.netns_cleanup import eligible_for_deletion
from neutron.conf.agent import common as agent_config
from neutron.agent.linux import ip_lib
from keystoneauth1.identity import v3
from keystoneauth1 import session
from neutronclient.neutron import client as neutron_client
# Namespace-name prefixes keyed by 'l3': the prefix constants exported by the
# l3 `namespaces`, `dvr_snat_ns` and `dvr_fip_ns` modules.
NS_PREFIXES = {'l3': [namespaces.NS_PREFIX, dvr_snat_ns.SNAT_NS_PREFIX,
dvr_fip_ns.FIP_NS_PREFIX]}
# Prefix of DHCP namespace names; the __main__ loop passes it to sort_ns()
# and del_bad_dhcp().
DHCP_NS_PREFIX = dhcp.NS_PREFIX
from neutron.cmd.netns_cleanup import main
def get_neutron_creds():
    """Collect Keystone credentials and script settings from the environment.

    Each entry is read with ``os.getenv`` and falls back to a built-in
    default when the variable is unset, so every value is a string.

    Returns:
        dict: the keys ``auth_url``, ``password``, ``project_domain_name``,
        ``project_name``, ``user_domain_name``, ``username``, ``cafile``,
        ``insecure``, ``debug`` and ``wait``, in that order.
    """
    # (result key, environment variable, fallback value)
    settings = (
        ('auth_url', 'OS_AUTH_URL',
         'https://keystone-api.openstack.svc.cluster.local:5000/v3'),
        ('password', 'OS_PASSWORD', 'nopassword'),
        ('project_domain_name', 'OS_PROJECT_DOMAIN_NAME', 'default'),
        ('project_name', 'OS_PROJECT_NAME', 'admin'),
        ('user_domain_name', 'OS_USER_DOMAIN_NAME', 'default'),
        ('username', 'OS_USERNAME', 'admin'),
        ('cafile', 'OS_CACERT',
         '/var/lib/neutron/openstack-helm/openstack-helm.crt'),
        ('insecure', 'NEUTRON_CLEANUP_INSECURE', 'true'),
        ('debug', 'NEUTRON_CLEANUP_DEBUG', 'false'),
        ('wait', 'NEUTRON_CLEANUP_TIMEOUT', '1800'),
    )
    return {key: os.getenv(env_name, fallback)
            for key, env_name, fallback in settings}
def net_list(neutron_get):
    """Map every network id to the short host names of its DHCP agents.

    Args:
        neutron_get: client object providing ``list_networks()`` and
            ``list_dhcp_agent_hosting_networks(net_id)``.

    Returns:
        dict: ``{network_id: [host, ...]}`` where each host is the agent's
        ``host`` value cut at the first dot. A network without agents maps
        to an empty list; no networks at all yields an empty dict.
    """
    agents_by_network = {}
    listing = neutron_get.list_networks()
    # A falsy 'networks' value (empty or None) simply yields no entries.
    for network in listing['networks'] or ():
        network_id = network['id']
        hosting = neutron_get.list_dhcp_agent_hosting_networks(network_id)
        # Keep only the short host name so it can be compared with the
        # short local hostname.
        agents_by_network[network_id] = [
            entry['host'].split('.')[0]
            for entry in hosting['agents'] or ()
        ]
    return agents_by_network
def sort_ns(all_ns, dhcp_prefix):
    """Split namespace names into DHCP and non-DHCP groups.

    Args:
        all_ns: iterable of namespace names.
        dhcp_prefix: prefix that marks a name as a DHCP namespace.

    Returns:
        tuple: ``(dhcp_ns, not_dhcp_ns)`` lists, each keeping the input
        order of its members.
    """
    prefix_len = len(dhcp_prefix)
    with_prefix = []
    without_prefix = []
    for name in all_ns:
        # A name belongs to the DHCP group when its head equals the prefix.
        if name[:prefix_len] == dhcp_prefix:
            bucket = with_prefix
        else:
            bucket = without_prefix
        bucket.append(name)
    return with_prefix, without_prefix
def del_bad_dhcp(dhcp_ns, dhcp_hosts, conf, dhcp_prefix, debug,
                 hostname=None):
    """Destroy DHCP namespaces that do not belong on this host.

    The network id is taken from each namespace name by stripping
    ``dhcp_prefix``. A namespace is destroyed when its network id is missing
    from ``dhcp_hosts``, or when ``hostname`` is not among the hosts listed
    for that network.

    Args:
        dhcp_ns: iterable of DHCP namespace names.
        dhcp_hosts: dict mapping network id to a list of short host names.
        conf: configuration object; passed to ``destroy_namespace`` together
            with ``conf.force``.
        dhcp_prefix: prefix stripped from each namespace name.
        debug: when true, write one DEBUG line per namespace to stdout.
        hostname: short name of the local host. Defaults to the first label
            of ``socket.gethostname()``, so the function no longer depends on
            a module global that exists only when the file runs as a script.
    """
    if hostname is None:
        hostname = socket.gethostname().split('.')[0]

    for ns in dhcp_ns:
        net_id = ns[len(dhcp_prefix):]

        if net_id not in dhcp_hosts:
            # No network with this id was reported.
            destroy_namespace(conf, ns, conf.force)
            if debug:
                sys.stdout.write("DEBUG: {} host {} deleted {} because no related network found\n"
                                 .format(sys.argv[0], hostname, ns))
            continue

        if hostname not in dhcp_hosts[net_id]:
            # The network exists but its DHCP agents are on other hosts.
            destroy_namespace(conf, ns, conf.force)
            if debug:
                sys.stdout.write("DEBUG: {} host {} deleted {} because host wrong\n"
                                 .format(sys.argv[0], hostname, ns))
        elif debug:
            sys.stdout.write("DEBUG: {} host {} {} looks ok\n"
                             .format(sys.argv[0], hostname, ns))
def del_bad_not_dhcp(not_dhcp_ns, conf, debug, hostname=None):
    """Destroy non-DHCP namespaces that are eligible for deletion.

    Each namespace is checked with ``eligible_for_deletion(conf, ns,
    conf.force)`` and, when that returns true, removed with
    ``destroy_namespace(conf, ns, conf.force)``.

    Args:
        not_dhcp_ns: iterable of namespace names without the DHCP prefix.
        conf: configuration object handed to both helpers along with
            ``conf.force``.
        debug: when true, write a DEBUG line to stdout for every deleted
            namespace.
        hostname: short name of the local host, used only in the debug
            message. Defaults to the first label of ``socket.gethostname()``,
            so the function no longer depends on a module global that exists
            only when the file runs as a script.
    """
    if hostname is None:
        hostname = socket.gethostname().split('.')[0]

    for ns in not_dhcp_ns:
        if not eligible_for_deletion(conf, ns, conf.force):
            continue
        destroy_namespace(conf, ns, conf.force)
        if debug:
            sys.stdout.write("DEBUG: {} host {} deleted {} because no IP addr\n"
                             .format(sys.argv[0], hostname, ns))
if __name__ == "__main__":
conf = setup_conf()
cfg.CONF(sys.argv[1:])
opts = get_neutron_creds()
debug = False
verify= False
if opts.pop('debug') in ('true', '1', 'True'):
debug = True
insecure = opts.pop('insecure')
cafile = opts.pop('cafile')
if insecure in ('false', '0', 'False'):
verify = cafile
timeout = int(opts.pop('wait'))
conf()
config.setup_logging()
agent_config.setup_privsep()
auth = v3.Password(**opts)
hostname = socket.gethostname().split('.')[0]
while True:
try:
main()
# Sleep for 12 hours
time.sleep(43200)
all_ns = ip_lib.list_network_namespaces()
sess = session.Session(auth=auth, verify=verify)
neutron_get = neutron_client.Client('2.0', session=sess)
dhcp_hosts = net_list(neutron_get)
if all_ns:
dhcp_ns, not_dhcp_ns = sort_ns(all_ns, DHCP_NS_PREFIX)
if dhcp_ns:
del_bad_dhcp(dhcp_ns, dhcp_hosts, conf, DHCP_NS_PREFIX, debug)
else:
if debug:
sys.stdout.write("DEBUG: {} host {} no dhcp ns found\n"
.format(sys.argv[0], hostname))
if not_dhcp_ns:
del_bad_not_dhcp(not_dhcp_ns, conf, debug)
else:
if debug:
sys.stdout.write("DEBUG: {} host {} no not_dhcp ns found\n"
.format(sys.argv[0], hostname))
else:
if debug:
sys.stdout.write("DEBUG: {} host {} no ns found at all\n"
.format(sys.argv[0], hostname))
except Exception as ex:
sys.stderr.write(
"Cleaning network namespaces caught an exception %s"
@ -24,3 +141,4 @@ if __name__ == "__main__":
time.sleep(30)
finally:
cfg.CONF.clear()
time.sleep(timeout)

View File

@ -76,6 +76,10 @@ spec:
- /etc/neutron/dhcp_agent.ini
- --config-file
- /etc/neutron/l3_agent.ini
env:
{{- with $env := dict "ksUserSecret" $envAll.Values.secrets.identity.admin "useCA" false }}
{{- include "helm-toolkit.snippets.keystone_openrc_env_vars" $env | indent 12 }}
{{- end }}
volumeMounts:
- name: pod-tmp
mountPath: /tmp

View File

@ -32,4 +32,5 @@ neutron:
- 0.2.16 Remove usage of six
- 0.2.17 Migrated PodDisruptionBudget resource to policy/v1 API version
- 0.2.18 Updated naming for subchart compatibility
- 0.2.19 Added qdhcp NS host validation for deleting wrong namespaces.
...