OpenStack Networking (Neutron)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

netns_cleanup.py 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. # Copyright (c) 2012 OpenStack Foundation.
  2. # All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License"); you may
  5. # not use this file except in compliance with the License. You may obtain
  6. # a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. # License for the specific language governing permissions and limitations
  14. # under the License.
  15. import itertools
  16. import re
  17. import signal
  18. import time
  19. from neutron_lib import constants
  20. from oslo_config import cfg
  21. from oslo_log import log as logging
  22. from oslo_utils import importutils
  23. from neutron.agent.common import ovs_lib
  24. from neutron.agent.l3 import dvr_fip_ns
  25. from neutron.agent.l3 import dvr_snat_ns
  26. from neutron.agent.l3 import namespaces
  27. from neutron.agent.linux import dhcp
  28. from neutron.agent.linux import external_process
  29. from neutron.agent.linux import ip_lib
  30. from neutron.agent.linux import utils
  31. from neutron.common import config
  32. from neutron.conf.agent import cmd
  33. from neutron.conf.agent import common as agent_config
  34. from neutron.conf.agent import dhcp as dhcp_config
  35. LOG = logging.getLogger(__name__)
  36. LB_NS_PREFIX = 'qlbaas-'
  37. NS_PREFIXES = {
  38. 'dhcp': [dhcp.NS_PREFIX],
  39. 'l3': [namespaces.NS_PREFIX, dvr_snat_ns.SNAT_NS_PREFIX,
  40. dvr_fip_ns.FIP_NS_PREFIX],
  41. 'lbaas': [LB_NS_PREFIX],
  42. }
  43. SIGTERM_WAITTIME = 10
  44. NETSTAT_PIDS_REGEX = re.compile(r'.* (?P<pid>\d{2,6})/.*')
  45. class PidsInNamespaceException(Exception):
  46. pass
  47. class FakeDhcpPlugin(object):
  48. """Fake RPC plugin to bypass any RPC calls."""
  49. def __getattribute__(self, name):
  50. def fake_method(*args):
  51. pass
  52. return fake_method
  53. def setup_conf():
  54. """Setup the cfg for the clean up utility.
  55. Use separate setup_conf for the utility because there are many options
  56. from the main config that do not apply during clean-up.
  57. """
  58. conf = cfg.CONF
  59. cmd.register_cmd_opts(cmd.netns_opts, conf)
  60. agent_config.register_interface_driver_opts_helper(conf)
  61. dhcp_config.register_agent_dhcp_opts(conf)
  62. agent_config.register_interface_opts()
  63. return conf
  64. def _get_dhcp_process_monitor(config):
  65. return external_process.ProcessMonitor(config=config,
  66. resource_type='dhcp')
  67. def kill_dhcp(conf, namespace):
  68. """Disable DHCP for a network if DHCP is still active."""
  69. network_id = namespace.replace(dhcp.NS_PREFIX, '')
  70. dhcp_driver = importutils.import_object(
  71. conf.dhcp_driver,
  72. conf=conf,
  73. process_monitor=_get_dhcp_process_monitor(conf),
  74. network=dhcp.NetModel({'id': network_id}),
  75. plugin=FakeDhcpPlugin())
  76. if dhcp_driver.active:
  77. dhcp_driver.disable()
  78. def eligible_for_deletion(conf, namespace, force=False):
  79. """Determine whether a namespace is eligible for deletion.
  80. Eligibility is determined by having only the lo device or if force
  81. is passed as a parameter.
  82. """
  83. if conf.agent_type:
  84. prefixes = NS_PREFIXES.get(conf.agent_type)
  85. else:
  86. prefixes = itertools.chain(*NS_PREFIXES.values())
  87. ns_mangling_pattern = '(%s%s)' % ('|'.join(prefixes),
  88. constants.UUID_PATTERN)
  89. # filter out namespaces without UUID as the name
  90. if not re.match(ns_mangling_pattern, namespace):
  91. return False
  92. ip = ip_lib.IPWrapper(namespace=namespace)
  93. return force or ip.namespace_is_empty()
  94. def unplug_device(conf, device):
  95. orig_log_fail_as_error = device.get_log_fail_as_error()
  96. device.set_log_fail_as_error(False)
  97. try:
  98. device.link.delete()
  99. except RuntimeError:
  100. device.set_log_fail_as_error(orig_log_fail_as_error)
  101. # Maybe the device is OVS port, so try to delete
  102. ovs = ovs_lib.BaseOVS()
  103. bridge_name = ovs.get_bridge_for_iface(device.name)
  104. if bridge_name:
  105. bridge = ovs_lib.OVSBridge(bridge_name)
  106. bridge.delete_port(device.name)
  107. else:
  108. LOG.debug('Unable to find bridge for device: %s', device.name)
  109. finally:
  110. device.set_log_fail_as_error(orig_log_fail_as_error)
  111. def find_listen_pids_namespace(namespace):
  112. """Retrieve a list of pids of listening processes within the given netns.
  113. It executes netstat -nlp and returns a set of unique pairs
  114. """
  115. ip = ip_lib.IPWrapper(namespace=namespace)
  116. pids = set()
  117. cmd = ['netstat', '-nlp']
  118. output = ip.netns.execute(cmd, run_as_root=True)
  119. for line in output.splitlines():
  120. m = NETSTAT_PIDS_REGEX.match(line)
  121. if m:
  122. pids.add(m.group('pid'))
  123. return pids
  124. def wait_until_no_listen_pids_namespace(namespace, timeout=SIGTERM_WAITTIME):
  125. """Poll listening processes within the given namespace.
  126. If after timeout seconds, there are remaining processes in the namespace,
  127. then a PidsInNamespaceException will be thrown.
  128. """
  129. # NOTE(dalvarez): This function can block forever if
  130. # find_listen_pids_in_namespace never returns which is really unlikely. We
  131. # can't use wait_until_true because we might get interrupted by eventlet
  132. # Timeout during our I/O with rootwrap daemon and that will lead to errors
  133. # in subsequent calls to utils.execute grabbing always the output of the
  134. # previous command
  135. start = end = time.time()
  136. while end - start < timeout:
  137. if not find_listen_pids_namespace(namespace):
  138. return
  139. time.sleep(1)
  140. end = time.time()
  141. raise PidsInNamespaceException
  142. def _kill_listen_processes(namespace, force=False):
  143. """Identify all listening processes within the given namespace.
  144. Then, for each one, find its top parent with same cmdline (in case this
  145. process forked) and issue a SIGTERM to all of them. If force is True,
  146. then a SIGKILL will be issued to all parents and all their children. Also,
  147. this function returns the number of listening processes.
  148. """
  149. pids = find_listen_pids_namespace(namespace)
  150. pids_to_kill = {utils.find_fork_top_parent(pid) for pid in pids}
  151. kill_signal = signal.SIGTERM
  152. if force:
  153. kill_signal = signal.SIGKILL
  154. children = [utils.find_child_pids(pid, True) for pid in pids_to_kill]
  155. pids_to_kill.update(itertools.chain.from_iterable(children))
  156. for pid in pids_to_kill:
  157. # Throw a warning since this particular cleanup may need a specific
  158. # implementation in the right module. Ideally, netns_cleanup wouldn't
  159. # kill any processes as the responsible module should've killed them
  160. # before cleaning up the namespace
  161. LOG.warning("Killing (%(signal)d) [%(pid)s] %(cmdline)s",
  162. {'signal': kill_signal,
  163. 'pid': pid,
  164. 'cmdline': ' '.join(utils.get_cmdline_from_pid(pid))[:80]
  165. })
  166. try:
  167. utils.kill_process(pid, kill_signal, run_as_root=True)
  168. except Exception as ex:
  169. LOG.error('An error occurred while killing '
  170. '[%(pid)s]: %(msg)s', {'pid': pid, 'msg': ex})
  171. return len(pids)
  172. def kill_listen_processes(namespace):
  173. """Kill all processes listening within the given namespace.
  174. First it tries to kill them using SIGTERM, waits until they die gracefully
  175. and then kills remaining processes (if any) with SIGKILL
  176. """
  177. if _kill_listen_processes(namespace, force=False):
  178. try:
  179. wait_until_no_listen_pids_namespace(namespace)
  180. except PidsInNamespaceException:
  181. _kill_listen_processes(namespace, force=True)
  182. # Allow some time for remaining processes to die
  183. wait_until_no_listen_pids_namespace(namespace)
  184. def destroy_namespace(conf, namespace, force=False):
  185. """Destroy a given namespace.
  186. If force is True, then dhcp (if it exists) will be disabled and all
  187. devices will be forcibly removed.
  188. """
  189. try:
  190. ip = ip_lib.IPWrapper(namespace=namespace)
  191. if force:
  192. kill_dhcp(conf, namespace)
  193. # NOTE: The dhcp driver will remove the namespace if is it empty,
  194. # so a second check is required here.
  195. if ip.netns.exists(namespace):
  196. try:
  197. kill_listen_processes(namespace)
  198. except PidsInNamespaceException:
  199. # This is unlikely since, at this point, we have SIGKILLed
  200. # all remaining processes but if there are still some, log
  201. # the error and continue with the cleanup
  202. LOG.error('Not all processes were killed in %s',
  203. namespace)
  204. for device in ip.get_devices():
  205. unplug_device(conf, device)
  206. ip.garbage_collect_namespace()
  207. except Exception:
  208. LOG.exception('Error unable to destroy namespace: %s', namespace)
  209. def cleanup_network_namespaces(conf):
  210. # Identify namespaces that are candidates for deletion.
  211. candidates = [ns for ns in
  212. ip_lib.list_network_namespaces()
  213. if eligible_for_deletion(conf, ns, conf.force)]
  214. if candidates:
  215. time.sleep(2)
  216. for namespace in candidates:
  217. destroy_namespace(conf, namespace, conf.force)
  218. def main():
  219. """Main method for cleaning up network namespaces.
  220. This method will make two passes checking for namespaces to delete. The
  221. process will identify candidates, sleep, and call garbage collect. The
  222. garbage collection will re-verify that the namespace meets the criteria for
  223. deletion (ie it is empty). The period of sleep and the 2nd pass allow
  224. time for the namespace state to settle, so that the check prior deletion
  225. will re-confirm the namespace is empty.
  226. The utility is designed to clean-up after the forced or unexpected
  227. termination of Neutron agents.
  228. The --force flag should only be used as part of the cleanup of a devstack
  229. installation as it will blindly purge namespaces and their devices. This
  230. option also kills any lingering DHCP instances.
  231. """
  232. conf = setup_conf()
  233. conf()
  234. config.setup_logging()
  235. agent_config.setup_privsep()
  236. cleanup_network_namespaces(conf)