Add retry mechanism for some pyroute2 actions

pyroute2 0.6.6 introduced a new exception, NetlinkDumpInterrupted, which
is raised when the NLM_F_DUMP_INTR flag is set during a dump of
devices.
The suggestion from the pyroute2 developers is to retry when this
exception is raised (see [1]).

For reference, this is how it was handled on the Neutron side: [2]

[1] https://github.com/svinota/pyroute2/issues/874#issuecomment-1063139555
[2] https://review.opendev.org/c/openstack/neutron/+/833015

Change-Id: I6b5604266f29a585bb1e0f8e605ee5df523b5951
This commit is contained in:
Luis Tomas Bolivar 2023-02-09 12:19:50 +01:00
parent 71bd3b396f
commit 0a666bb10c
3 changed files with 94 additions and 10 deletions

View File

@ -14,14 +14,16 @@
import ipaddress
import os
import pyroute2
from pyroute2 import netlink as pyroute_netlink
from pyroute2.netlink.rtnl import ndmsg
from socket import AF_INET6
from oslo_concurrency import processutils
from oslo_log import log as logging
import pyroute2
from pyroute2 import netlink as pyroute_netlink
from pyroute2.netlink import exceptions as netlink_exceptions
from pyroute2.netlink.rtnl import ndmsg
import tenacity
from ovn_bgp_agent import constants
from ovn_bgp_agent.utils import linux_net as l_net
@ -31,6 +33,12 @@ import ovn_bgp_agent.privileged.linux_net
LOG = logging.getLogger(__name__)
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
netlink_exceptions.NetlinkDumpInterrupted),
wait=tenacity.wait_exponential(multiplier=0.02, max=1),
stop=tenacity.stop_after_delay(8),
reraise=True)
@ovn_bgp_agent.privileged.default.entrypoint
def set_device_status(device, status, ndb=None):
_ndb = ndb
@ -92,6 +100,12 @@ def ensure_veth(veth_name, veth_peer):
set_device_status(veth_peer, constants.LINK_UP)
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
netlink_exceptions.NetlinkDumpInterrupted),
wait=tenacity.wait_exponential(multiplier=0.02, max=1),
stop=tenacity.stop_after_delay(8),
reraise=True)
@ovn_bgp_agent.privileged.default.entrypoint
def set_master_for_device(device, master):
with pyroute2.NDB() as ndb:

View File

@ -13,7 +13,6 @@
# limitations under the License.
import ipaddress
import pyroute2
import random
import re
import sys
@ -22,6 +21,9 @@ from socket import AF_INET
from socket import AF_INET6
from oslo_log import log as logging
import pyroute2
from pyroute2.netlink import exceptions as netlink_exceptions
import tenacity
from ovn_bgp_agent import constants
from ovn_bgp_agent import exceptions as agent_exc
@ -34,12 +36,24 @@ def get_ip_version(ip):
return ipaddress.ip_address(ip.split('/')[0]).version
@tenacity.retry(
    retry=tenacity.retry_if_exception_type(
        netlink_exceptions.NetlinkDumpInterrupted),
    wait=tenacity.wait_exponential(multiplier=0.02, max=1),
    stop=tenacity.stop_after_delay(8),
    reraise=True)
def get_interfaces(filter_out=None):
    """Return the names of the interfaces listed by pyroute2's NDB.

    The call is retried with exponential backoff while it raises
    NetlinkDumpInterrupted; retrying stops after 8 seconds and the last
    exception is then re-raised to the caller.

    :param filter_out: container of interface names to leave out of the
                       result. None (the default) filters nothing.
    :returns: list of interface names that are not in filter_out.
    """
    # None stands in for the former mutable ``[]`` default so that no list
    # object is shared between calls.
    excluded = () if filter_out is None else filter_out
    with pyroute2.NDB() as ndb:
        return [iface.ifname for iface in ndb.interfaces
                if iface.ifname not in excluded]
@tenacity.retry(
    retry=tenacity.retry_if_exception_type(
        netlink_exceptions.NetlinkDumpInterrupted),
    wait=tenacity.wait_exponential(multiplier=0.02, max=1),
    stop=tenacity.stop_after_delay(8),
    reraise=True)
def get_interface_index(nic):
    """Return the 'index' field of interface ``nic`` as seen by NDB.

    The lookup is retried with exponential backoff while it raises
    NetlinkDumpInterrupted; retrying stops after 8 seconds and the last
    exception is then re-raised to the caller.

    :param nic: key used to look the interface up in ``ndb.interfaces``.
    :returns: the value stored under 'index' for that interface.
    """
    with pyroute2.NDB() as ndb:
        interface = ndb.interfaces[nic]
        return interface['index']
@ -138,6 +152,16 @@ def ensure_routing_table_for_bridge(ovn_routing_tables, bridge):
LOG.debug("Added routing table for %s with number: %s", bridge,
table_number)
return _ensure_routing_table_routes(ovn_routing_tables, bridge)
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
netlink_exceptions.NetlinkDumpInterrupted),
wait=tenacity.wait_exponential(multiplier=0.02, max=1),
stop=tenacity.stop_after_delay(8),
reraise=True)
def _ensure_routing_table_routes(ovn_routing_tables, bridge):
# add default route on that table if it does not exist
extra_routes = []
@ -244,6 +268,12 @@ def enable_proxy_arp(device):
ovn_bgp_agent.privileged.linux_net.set_kernel_flag(flag, 1)
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
netlink_exceptions.NetlinkDumpInterrupted),
wait=tenacity.wait_exponential(multiplier=0.02, max=1),
stop=tenacity.stop_after_delay(8),
reraise=True)
def get_exposed_ips(nic):
exposed_ips = []
with pyroute2.NDB() as ndb:
@ -253,6 +283,12 @@ def get_exposed_ips(nic):
return exposed_ips
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
netlink_exceptions.NetlinkDumpInterrupted),
wait=tenacity.wait_exponential(multiplier=0.02, max=1),
stop=tenacity.stop_after_delay(8),
reraise=True)
def get_nic_ip(nic, prefixlen_filter=None):
exposed_ips = []
with pyroute2.NDB() as ndb:
@ -267,6 +303,12 @@ def get_nic_ip(nic, prefixlen_filter=None):
return exposed_ips
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
netlink_exceptions.NetlinkDumpInterrupted),
wait=tenacity.wait_exponential(multiplier=0.02, max=1),
stop=tenacity.stop_after_delay(8),
reraise=True)
def get_exposed_ips_on_network(nic, network):
exposed_ips = []
with pyroute2.NDB() as ndb:
@ -282,6 +324,12 @@ def get_exposed_ips_on_network(nic, network):
return exposed_ips
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
netlink_exceptions.NetlinkDumpInterrupted),
wait=tenacity.wait_exponential(multiplier=0.02, max=1),
stop=tenacity.stop_after_delay(8),
reraise=True)
def get_exposed_routes_on_network(table_ids, network):
with pyroute2.NDB() as ndb:
# NOTE: skip bgp routes (proto 186)
@ -296,6 +344,12 @@ def get_exposed_routes_on_network(table_ids, network):
]
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
netlink_exceptions.NetlinkDumpInterrupted),
wait=tenacity.wait_exponential(multiplier=0.02, max=1),
stop=tenacity.stop_after_delay(8),
reraise=True)
def get_ovn_ip_rules(routing_table):
# get the rules pointing to ovn bridges
ovn_ip_rules = {}
@ -356,13 +410,29 @@ def delete_bridge_ip_routes(routing_tables, routing_tables_routes,
def delete_routes_from_table(table):
    """Delete the routes of routing table ``table``.

    The routes to delete are the ones returned by _get_table_routes(),
    which are then handed to delete_ip_routes().

    :param table: routing table identifier passed to _get_table_routes().
    """
    # The routes dump is done only inside _get_table_routes(), which is
    # wrapped in the NetlinkDumpInterrupted retry. Dumping here as well
    # would be redundant and would not be retried.
    table_routes = _get_table_routes(table)
    delete_ip_routes(table_routes)
@tenacity.retry(
    retry=tenacity.retry_if_exception_type(
        netlink_exceptions.NetlinkDumpInterrupted),
    wait=tenacity.wait_exponential(multiplier=0.02, max=1),
    stop=tenacity.stop_after_delay(8),
    reraise=True)
def _get_table_routes(table):
    """Collect the routes of routing table ``table`` from an NDB dump.

    Routes whose scope is 254 or whose proto is 186 are left out. The
    dump is retried with exponential backoff while it raises
    NetlinkDumpInterrupted; retrying stops after 8 seconds and the last
    exception is then re-raised to the caller.

    :param table: value used to filter the routes dump by ``table``.
    :returns: list of the route records that passed the filter.
    """
    selected = []
    with pyroute2.NDB() as ndb:
        for route in ndb.routes.dump().filter(table=table):
            # FIXME: problem in pyroute2 removing routes with local (254)
            # scope
            if route.scope == 254 or route.proto == 186:
                continue
            selected.append(route)
    return selected
@tenacity.retry(
retry=tenacity.retry_if_exception_type(
netlink_exceptions.NetlinkDumpInterrupted),
wait=tenacity.wait_exponential(multiplier=0.02, max=1),
stop=tenacity.stop_after_delay(8),
reraise=True)
def get_routes_on_tables(table_ids):
with pyroute2.NDB() as ndb:
# NOTE: skip bgp routes (proto 186)

View File

@ -14,6 +14,6 @@ oslo.rootwrap>=5.15.0 # Apache-2.0
oslo.service>=1.40.2 # Apache-2.0
ovs>=2.8.0 # Apache-2.0
ovsdbapp>=1.4.0 # Apache-2.0
pyroute2>=0.6.4;sys_platform!='win32' # Apache-2.0 (+ dual licensed GPL2)
pyroute2>=0.6.6;sys_platform!='win32' # Apache-2.0 (+ dual licensed GPL2)
stevedore>=1.20.0 # Apache-2.0
tenacity>=6.0.0 # Apache-2.0