You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

616 lines
23 KiB

# Copyright 2016 OVH SAS
# All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import math
import re

import netaddr
from neutron_lib import exceptions
from neutron_lib.exceptions import qos as qos_exc
from neutron_lib.services.qos import constants as qos_consts
from oslo_log import log as logging
from pyroute2.iproute import linux as iproute_linux
from pyroute2.netlink import rtnl
from pyroute2.netlink.rtnl.tcmsg import common as rtnl_common

from neutron._i18n import _
from neutron.agent.linux import ip_lib
from neutron.common import utils
from neutron.privileged.agent.linux import tc_lib as priv_tc_lib

LOG = logging.getLogger(__name__)

# NOTE(review): INGRESS_QDISC_ID, MAX_MTU_VALUE and TC_QDISC_TYPES are used by
# the functions below but were not defined anywhere in this file. The values
# restored here are: the handle tc conventionally gives the ingress qdisc, the
# largest 16-bit MTU, and the three qdisc kinds handled by add_tc_qdisc().
# Confirm against the upstream module.
INGRESS_QDISC_ID = "ffff:"
MAX_MTU_VALUE = 65535

BW_LIMIT_UNIT = "kbit"  # kilobits per second in tc's notation
BURST_UNIT = "kbit"  # kilobits in tc's notation

# Those are RATES (bits per second) and SIZE (bytes) unit names from tc manual.
# Each value is the exponent applied to the base (see convert_to_kilobits).
UNITS = {
    "k": 1,
    "m": 2,
    "g": 3,
    "t": 4
}

filters_pattern = re.compile(r"police \w+ rate (\w+) burst (\w+)")
tbf_pattern = re.compile(
    r"qdisc (\w+) \w+: \w+ refcnt \d rate (\w+) burst (\w+) \w*")

TC_QDISC_TYPES = ['htb', 'tbf', 'ingress']

TC_QDISC_PARENT = {'root': rtnl.TC_H_ROOT,
                   'ingress': rtnl.TC_H_INGRESS}
TC_QDISC_PARENT_NAME = {v: k for k, v in TC_QDISC_PARENT.items()}

# NOTE(ralonsoh): VXLAN header: +28 bytes from the outer MAC header (TC
#                 initial offset)
# - VXLAN flags: 1 byte
# - Reserved: 3 bytes
# - VNI: 3 bytes --> VXLAN_VNI_OFFSET = 32 (+32 from the TC initial offset)
# - Reserved: 1 byte
# - Inner destination MAC: 6 bytes
# - Inner source MAC: 6 bytes --> VXLAN_INNER_SRC_MAC_OFFSET = 42
# NOTE(review): the last two bullet points and VXLAN_INNER_SRC_MAC_OFFSET were
# reconstructed from the byte counts listed above (28 + 8 + 6) -- confirm.
VXLAN_INNER_SRC_MAC_OFFSET = 42
VXLAN_VNI_OFFSET = 32
class InvalidKernelHzValue(exceptions.NeutronException):
    """Raised when a kernel HZ value lower than or equal to zero is given."""

    message = _("Kernel HZ value %(value)s is not valid. This value must be "
                "greater than 0.")
class InvalidUnit(exceptions.NeutronException):
    """Raised when a rate/size string carries a unit prefix not in UNITS."""

    message = _("Unit name '%(unit)s' is not valid.")
def convert_to_kilobits(value, base):
    """Convert a tc rate/size string to kilobits

    The string is lower-cased first. A value containing "bit" is read as bits;
    otherwise any "b" is stripped and the value is read as bytes. The
    remaining text is either a bare number or a number followed by a
    one-letter prefix listed in UNITS.

    :param value: (string) value with an optional unit, e.g. '10kbit', '2mb'
    :param base: (int) base of the unit prefixes; it is raised to the UNITS
                 exponent and also passed to utils.bits_to_kilobits
    :return: the value returned by utils.bits_to_kilobits
    :raises InvalidUnit: if the unit prefix is not a key of UNITS
    """
    value = value.lower()
    if "bit" in value:
        input_in_bits = True
        value = value.replace("bit", "")
    else:
        input_in_bits = False
        value = value.replace("b", "")

    # A bare number carries no prefix: convert it directly.
    if value.isdigit():
        value = int(value)
        if input_in_bits:
            return utils.bits_to_kilobits(value, base)
        bits_value = utils.bytes_to_bits(value)
        return utils.bits_to_kilobits(bits_value, base)

    unit = value[-1:]
    if unit not in UNITS:
        raise InvalidUnit(unit=unit)

    val = int(value[:-1])
    scaled_value = val * (base ** UNITS[unit])
    if input_in_bits:
        bits_value = scaled_value
    else:
        bits_value = utils.bytes_to_bits(scaled_value)
    return utils.bits_to_kilobits(bits_value, base)
def _get_attr(pyroute2_obj, attr_name):
"""Get an attribute in a pyroute object
pyroute2 object attributes are stored under a key called 'attrs'. This key
contains a tuple of tuples. E.g.:
pyroute2_obj = {'attrs': (('TCA_KIND': 'htb'),
('TCA_OPTIONS': {...}))}
:param pyroute2_obj: (dict) pyroute2 object
:param attr_name: (string) first value of the tuple we are looking for
:return: (object) second value of the tuple, None if the tuple doesn't
rule_attrs = pyroute2_obj.get('attrs', [])
for attr in (attr for attr in rule_attrs if attr[0] == attr_name):
return attr[1]
def _get_tbf_burst_value(rate, burst_limit, kernel_hz):
min_burst_value = float(rate) / float(kernel_hz)
return max(min_burst_value, burst_limit)
def _calc_burst(rate, buffer):
    """Calculate burst rate

    :param rate: (int) rate in bytes per second.
    :param buffer: (int) buffer size in bytes.
    :return: (int) burst in bytes
    """
    # NOTE(ralonsoh): this function is based in
    # pyroute2.netlink.rtnl.tcmsg.common.calc_xmittime
    ticks_per_sec = rtnl_common.TIME_UNITS_PER_SEC * rtnl_common.tick_in_usec
    return int(math.ceil(float(buffer * rate) / ticks_per_sec))
def _calc_min_rate(burst):
    """Calculate minimum rate (bytes per second) accepted by Pyroute2

    When creating a TC policy class, this function calculates the minimum
    rate (bytes/sec) accepted by Pyroute2.
    NOTE(review): the original sentence naming the pyroute2 routine this is
    based on was truncated; presumably
    pyroute2.netlink.rtnl.tcmsg.common.calc_xmittime -- confirm.

    :param burst: (int) burst in bytes.
    :return: (int) minimum accepted rate in bytes per second; never lower
             than 8.
    """
    ticks_per_sec = rtnl_common.TIME_UNITS_PER_SEC * rtnl_common.tick_in_usec
    return max(8, math.ceil((ticks_per_sec * burst) / 2**32))
def _calc_latency_ms(limit, burst, rate):
    """Calculate latency value, in ms

    :param limit: (int) pyroute2 limit value
    :param burst: (int) burst in bytes
    :param rate: (int) maximum bandwidth in kbytes per second
                 NOTE(review): list_tc_qdiscs() passes TCA_TBF_PARMS['rate']
                 here; confirm the unit stated above.
    :return: (int) latency, in ms
    """
    queued_time = float((limit - burst) * rtnl_common.TIME_UNITS_PER_SEC)
    return int(math.ceil(queued_time / (rate * 1000)))
def _handle_from_hex_to_string(handle):
"""Convert TC handle from hex to string
:param handle: (int) TC handle
:return: (string) handle formatted to string: 0xMMMMmmmm -> "M:m"
minor = format(handle & 0xFFFF, 'x')
major = format((handle & 0xFFFF0000) >> 16, 'x')
return ':'.join([major, minor])
def _mac_to_pyroute2_keys(mac, offset):
    """Convert a MAC address to a list of filter keys

    The 48-bit address is split in two 32-bit match keys: the first four
    bytes at "offset" and the last two bytes (left aligned, masked with
    0xffff0000) at "offset + 4".

    For example:
      MAC: '01:23:45:67:89:0a', offset: 8
      keys: ['0x01234567/0xffffffff+8', '0x890a0000/0xffff0000+12']
    NOTE(review): the key strings are built with hex(), which does not
    zero-pad; the first key above is actually rendered '0x1234567/...'.

    :param mac: (string) MAC address
    :param offset: (int) natural number, offset bytes number from the IP header
    :return: (list of dict) two dictionaries, high part first, each with the
             keys 'value', 'mask', 'offset' and 'key' (the formatted string)
    """
    int_mac = int(netaddr.EUI(mac))

    high_value = int_mac >> 16
    high_mask = 0xffffffff
    high_offset = offset
    high = {'value': high_value,
            'mask': high_mask,
            'offset': high_offset,
            'key': (hex(high_value) + '/' + hex(high_mask) + '+' +
                    str(high_offset))}

    low_value = (int_mac & 0xffff) << 16
    low_mask = 0xffff0000
    low_offset = offset + 4
    low = {'value': low_value,
           'mask': low_mask,
           'offset': low_offset,
           'key': hex(low_value) + '/' + hex(low_mask) + '+' + str(low_offset)}

    return [high, low]
class TcCommand(ip_lib.IPDevice):
    """Bandwidth limit helper for one device, built on this module's functions

    NOTE(review): the device name argument had been lost from every helper
    call in this class; "self.name" is restored on the assumption that
    ip_lib.IPDevice stores the constructor's "name" there -- confirm.
    """

    def __init__(self, name, kernel_hz, namespace=None):
        """Create the command helper

        :param name: (string) device name
        :param kernel_hz: (int) kernel HZ value, must be greater than 0
        :param namespace: (string) (optional) namespace name
        :raises InvalidKernelHzValue: if kernel_hz is not greater than 0
        """
        if kernel_hz <= 0:
            raise InvalidKernelHzValue(value=kernel_hz)
        super(TcCommand, self).__init__(name, namespace=namespace)
        self.kernel_hz = kernel_hz

    @staticmethod
    def get_ingress_qdisc_burst_value(bw_limit, burst_limit):
        """Return burst value used in ingress qdisc.

        If the burst value is not specified, it is set to the default burst
        rate (a fraction of the bandwidth limit) to ensure that the limit for
        TCP traffic works well.

        :param bw_limit: (int) bandwidth limit
        :param burst_limit: (int) requested burst; falsy means "not set"
        :return: burst_limit, or bw_limit * DEFAULT_BURST_RATE if not set
        """
        if not burst_limit:
            return float(bw_limit) * qos_consts.DEFAULT_BURST_RATE
        return burst_limit

    def get_filters_bw_limits(self, qdisc_id=INGRESS_QDISC_ID):
        """Return (rate_kbps, burst_kb) of the first filter of a qdisc

        :param qdisc_id: (string) parent qdisc of the filters
        :return: (tuple) rate and burst, or (None, None) if no filter exists
        """
        filters = list_tc_filters(self.name, qdisc_id,
                                  namespace=self.namespace)
        if filters:
            return filters[0].get('rate_kbps'), filters[0].get('burst_kb')
        return None, None

    def get_tbf_bw_limits(self):
        """Return (max_kbps, burst_kb) of the device TBF qdisc

        Only the first listed qdisc is inspected.

        :return: (tuple) rate and burst, or (None, None) if the device has no
                 qdisc or the first one is not of type 'tbf'
        """
        qdiscs = list_tc_qdiscs(self.name, namespace=self.namespace)
        if not qdiscs:
            return None, None

        qdisc = qdiscs[0]
        if qdisc['qdisc_type'] != 'tbf':
            return None, None

        return qdisc['max_kbps'], qdisc['burst_kb']

    def set_filters_bw_limit(self, bw_limit, burst_limit):
        """Set ingress qdisc and filter for police ingress traffic on device

        This will allow to police traffic incoming to interface. It
        means that it is fine to limit egress traffic from instance point of
        view.
        """
        # because replace of tc filters is not working properly and it's adding
        # new filters each time instead of replacing existing one first old
        # ingress qdisc should be deleted and then added new one so update will
        # be called to do that:
        return self.update_filters_bw_limit(bw_limit, burst_limit)

    def set_tbf_bw_limit(self, bw_limit, burst_limit, latency_value):
        """Set/update token bucket filter qdisc on device

        This will allow to limit speed of packets going out from interface. It
        means that it is fine to limit ingress traffic from instance point of
        view.
        """
        return add_tc_qdisc(self.name, 'tbf', parent='root',
                            max_kbps=bw_limit, burst_kb=burst_limit,
                            latency_ms=latency_value, kernel_hz=self.kernel_hz,
                            namespace=self.namespace)

    def update_filters_bw_limit(self, bw_limit, burst_limit):
        """Recreate the ingress qdisc and its policing filter"""
        # The old ingress qdisc is removed first: otherwise a new filter would
        # be added next to the existing one instead of replacing it (see
        # set_filters_bw_limit).
        self.delete_filters_bw_limit()
        add_tc_qdisc(self.name, 'ingress', namespace=self.namespace)
        return self._add_policy_filter(bw_limit, burst_limit)

    def delete_filters_bw_limit(self):
        """Delete the ingress qdisc; a missing qdisc is not an error"""
        # NOTE(slaweq): For limit traffic egress from instance we need to use
        # qdisc "ingress" because it is ingress traffic from interface POV:
        delete_tc_qdisc(self.name, is_ingress=True,
                        raise_qdisc_not_found=False, namespace=self.namespace)

    def delete_tbf_bw_limit(self):
        """Delete the root qdisc; a missing qdisc is not an error"""
        delete_tc_qdisc(self.name, parent='root',
                        raise_qdisc_not_found=False, namespace=self.namespace)

    def _add_policy_filter(self, bw_limit, burst_limit,
                           qdisc_id=INGRESS_QDISC_ID):
        """Add a policing filter that drops traffic above the limits"""
        # NOTE(slaweq): it is made in exactly same way how openvswitch is doing
        # it when configuing ingress traffic limit on port. It can be found in
        # lib/netdev-linux.c#L4698 in openvswitch sources:
        # The namespace is forwarded so the filter is created where the
        # ingress qdisc was added.
        add_tc_filter_policy(self.name, qdisc_id, bw_limit, burst_limit,
                             MAX_MTU_VALUE, 'drop', priority=49,
                             namespace=self.namespace)
def add_tc_qdisc(device, qdisc_type, parent=None, handle=None, latency_ms=None,
                 max_kbps=None, burst_kb=None, kernel_hz=None,
                 namespace=None):
    """Add/replace a TC qdisc on a device

    pyroute2 input parameters:
      - rate (min bw): bytes/second
      - burst: bytes
      - latency: us

    :param device: (string) device name
    :param qdisc_type: (string) qdisc type (TC_QDISC_TYPES)
    :param parent: (string) qdisc parent class ('root', '2:10')
    :param handle: (string, int) (required for HTB) major handler identifier
                   (0xffff0000, '1', '1:', '1:0')
    :param latency_ms: (string, int) (required for TBF) latency time in ms
    :param max_kbps: (string, int) (required for TBF) maximum bandwidth in
                     kbits per second.
    :param burst_kb: (string, int) (required for TBF) burst size in kbits.
    :param kernel_hz: (string, int) (required for TBF) kernel HZ.
    :param namespace: (string) (optional) namespace name
    :raises TcLibQdiscTypeError: if qdisc_type is not in TC_QDISC_TYPES
    :raises TcLibQdiscNeededArguments: if a TBF qdisc is requested without
                                       latency_ms, max_kbps or kernel_hz
    """
    if qdisc_type and qdisc_type not in TC_QDISC_TYPES:
        raise qos_exc.TcLibQdiscTypeError(
            qdisc_type=qdisc_type, supported_qdisc_types=TC_QDISC_TYPES)

    args = {'kind': qdisc_type}
    if qdisc_type in ['htb', 'ingress']:
        if handle:
            # Keep only the major number: '1', '1:' and '1:0' become '1:0'.
            args['handle'] = str(handle).split(':')[0] + ':0'
    elif qdisc_type == 'tbf':
        if not latency_ms or not max_kbps or not kernel_hz:
            raise qos_exc.TcLibQdiscNeededArguments(
                qdisc_type=qdisc_type,
                needed_arguments=['latency_ms', 'max_kbps', 'kernel_hz'])
        # kbits -> bytes
        args['burst'] = int(
            _get_tbf_burst_value(max_kbps, burst_kb, kernel_hz) * 1000 / 8)
        # kbits/second -> bytes/second
        args['rate'] = int(max_kbps * 1000 / 8)
        # ms -> us
        args['latency'] = latency_ms * 1000
    if parent:
        args['parent'] = rtnl.TC_H_ROOT if parent == 'root' else parent
    priv_tc_lib.add_tc_qdisc(device, namespace=namespace, **args)
def list_tc_qdiscs(device, namespace=None):
    """List all TC qdiscs of a device

    :param device: (string) device name
    :param namespace: (string) (optional) namespace name
    :return: (list) TC qdiscs; each one is a dictionary with the keys
             'qdisc_type', 'parent' and 'handle' and, for 'tbf' qdiscs,
             'max_kbps', 'burst_kb' and 'latency_ms'
    """
    qdiscs = priv_tc_lib.list_tc_qdiscs(device, namespace=namespace)
    retval = []
    for qdisc in qdiscs:
        qdisc_attrs = {
            'qdisc_type': _get_attr(qdisc, 'TCA_KIND'),
            'parent': TC_QDISC_PARENT_NAME.get(
                qdisc['parent'], _handle_from_hex_to_string(qdisc['parent'])),
            'handle': _handle_from_hex_to_string(qdisc['handle'])}
        if qdisc_attrs['qdisc_type'] == 'tbf':
            tca_options = _get_attr(qdisc, 'TCA_OPTIONS')
            tca_tbf_parms = _get_attr(tca_options, 'TCA_TBF_PARMS')
            # bytes/second -> kbits/second
            qdisc_attrs['max_kbps'] = int(tca_tbf_parms['rate'] * 8 / 1000)
            burst_bytes = _calc_burst(tca_tbf_parms['rate'],
                                      tca_tbf_parms['buffer'])
            # bytes -> kbits
            qdisc_attrs['burst_kb'] = int(burst_bytes * 8 / 1000)
            qdisc_attrs['latency_ms'] = _calc_latency_ms(
                tca_tbf_parms['limit'], burst_bytes, tca_tbf_parms['rate'])
        retval.append(qdisc_attrs)

    return retval
def delete_tc_qdisc(device, parent=None, is_ingress=False,
                    raise_interface_not_found=True, raise_qdisc_not_found=True,
                    namespace=None):
    """Delete a TC qdisc of a device

    :param device: (string) device name
    :param parent: (string) (optional) qdisc parent class ('root', '2:10')
    :param is_ingress: (bool) (optional) if qdisc type is 'ingress'
    :param raise_interface_not_found: (bool) (optional) raise exception if the
                                      interface doesn't exist
    :param raise_qdisc_not_found: (bool) (optional) raise exception if the
                                  qdisc doesn't exist
    :param namespace: (string) (optional) namespace name
    """
    qdisc_type = 'ingress' if is_ingress else None
    if parent:
        parent = rtnl.TC_H_ROOT if parent == 'root' else parent
    # NOTE(review): raise_interface_not_found is forwarded on the assumption
    # that the privileged function accepts it, as it does
    # raise_qdisc_not_found -- confirm.
    priv_tc_lib.delete_tc_qdisc(
        device, parent=parent, kind=qdisc_type,
        raise_interface_not_found=raise_interface_not_found,
        raise_qdisc_not_found=raise_qdisc_not_found, namespace=namespace)
def add_tc_policy_class(device, parent, classid, max_kbps, min_kbps=None,
                        burst_kb=None, namespace=None):
    """Add a TC policy class

    If no burst is given, max_kbps * DEFAULT_BURST_RATE is used. If the
    requested minimum rate is lower than the minimum rate accepted for that
    burst (see _calc_min_rate), a warning is logged and the accepted minimum
    is used instead.

    :param device: (string) device name
    :param parent: (string) qdisc parent class ('root', 'ingress', '2:10')
    :param classid: (string) major:minor handler identifier ('10:20')
    :param max_kbps: (int) maximum bandwidth in kbps
    :param min_kbps: (int) (optional) minimum bandwidth in kbps
    :param burst_kb: (int) (optional) burst size in kb
    :param namespace: (string) (optional) namespace name
    """
    parent = TC_QDISC_PARENT.get(parent, parent)
    if not burst_kb:
        burst_kb = max_kbps * qos_consts.DEFAULT_BURST_RATE

    # NOTE(ralonsoh): pyroute2 input parameters and units:
    #   - rate (min bw): bytes/second
    #   - ceil (max bw): bytes/second
    #   - burst: bytes
    kwargs = {'ceil': int(max_kbps * 1000 / 8),
              'burst': int(burst_kb * 1000 / 8)}

    rate = int((min_kbps or 0) * 1000 / 8)
    min_rate = _calc_min_rate(kwargs['burst'])
    if min_rate > rate:
        LOG.warning('TC HTB class policy rate %(rate)s (bytes/second) is '
                    'lower than the minimum accepted %(min_rate)s '
                    '(bytes/second), for device %(device)s, qdisc '
                    '%(qdisc)s and classid %(classid)s',
                    {'rate': rate, 'min_rate': min_rate, 'device': device,
                     'qdisc': parent, 'classid': classid})
        rate = min_rate
    kwargs['rate'] = rate

    priv_tc_lib.add_tc_policy_class(device, parent, classid, 'htb',
                                    namespace=namespace, **kwargs)
def list_tc_policy_class(device, namespace=None):
    """List all TC policy classes of a device

    :param device: (string) device name
    :param namespace: (string) (optional) namespace name
    :return: (list) TC policy classes; each one is a dictionary with the keys
             'device', 'index', 'namespace', 'parent', 'classid',
             'qdisc_type', 'min_kbps', 'max_kbps', 'burst_kb' and, when the
             class reports them, 'stats'
    """
    def get_params(tca_options, qdisc_type):
        # Returns (max_kbps, min_kbps, burst_kb); all None for unknown types.
        if qdisc_type not in TC_QDISC_TYPES:
            return None, None, None

        tca_params = _get_attr(tca_options,
                               'TCA_' + qdisc_type.upper() + '_PARMS')
        # bytes -> kbits and bytes/second -> kbits/second
        burst_kb = int(
            _calc_burst(tca_params['rate'], tca_params['buffer']) * 8 / 1000)
        max_kbps = int(tca_params['ceil'] * 8 / 1000)
        min_kbps = int(tca_params['rate'] * 8 / 1000)
        return max_kbps, min_kbps, burst_kb

    tc_classes = priv_tc_lib.list_tc_policy_classes(device,
                                                    namespace=namespace)
    classes = []
    for tc_class in tc_classes:
        index = tc_class['index']
        parent = TC_QDISC_PARENT_NAME.get(
            tc_class['parent'], _handle_from_hex_to_string(tc_class['parent']))
        classid = _handle_from_hex_to_string(tc_class['handle'])
        qdisc_type = _get_attr(tc_class, 'TCA_KIND')
        tca_options = _get_attr(tc_class, 'TCA_OPTIONS')
        max_kbps, min_kbps, burst_kb = get_params(tca_options, qdisc_type)
        tc_class_data = {'device': device,
                         'index': index,
                         'namespace': namespace,
                         'parent': parent,
                         'classid': classid,
                         'qdisc_type': qdisc_type,
                         'min_kbps': min_kbps,
                         'max_kbps': max_kbps,
                         'burst_kb': burst_kb}

        tca_stats = _get_attr(tc_class, 'TCA_STATS')
        if tca_stats:
            tc_class_data['stats'] = tca_stats

        classes.append(tc_class_data)

    return classes
def delete_tc_policy_class(device, parent, classid, namespace=None):
    """Delete a TC policy class of a device.

    :param device: (string) device name
    :param parent: (string) qdisc parent class ('root', 'ingress', '2:10')
    :param classid: (string) major:minor handler identifier ('10:20')
    :param namespace: (string) (optional) namespace name
    """
    priv_tc_lib.delete_tc_policy_class(device, parent, classid,
                                       namespace=namespace)
def add_tc_filter_vxlan(device, parent, classid, src_mac, vxlan_id,
                        namespace=None):
    """Add a TC filter to match VXLAN traffic based on the VM mac and the VNI.

    The filter is created with priority 1 and three 32-bit match keys: one
    for the VNI and two for the inner source MAC address.

    :param device: (string) device name
    :param parent: (string) qdisc parent class ('root', 'ingress', '2:10')
    :param classid: (string) major:minor handler identifier ('10:20')
    :param src_mac: (string) source MAC address to match (VM mac)
    :param vxlan_id: (int) VXLAN ID (VNI)
    :param namespace: (string) (optional) namespace name
    """
    # The VNI is shifted one byte to the left and matched with 0xffffff00,
    # leaving the lowest byte of the 32-bit word out of the match.
    keys = [hex(int(vxlan_id << 8)) + '/0xffffff00+' + str(VXLAN_VNI_OFFSET)]
    keys += [key['key'] for key in
             _mac_to_pyroute2_keys(src_mac, VXLAN_INNER_SRC_MAC_OFFSET)]
    priv_tc_lib.add_tc_filter_match32(device, parent, 1, classid, keys,
                                      namespace=namespace)
def add_tc_filter_match_mac(device, parent, classid, mac, offset=0, priority=0,
                            protocol=None, namespace=None):
    """Add a TC filter in a device to match a MAC address.

    :param device: (string) device name
    :param parent: (string) qdisc parent class ('root', 'ingress', '2:10')
    :param classid: (string) major:minor handler identifier ('10:20')
    :param mac: (string) MAC address to match
    :param offset: (int) (optional) match offset, starting from the outer
                   packet IP header
    :param priority: (int) (optional) filter priority (lower priority, higher
                     preference)
    :param protocol: (int) (optional) traffic filter protocol; if None, all
                     will be matched.
    :param namespace: (string) (optional) namespace name
    """
    keys = [key['key'] for key in _mac_to_pyroute2_keys(mac, offset)]
    priv_tc_lib.add_tc_filter_match32(device, parent, priority, classid, keys,
                                      protocol=protocol, namespace=namespace)
def add_tc_filter_policy(device, parent, rate_kbps, burst_kb, mtu, action,
                         priority=0, protocol=None, namespace=None):
    """Add a TC filter in a device to set a policy.

    :param device: (string) device name
    :param parent: (string) qdisc parent class ('root', 'ingress', '2:10')
    :param rate_kbps: (int) rate in kbits/second
    :param burst_kb: (int) burst in kbits
    :param mtu: (int) MTU size (bytes)
    :param action: (string) filter policy action
    :param priority: (int) (optional) filter priority (lower priority, higher
                     preference)
    :param protocol: (int) (optional) traffic filter protocol; if None, all
                     will be matched.
    :param namespace: (string) (optional) namespace name
    """
    # kbits/second -> bytes/second and kbits -> bytes
    rate = int(rate_kbps * 1000 / 8)
    burst = int(burst_kb * 1000 / 8)
    priv_tc_lib.add_tc_filter_policy(device, parent, priority, rate, burst,
                                     mtu, action, protocol=protocol,
                                     namespace=namespace)
def list_tc_filters(device, parent, namespace=None):
    """List TC filter in a device

    Filters without TCA_OPTIONS or without a TCA_U32_SEL selector are
    skipped.

    :param device: (string) device name
    :param parent: (string) qdisc parent class ('root', 'ingress', '2:10')
    :param namespace: (string) (optional) namespace name
    :return: (list) one dictionary per filter with the key 'keys' (list of
             'value'/'mask'/'offset' dictionaries) and, if the filter has a
             police TBF section, 'rate_kbps', 'burst_kb' and 'mtu'
    """
    parent = iproute_linux.transform_handle(parent)
    filters = priv_tc_lib.list_tc_filters(device, parent, namespace=namespace)
    retval = []
    for tc_filter in filters:
        tca_options = _get_attr(tc_filter, 'TCA_OPTIONS')
        if not tca_options:
            continue
        tca_u32_sel = _get_attr(tca_options, 'TCA_U32_SEL')
        if not tca_u32_sel:
            continue

        keys = []
        for key in tca_u32_sel['keys']:
            # Reverse the byte order of the lowest four bytes of 'key_off'.
            key_off = key['key_off']
            key_value = 0
            for _byte in range(4):
                key_value = (key_value << 8) + (key_off & 0xff)
                key_off = key_off >> 8
            # NOTE(review): the field mapping (value from 'key_off', mask
            # from 'key_val', offset from 'key_offmask') is kept exactly as
            # found; confirm it against the pyroute2 u32 key layout.
            keys.append({'value': key_value,
                         'mask': key['key_val'],
                         'offset': key['key_offmask']})

        value = {'keys': keys}

        tca_u32_police = _get_attr(tca_options, 'TCA_U32_POLICE')
        if tca_u32_police:
            tca_police_tbf = _get_attr(tca_u32_police, 'TCA_POLICE_TBF')
            if tca_police_tbf:
                # bytes/second -> kbits/second and bytes -> kbits
                value['rate_kbps'] = int(tca_police_tbf['rate'] * 8 / 1000)
                value['burst_kb'] = int(
                    _calc_burst(tca_police_tbf['rate'],
                                tca_police_tbf['burst']) * 8 / 1000)
                value['mtu'] = tca_police_tbf['mtu']

        retval.append(value)

    return retval