kolla-ansible/tools/ovs-dpdkctl.sh
2018-11-20 15:00:17 +07:00

503 lines
16 KiB
Bash
Executable File

#!/bin/bash
_XTRACE_OVS_DPDK_CTL=$(set +o | grep xtrace)
if [[ "${OVS_DPDK_CTL_DEBUG}" == "True" ]]; then
set -o xtrace
fi
FULL_PATH=$(realpath "${BASH_SOURCE[0]}")
CONFIG_FILE=${CONFIG_FILE:-"/etc/default/ovs-dpdk.conf"}
SERVICE_FILE="/etc/systemd/system/ovs-dpdkctl.service"
BRIDGE_SERVICE_FILE="/etc/systemd/system/ovs-dpdk-bridge.service"
function get_value {
crudini --get $CONFIG_FILE $@
}
function set_value {
crudini --set $CONFIG_FILE $1 $2 "$3"
}
function del_value {
crudini --del $CONFIG_FILE $@
}
function is_set {
output=$(crudini --get $CONFIG_FILE $* &> /dev/null ) && [ -n "$(crudini --get $CONFIG_FILE $*)" ]; echo $?
}
function show_config {
cat $CONFIG_FILE
}
function get_config {
echo $CONFIG_FILE
}
function del_config {
rm -f $CONFIG_FILE
}
function is_redhat_family {
[[ -e /etc/redhat-release ]] ; echo $?
}
function generate_pciwhitelist {
_Whitelist=''
for nic in $(list_dpdk_nics); do
address="$(get_value $nic address)"
if [ "$_Whitelist" == '' ]; then
_Whitelist="-w $address"
else
_Whitelist="$_Whitelist -w $address"
fi
done
echo $_Whitelist
}
function gen_port_mappings {
OVS_BRIDGE_MAPPINGS=$(get_value ovs bridge_mappings)
OVS_BRIDGES=${OVS_BRIDGE_MAPPINGS//,/ }
OVS_DPDK_PORT_MAPPINGS=""
ARRAY=( $OVS_BRIDGES )
for net in "${ARRAY[@]}"; do
bridge="${net##*:}"
nic=${bridge/br-/}
if [[ -z "$OVS_DPDK_PORT_MAPPINGS" ]]; then
OVS_DPDK_PORT_MAPPINGS="$nic:$bridge"
else
OVS_DPDK_PORT_MAPPINGS="$OVS_DPDK_PORT_MAPPINGS,$nic:$bridge"
fi
done
echo "$OVS_DPDK_PORT_MAPPINGS"
}
function gen_config {
del_config
touch $CONFIG_FILE
set_value ovs bridge_mappings ${bridge_mappings:-""}
set_value ovs port_mappings ${port_mappings:-$(gen_port_mappings)}
set_value ovs cidr_mappings ${cidr_mappings:-""}
set_value ovs ovs_coremask ${ovs_coremask:-"0x1"}
set_value ovs pmd_coremask ${pmd_coremask:-"0x2"}
set_value ovs ovs_mem_channels ${ovs_mem_channels:-4}
set_value ovs ovs_socket_mem ${ovs_socket_mem:-"512"}
set_value ovs dpdk_interface_driver ${dpdk_interface_driver:-"uio_pci_generic"}
set_value ovs hugepage_mountpoint ${hugepage_mountpoint:-"/dev/hugepages"}
set_value ovs physical_port_policy ${ovs_physical_port_policy:-"named"}
ls -al /sys/class/net/* | awk '$0 ~ /pci/ {n=split($NF,a,"/"); print "\n[" a[n] "]\naddress = " a[n-2] "\ndriver ="}' >> $CONFIG_FILE
for nic in $(get_value | grep -v ovs); do
set_value $nic driver $(get_driver_by_address $(get_value $nic address))
done
for nic in $(list_dpdk_nics); do
set_value $nic driver ${dpdk_interface_driver:-"uio_pci_generic"}
done
set_value ovs pci_whitelist "${pci_whitelist:-$(generate_pciwhitelist)}"
}
function bind_nic {
echo $2 > /sys/bus/pci/devices/$1/driver_override
echo $1 > /sys/bus/pci/drivers/$2/bind
}
function unbind_nic {
echo $1 > /sys/bus/pci/drivers/$2/unbind
echo > /sys/bus/pci/devices/$1/driver_override
}
function list_dpdk_nics {
for nic in $(get_value ovs port_mappings | tr ',' '\n' | cut -d : -f 1); do
echo $nic;
done
}
function bind_nics {
for nic in $(list_dpdk_nics); do
device_address="$(get_value $nic address)"
current_driver="$(get_driver_by_address $device_address)"
target_driver="$(get_value $nic driver)"
if [ "$current_driver" != "$target_driver" ]; then
set_value $nic old_driver $current_driver
unbind_nic $device_address $current_driver
bind_nic $device_address $target_driver
fi
done
}
function unbind_nics {
for nic in $(list_dpdk_nics); do
if [ "$(is_set $nic old_driver)" == 0 ]; then
device_address="$(get_value $nic address)"
current_driver="$(get_driver_by_address $device_address)"
target_driver="$(get_value $nic old_driver)"
if [ "$current_driver" != "$target_driver" ]; then
unbind_nic $device_address $current_driver
bind_nic $device_address $target_driver
del_value $nic old_driver
fi
fi
done
}
function get_address_by_name {
ls -al /sys/class/net/$1 | awk '$0 ~ /pci/ {n=split($NF,a,"/"); print a[n-2] }'
}
function get_driver_by_address {
ls /sys/bus/pci/devices/$1/driver -al | awk '{n=split($NF,a,"/"); print a[n]}'
}
function get_port_bridge {
for pair in $(get_value ovs port_mappings | tr ',' '\n'); do
nic=`echo $pair | cut -f 1 -d ":"`
if [[ "$nic" == "$1" ]]; then
bridge=`echo $pair | cut -f 2 -d ":"`
echo $bridge
return 0
fi
done
return 1
}
function init_ovs_db {
ovs-vsctl init
ovs-vsctl --no-wait set Open_vSwitch . other_config:pmd-cpu-mask="$(get_value ovs pmd_coremask)" \
other_config:dpdk-init=True other_config:dpdk-lcore-mask="$(get_value ovs ovs_coremask)" \
other_config:dpdk-mem-channels="$(get_value ovs ovs_mem_channels)" \
other_config:dpdk-socket-mem="$(get_value ovs ovs_socket_mem)" \
other_config:dpdk-hugepage-dir="$(get_value ovs hugepage_mountpoint)" \
other_config:dpdk-extra=" --proc-type primary $(get_value ovs pci_whitelist) "
}
function init_ovs_bridges {
raw_bridge_mappings=$(get_value ovs bridge_mappings)
bridge_mappings=( ${raw_bridge_mappings//,/ } )
for pair in "${bridge_mappings[@]}"; do
bridge=`echo $pair | cut -f 2 -d ":"`
sudo ovs-vsctl --no-wait -- --may-exist add-br $bridge -- set Bridge $bridge datapath_type=netdev
done
}
function init_ovs_interfaces {
pci_port_pairs=''
for nic in $(list_dpdk_nics); do
address="$(get_value $nic address)"
if [ "$pci_port_pairs" == '' ]; then
pci_port_pairs="$address,$nic"
else
pci_port_pairs="$pci_port_pairs $address,$nic"
fi
done
pci_port_pairs="$(echo $pci_port_pairs | sort)"
dpdk_port_number=0
for pair in $pci_port_pairs; do
addr="$(echo $pair | cut -f 1 -d ",")"
nic="$(echo $pair | cut -f 2 -d ",")"
bridge="$(get_port_bridge $nic)"
# ovs 2.6 and older requires dpdkX names, ovs 2.7+ requires dpdk-devargs instead.
if [ "$(get_value ovs physical_port_policy)" == "indexed" ]; then
ovs-vsctl --no-wait --may-exist add-port $bridge "dpdk${dpdk_port_number}" \
-- set Interface "dpdk${dpdk_port_number}" type=dpdk
else
ovs-vsctl --may-exist add-port $bridge $nic \
-- set Interface $nic type=dpdk options:dpdk-devargs=$addr
fi
dpdk_port_number=$((dpdk_port_number+1))
done
}
function init {
init_ovs_db
init_ovs_bridges
init_ovs_interfaces
}
function install_network_manager_conf {
pair=$(get_value ovs cidr_mappings)
bridge=`echo $pair | cut -f 1 -d ":"`
cidr=`echo $pair | cut -f 2 -d ":"`
ip=`echo $cidr | cut -f 1 -d "/"`
prefix=`echo $cidr | cut -f 2 -d "/"`
mask=""
full_octets=$(expr $prefix / 8)
partial_octet=$(expr $prefix % 8)
for octet in 0 1 2 3 ; do
if [[ "$octet" < "$full_octets" ]]; then
mask+=255
elif [[ "$octet" == "$full_octets" ]]; then
mask+=$((256 - 2**(8-$partial_octet)))
else
mask+=0
fi
[[ "$octet" < 3 ]] && mask+=.
done
if [[ $(is_redhat_family) == 0 ]]; then
cat << EOF | tee "/etc/sysconfig/network-scripts/ifcfg-$bridge"
DEVICE=$bridge
BOOTPROTO=static
IPADDR=$ip
NETMASK=$mask
HOTPLUG=yes
ONBOOT=yes
NM_CONTROLLED=no
EOF
install_redhat_bridge_service $bridge
else
cat << EOF | tee "/etc/network/interfaces.d/$bridge.cfg"
auto $bridge
iface $bridge inet static
address $ip
netmask $mask
EOF
fi
}
function uninstall_network_manager_conf {
pair=$(get_value ovs cidr_mappings)
bridge=`echo $pair | cut -f 1 -d ":"`
if [[ $(is_redhat_family) == 0 ]]; then
rm -f /etc/sysconfig/network-scripts/ifcfg-$bridge
else
rm -f /etc/network/interfaces.d/$bridge.cfg
fi
}
function install_service {
cat << EOF | tee "$SERVICE_FILE"
[Unit]
Description=configuration service for ovs-dpdk nics.
Before=network-pre.target
After=syslog.target
[Service]
# Uncomment to enable debug logging.
# Environment=OVS_DPDK_CTL_DEBUG=True
Environment=CONFIG_FILE=$CONFIG_FILE
Type=oneshot
RemainAfterExit=yes
ExecStart=/bin/ovs-dpdkctl bind_nics
ExecStop=/bin/ovs-dpdkctl unbind_nics
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload
systemctl enable ovs-dpdkctl
}
function install_redhat_bridge_service {
cat << EOF | tee "$BRIDGE_SERVICE_FILE"
[Unit]
Description=configuration service for ovs-dpdk bridge.
After=docker.service
After=network.target
After=ovs-dpdkctl.service
[Service]
Type=simple
RemainAfterExit=yes
ExecStartPre=/bin/bash -c "[[ -e /sys/class/net/$1 ]]"
ExecStart=/usr/sbin/ifup $1
ExecStop=/usr/sbin/ifdown $1
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload
systemctl enable ovs-dpdk-bridge
}
function uninstall_service {
systemctl disable ovs-dpdkctl
rm -f "$SERVICE_FILE"
if [ -e "$BRIDGE_SERVICE_FILE" ]; then
systemctl disable ovs-dpdk-bridge
rm -f "$BRIDGE_SERVICE_FILE"
fi
systemctl daemon-reload
}
function configure_kernel_modules {
driver="$(get_value ovs dpdk_interface_driver)"
lsmod | grep -ws $driver > /dev/null || modprobe $driver
if [[ $(is_redhat_family) == 0 ]]; then
[[ ! -e /etc/modules-load.d/${driver}.conf ]] && echo $driver | tee /etc/modules-load.d/${driver}.conf
else
grep -ws $driver /etc/modules > /dev/null || echo $driver | tee -a /etc/modules
fi
}
function unconfigure_kernel_modules {
driver="$(get_value ovs dpdk_interface_driver)"
lsmod | grep -ws $driver > /dev/null && rmmod $driver
if [[ $(is_redhat_family) == 0 ]] ; then
[[ -e /etc/modules-load.d/${driver}.conf ]] && rm -f /etc/modules-load.d/${driver}.conf
else
grep -ws $driver /etc/modules > /dev/null && sed -e "s/$driver//" -i /etc/modules
fi
}
function install {
configure_kernel_modules
if [ ! -e "$SERVICE_FILE" ]; then
install_service
fi
if [ ! -e /bin/ovs-dpdkctl ]; then
cp "$FULL_PATH" /bin/ovs-dpdkctl
chmod +x /bin/ovs-dpdkctl
fi
if [ ! -e "$CONFIG_FILE" ]; then
gen_config
fi
systemctl start ovs-dpdkctl
install_network_manager_conf
if [[ $(is_redhat_family) == 0 ]]; then
systemctl start ovs-dpdk-bridge
fi
}
function uninstall {
systemctl stop ovs-dpdkctl
if [ -e "$SERVICE_FILE" ]; then
uninstall_service
fi
uninstall_network_manager_conf
unconfigure_kernel_modules
if [ -e /bin/ovs-dpdkctl ]; then
rm -f /bin/ovs-dpdkctl
fi
if [ -e "$CONFIG_FILE" ]; then
rm -f "$CONFIG_FILE"
fi
}
function usage {
cat << "EOF"
ovs-dpdkctl.sh: A tool to configure ovs with dpdk.
- This tool automate the process of binding host insterfacesto a dpdk
compaible driver (uio_pci_generic | vfio-pci) at boot.
- This tool automate bootstraping ovs so that it can use the
dpdk accelerated netdev datapath.
commands:
- install:
- installs ovs-dpdkctl as a systemd service.
- installs ovs-dpdkctl binary.
- generates ovs-dpdkctl configuration file.
- starts ovs-dpdkctl service.
- uninstall:
- stops ovs-dpdkctl service.
- uninstalls ovs-dpdkctl systemd service.
- uninstalls ovs-dpdkctl binary.
- removes ovs-dpdkctl configuration file.
- bind_nics:
- iterates over all dpdk interfaces defined in ovs-dpdkctl config
and binds the interface to the target driver specifed in the config
if current driver does not equal target.
- unbind_nics:
- iterates over all dpdk interfaces defined in ovs-dpdkctl config
and restores the interface to its original non dpdk driver.
- init:
- defines dpdk specific configuration paramater in the ovsdb.
- creates bridges as spcified by ovs bridge_mappings in
ovs-dpdkctl config.
- creates dpdk ports as defined by ovs port_mappings in
ovs-dpdkctl config.
- usage:
- prints this message
options:
- debuging:
- To enable debuging export OVS_DPDK_CTL_DEBUG=True
- install:
- The varibles described below can be defined to customise
installation of ovs-dpdkctl.
<variable>=<value> ovs-dpdkctl.sh install
- bridge_mappings:
- A comma separated list of physnet to bridge mappings.
- Example: bridge_mappings=physnet1:br-ex1,physnet2:br-ex2
- Default: ""
- port_mappings:
- A comma separated list of port to bridge mappings.
- Example: port_mappings=eth1:br-ex1,eth2:br-ex2
- Default: generated form bridge_mappings assuming bridge names
are constructed by appending br- to port name.
- cidr_mappings:
- A comma separated list of bridge to cidr mappings.
- Example: cidr_mappings=br-ex1:192.168.1.1/24,br-ex2:192.168.2.1/24
- Default: ""
- ovs_coremask:
- A hex encoded string container a bitmask of what cpu core
to pin the non dataplane treads of the ovs-vswitchd to.
- Node that only the first core of the bit mask is currently
used by ovs.
- Example: ovs_coremask=0x1
- Default: "0x1"
- pmd_coremask:
- A hex encoded string container a bitmask of what cpu cores
to pin the dataplane pool mode driver treads of the ovs-vswitchd to.
- Each bit set in the bitmask will result in the creating of a pmd.
- For best performance it is recommended to allocate at least 1 pmd per
numa node. On systems with HyperThreading enabled it is recommended to also
allocate the HT sibling core in the pmd_coremask.cores allocated
to ovs with dpdk via the pmd_coremask should be removed from the
nova vcpu_pin_set and isolated from the kernel scheduler.
- Note it is not recommended to isolate cores in the nova vcpu_pin_set
unless the host will be dedicated for vms that request cpu pinning.
- Example: pmd_coremask=0x4
- Default: "0x4"
- ovs_mem_channels:
- The number of memory channels supported by the plathforms.
- Example: ovs_mem_channels=2
- Default: "4"
- ovs_socket_mem:
- A comma separated list of hugepage memory, specifed in MBs per numa node,
allocated to the ovs-vswitchd to use for the dpdk dataplane.
- For best performance memory should be allocated evenly across all numa node
that will run a pmd.
- Example: ovs_socket_mem=512,512
- Default: "512"
- hugepage_mountpoint:
- The hugepage mountpoint to use when allocating memory for dpdk.
- Example: hugepage_mountpoint=/dev/my_custom_mountpoint
- Default: "/dev/hugepages"
- dpdk_interface_driver:
- The dpdk compatible userspace driver to use when binding host interfaces.
- Example: dpdk_interface_driver=vfio_pci
- Default: "uio_pci_generic"
- pci_whitelist:
- A repeated space separated list of pci whitelist flags
for allowed ovs-dpdk ports.
- The pci_whitelist allows multiple dpdk primary process to
utilise different pci devices without resulting in a conflict
of ownership.
- Example: pci_whitelist="-w <pci address 1> -w <pci address 2>"
- Default: auto generated form port_mappings.
EOF
}
if [ $# -ge 1 ]; then
func=$1
shift
else
func="usage"
fi
#replace with switch later
eval "$func $@"
${_XTRACE_OVS_DPDK_CTL}