add min mtu and min speed config variables

This commit is contained in:
Andrew McLeod 2017-08-31 12:03:04 +02:00
parent a29f9c0a79
commit e55961d08f
5 changed files with 139 additions and 51 deletions

View File

@ -1,8 +1,18 @@
# Overview
Magpie is a charm used for testing the networking (ICMP and DNS specifically)
of a juju provider/substrate. Simply deploy more than one Magpie charm and
watch the status messages and debug logs.
Magpie is a charm used for testing the networking of a juju provider/substrate.
Simply deploy more than one Magpie charm and watch the status messages and
debug logs.
Magpie will test:
- DNS functionality
- Local hostname lookup
- ICMP between peers
- MTU between leader and clients
- Transfer between leader and clients
*MTU and transfer speed are tested with iperf2*
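Under the hood (see magpie_tools.py in this commit) the non-leader units run an iperf
server and the leader runs a short iperf client pass against each of them; a minimal
sketch of those calls, with a placeholder peer address:
```
import os

IPERF_OUT = '/home/ubuntu/iperf_output.txt'  # path used by the charm

# Non-leader units: background iperf server, MSS/MTU reporting (-m), rates in Mbits (-fm)
os.system("iperf -s -m -fm | tee " + IPERF_OUT + " &")

# Leader: one-second client run against a peer (10.0.0.2 is a placeholder address)
os.system("iperf -t1 -c 10.0.0.2")
```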
Status messages will show the unit numbers that have issues - if there are
no problems, there will not be a verbose status message.
@ -10,6 +20,13 @@ no problems, there will not be a verbose status message.
All actions, strings, and queries are logged in the juju logs.
# MTU Notes
The MTU size reported by iperf is sometimes 8 or 12 bytes less than the configured
MTU on the interface. This is because TCP options are not included in the measurement,
so a difference of up to 12 bytes is ignored and the MTU is reported as OK.
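A minimal sketch of that tolerance check (the helper name and example values are
hypothetical; the 12 byte slack mirrors check_local_mtu in this commit):
```
def mtu_within_tolerance(iface_mtu, iperf_mtu, slack=12):
    # iperf can report up to 12 bytes less than the interface MTU
    # because TCP options are not counted in its measurement
    return 0 <= int(iface_mtu) - int(iperf_mtu) <= slack

print(mtu_within_tolerance(9000, 8988))  # True  - within the 12 byte slack
print(mtu_within_tolerance(9000, 1500))  # False - a real MTU mismatch
```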
# Workload Status
In addition to ICMP and DNS status messages, if a networking problem is
@ -26,12 +43,15 @@ relation.
* **`magpie-dns.failed`** DNS has failed to one or more units in the peer
relation.
Note: work on these states has stopped, as it is currently unlikely Magpie will be
consumed as a layer.
Please open an issue against this GitHub repo if more states are required.
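If these states are consumed, a layer can react to them in the usual charms.reactive
way; a minimal sketch (the handler name is hypothetical):
```
from charms.reactive import when
from charmhelpers.core import hookenv

@when('magpie-icmp.failed')
def report_icmp_failure():
    hookenv.status_set('blocked', 'ICMP to one or more magpie peers failed')
```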
# Usage
```
juju deploy magpie -n 2
juju deploy magpie -n 1 --to lxc:1
juju deploy magpie -n 1 --to lxd:1
```
This charm also supports the following config values:
@ -61,8 +81,21 @@ This charm also supports the following config values:
default: 1
description: Number of ICMP packets per ping
type: int
required_mtu:
default: 0
description: |
Desired MTU for all nodes - block if the unit MTU is different
(accounting for encapsulation). 0 disables.
type: int
min_speed:
default: 0
description: |
Minimum transfer speed in mbits/s required to pass the test.
0 disables.
type: int
```
e.g.
juju set magpie dns_server=8.8.8.8
juju set magpie dns_server=8.8.8.8 required_mtu=9000 min_speed=1000
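On the charm side these options are read with charmhelpers, roughly as check_nodes in
magpie_tools.py does (a minimal sketch, not the full check):
```
from charmhelpers.core import hookenv

cfg = hookenv.config()
required_mtu = cfg.get('required_mtu')  # 0 disables the MTU check
min_speed = cfg.get('min_speed')        # 0 disables the speed check
hookenv.log("required_mtu={} min_speed={}".format(required_mtu, min_speed), 'INFO')
```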

View File

@ -27,3 +27,11 @@ options:
default: False
description: Enable this if you intend to consume this layer - suppresses status messages
type: boolean
required_mtu:
default: 0
description: Desired MTU for all nodes - block if the unit MTU is different (accounting for encapsulation). 0 disables.
type: int
min_speed:
default: 0
description: Minimum transfer speed in mbit/s required to pass the test. 0 disables.
type: int

View File

@ -1,32 +1,24 @@
#!/usr/bin/env python
import os
import sys
import signal
import subprocess
import re
from charmhelpers.core import hookenv
from charmhelpers.core.host import get_nic_mtu
from charmhelpers.fetch import apt_install
from charms.reactive import set_state, remove_state
from charms.reactive.bus import get_state
import threading
import time
# is there a better way to get these packages into the unit?
def install_iperf():
apt_install("iperf")
class Iperf():
"""
Install and start a server automatically
"""
iperf_out = '/home/ubuntu/iperf_output.txt'
def __init__(self):
pass
def install_iperf(self):
apt_install("iperf")
def listen(self):
cmd = "iperf -s -m | tee " + self.iperf_out + " &"
cmd = "iperf -s -m -fm | tee " + self.iperf_out + " &"
os.system(cmd)
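# listen() leaves an iperf server running in the background with "-m"
# (report MSS/MTU) and "-fm" (rates in Mbits/sec), tee'd into iperf_out.
# mtu() and speed() below parse that report; the relevant iperf2 lines
# look roughly like the following (format assumed):
#   [  3] MSS size 1448 bytes (MTU 1500 bytes, ethernet)
#   [  3]  0.0-10.2 sec  1126 MBytes  929 Mbits/sec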
def mtu(self):
@ -34,35 +26,39 @@ class Iperf():
for line in f.readlines():
if "MTU" in line:
match = line
return match.split('MTU', 4)[1].split(' ')[1]
try:
return match.split('MTU', 4)[1].split(' ')[1]
except UnboundLocalError:
return "no iperf test results: failed"
def speed(self):
with open(self.iperf_out) as f:
for line in f.readlines():
if "bits" in line:
match = line
return match.rsplit(' ', 2)[1]
try:
return match.rsplit(' ', 2)[1]
except UnboundLocalError:
return "no iperf test results: failed"
def stop_server(self):
return
def selfcheck(self):
subprocess.check_output(["iperf", "-c", "localhost", "-t", "1"])
def iperf_selfcheck():
subprocess.check_output(["iperf", "-c", "localhost", "-t", "1"])
def hostcheck(self, nodes):
# Wait for other nodes to start their servers...
for node in nodes:
msg = "checking iperf on {}".format(node[1])
hookenv.log(msg)
cmd = "iperf -t1 -c {}".format(node[1])
os.system(cmd)
def iperf_hostcheck(nodes):
# safe_status('active', 'Leader is checking all other hosts...')
# Wait for other nodes to start their servers...
for node in nodes:
msg = "checking iperf on {}".format(node[1])
hookenv.log(msg)
cmd = "iperf -t1 -c {}".format(node[1])
os.system(cmd)
def safe_status(workload, status):
cfg = hookenv.config()
if not cfg.get('supress_status'):
hookenv.status_set(workload, status)
def ping(input, ping_time, ping_tries):
ping_string = "ping -c {} -w {} {} > /dev/null 2>&1"\
.format(ping_tries, ping_time, input)
@ -90,36 +86,80 @@ def check_local_hostname():
return result, stderr
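# The two helpers below share a return convention: 0 means the check is
# disabled (its config value is 0), 100 means the check passed, and 200
# means it failed.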
def check_local_mtu(required_mtu, iface_mtu):
if required_mtu == 0:
return 0
elif 0 <= (int(iface_mtu) - int(required_mtu)) <= 12:
return 100
else:
return 200
def check_min_speed(min_speed, iperf_speed):
if min_speed == 0:
return 0
elif min_speed <= iperf_speed:
return 100
elif min_speed > iperf_speed:
return 200
def check_nodes(nodes, iperf_client=False):
cfg = hookenv.config()
local_ip = hookenv.unit_private_ip()
ip_prefix = '.'.join(local_ip.split('.')[0:3])
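# "ip route get <prefix>" prints something like
# "10.0.0.0 dev eth0  src 10.0.0.5 ..." (output format assumed); the token
# after "dev" is taken as the primary interface for the MTU check.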
iface_line = subprocess.check_output(["ip", "route", "get", ip_prefix])
primary_iface = str(iface_line).split('dev')[1].split(' ')[1]
iface_mtu = get_nic_mtu(primary_iface)
required_mtu = cfg.get('required_mtu')
min_speed = cfg.get('min_speed')
msg = "MTU for iface: {} is {}".format(primary_iface, iface_mtu)
hookenv.log(msg, 'INFO')
#if required_mtu != 0 and not 0 <= (int(iface_mtu) - int(required_mtu)) <= 12:
# iperf_status = ", local mtu check failed, required_mtu: {}, iface mtu: {}".format(required_mtu, iface_mtu)
#elif required_mtu == 0 or 0 <= (int(iface_mtu) - int(required_mtu)) <= 12:
if not iperf_client:
iperf = Iperf()
mtu = iperf.mtu()
speed = iperf.speed()
if iface_mtu == mtu:
iperf_status = ", mtu: {}, {} mbit/s".format(mtu, speed)
# Make space for 8 or 12 byte variable overhead (TCP options)
if "failed" not in mtu:
if 0 <= (int(iface_mtu) - int(mtu)) <= 12:
iperf_status = ", net mtu ok: {}".format(iface_mtu)
else:
iperf_status = ", net mtu failed, mismatch: {} packet vs {} on iface {}".format(
mtu, iface_mtu, primary_iface)
else:
iperf_status = ", mtu mismatch: {} packet vs {} on iface {}, {} mbits/s".format(mtu, iface_mtu, primary_iface, speed)
iperf_status = ", network mtu check failed"
if "failed" not in speed:
if check_min_speed(min_speed, int(speed)) == 0:
iperf_status = iperf_status + ", {} mbit/s".format(speed)
if check_min_speed(min_speed, int(speed)) == 100:
iperf_status = iperf_status + ", speed ok: {} mbit/s".format(speed)
if check_min_speed(min_speed, int(speed)) == 200:
iperf_status = iperf_status + ", speed failed: {} < {} mbit/s".format(speed, str(min_speed))
else:
iperf_status = iperf_status + ", iperf speed check failed"
elif iperf_client:
iperf_status = ", iperf leader, mtu: {}".format(iface_mtu)
iperf_hostcheck(nodes)
iperf = Iperf()
iperf.hostcheck(nodes)
if check_local_mtu(required_mtu, iface_mtu) == 100:
iperf_status = iperf_status + ", local mtu ok, required: {}".format(required_mtu)
elif check_local_mtu(required_mtu, iface_mtu) == 200:
iperf_status = iperf_status + ", local mtu failed, required: {}, iface: {}".format(required_mtu, iface_mtu)
hookenv.log('doing other things after iperf', 'INFO')
cfg = hookenv.config()
cfg_check_local_hostname = cfg.get('check_local_hostname')
if cfg_check_local_hostname:
no_hostname = check_local_hostname()
if no_hostname[0] == '':
no_hostname = ', local hostname ok'
hookenv.log('Local hostname lookup OK: {}'.format(str(no_hostname)), 'INFO')
hookenv.log('Local hostname lookup OK: {}'.format(
str(no_hostname)), 'INFO')
else:
no_hostname = ', local hostname failed'
hookenv.log('Local hostname lookup FAILED: {}'.format(str(no_hostname)), 'ERROR')
hookenv.log('Local hostname lookup FAILED: {}'.format(
str(no_hostname)), 'ERROR')
no_ping = check_ping(nodes)
no_dns = check_dns(nodes)
@ -157,9 +197,11 @@ def check_nodes(nodes, iperf_client=False):
.format(dns_status, str(no_rev), str(no_fwd))
if cfg_check_local_hostname:
check_status = '{}{}{}{}'.format(no_ping, str(no_hostname), str(dns_status), str(iperf_status))
check_status = '{}{}{}{}'.format(no_ping, str(
no_hostname), str(dns_status), str(iperf_status))
else:
check_status = '{}{}{}'.format(no_ping, str(dns_status), str(iperf_status))
check_status = '{}{}{}'.format(
no_ping, str(dns_status), str(iperf_status))
if 'failed' in check_status:
workload = 'blocked'

View File

@ -2,14 +2,15 @@ name: magpie
summary: Magpie layer to test networking - ICMP and DNS
maintainer: Andrew McLeod <andrew.mcleod@canonical.com>
description: |
Magpie will check ICMP and DNS between itself and any
peer units deployed - deploy more than one magpie unit
for meaningful results.
Magpie will check ICMP, DNS, MTU and rx/tx speed between itself and any
peer units deployed - deploy more than one magpie unit for meaningful results.
tags: ["testing", "CI"]
peers:
magpie:
interface: magpie
series:
- artful
- zesty
- xenial
- trusty
- precise

View File

@ -1,10 +1,8 @@
# pylint: disable=unused-argument
from charms.reactive import when, when_not, set_state, remove_state
from charms.reactive.bus import get_state
from charmhelpers.core import hookenv
from charms.layer.magpie_tools import check_nodes, safe_status, Iperf, install_iperf
from charmhelpers.core.unitdata import Storage
import threading
from charms.layer.magpie_tools import check_nodes, safe_status, Iperf
def _set_states(check_result):
if 'fail' in check_result['icmp']:
@ -16,15 +14,19 @@ def _set_states(check_result):
else:
remove_state('magpie-dns.failed')
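# Rough shape of the iperf handlers below: non-leader units start an iperf
# server (iperf.listening) and advertise readiness over the peer relation;
# the leader waits until every server is ready (iperf.servers.ready), runs
# the client checks against each peer and marks them checked; non-leaders
# then run their own check_nodes pass once iperf.checked is set.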
@when_not('iperf.installed')
def install_iperf_pkg():
install_iperf()
iperf = Iperf()
iperf.install_iperf()
set_state('iperf.installed')
@when_not('magpie.joined')
def no_peers():
safe_status('waiting', 'Waiting for peers...')
@when('magpie.joined')
@when_not('leadership.is_leader', 'iperf.checked')
def check_check_state(magpie):
@ -35,6 +37,7 @@ def check_check_state(magpie):
(hookenv.local_unit() in magpie.get_iperf_checked()):
set_state('iperf.checked')
@when('magpie.joined', 'leadership.is_leader')
@when_not('iperf.servers.ready')
def leader_wait_servers_ready(magpie):
@ -48,18 +51,19 @@ def leader_wait_servers_ready(magpie):
else:
remove_state('iperf.servers.ready')
@when('magpie.joined')
@when_not('leadership.is_leader', 'iperf.listening')
def listen_for_checks(magpie):
'''
If I'm not the leader and I'm not listening, then listen
'''
nodes = magpie.get_nodes()
iperf = Iperf()
iperf.listen()
magpie.set_iperf_server_ready()
set_state('iperf.listening')
@when('iperf.servers.ready', 'magpie.joined', 'leadership.is_leader')
def client_check_hosts(magpie):
'''
@ -69,6 +73,7 @@ def client_check_hosts(magpie):
_set_states(check_nodes(nodes, iperf_client=True))
magpie.set_iperf_checked()
@when('magpie.joined', 'iperf.checked')
@when_not('leadership.is_leader')
def check_all_node(magpie):
@ -77,4 +82,3 @@ def check_all_node(magpie):
'''
nodes = magpie.get_nodes()
_set_states(check_nodes(nodes))