diff --git a/README.md b/README.md index bbd7c84..63ac970 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,18 @@ # Overview -Magpie is a charm used for testing the networking (ICMP and DNS specifically) -of a juju provider/substrate. Simply deploy more than one Magpie charm and -watch the status messages and debug logs. +Magpie is a charm used for testing the networking of a juju provider/substrate. +Simply deploy more than one Magpie charm and watch the status messages and +debug logs. + +Magpie will test: + + - DNS functionality + - Local hostname lookup + - ICMP between peers + - MTU between leader and clients + - Transfer speed between leader and clients + +*MTU and transfer speed are tested with iperf2* Status messages will show the unit numbers that have issues - if there are no problems, there will not be a verbose status message. @@ -10,6 +20,13 @@ no problems, there will not be a verbose status message. All actions, strings, queries and actions are logged in the juju logs. +# MTU Notes + +The MTU size reported by iperf is sometimes 8 or 12 bytes less than the configured +MTU on the interface. This is because TCP options are not included in the measurement, +so magpie ignores a difference of up to 12 bytes and reports the MTU as OK. + + # Workload Status In addition to ICMP and DNS status messages, if a networking problem is @@ -26,12 +43,15 @@ relation. * **`magpie-dns.failed`** DNS has failed to one or more units in the peer relation. +Note: work on these states has stopped, as it is currently unlikely that magpie will be +consumed as a layer. +Please open an issue against this GitHub repo if more states are required. 
# Usage ``` juju deploy magpie -n 2 -juju deploy magpie -n 1 --to lxc:1 +juju deploy magpie -n 1 --to lxd:1 ``` This charm also supports the following config values: @@ -61,8 +81,21 @@ This charm also supports the following config values: default: 1 description: Number of ICMP packets per ping type: int + required_mtu: + default: 0 + description: | + Desired MTU for all nodes - block if the unit MTU is different + (accounting for encapsulation). 0 disables. + type: int + min_speed: + default: 0 + description: | + Minimum transfer speed in mbit/s required to pass the test. + 0 disables. + type: int ``` e.g. -juju set magpie dns_server=8.8.8.8 +juju set magpie dns_server=8.8.8.8 required_mtu=9000 min_speed=1000 + diff --git a/config.yaml b/config.yaml index d7eb0e3..17006a8 100644 --- a/config.yaml +++ b/config.yaml @@ -27,3 +27,11 @@ options: default: False description: Enable this if you intend to consume this layer - supresses status messages type: boolean + required_mtu: + default: 0 + description: Desired MTU for all nodes - block if the unit MTU is different (accounting for encapsulation). 0 disables. + type: int + min_speed: + default: 0 + description: Minimum transfer speed in mbit/s required to pass the test. 0 disables. + type: int diff --git a/lib/charms/layer/magpie_tools.py b/lib/charms/layer/magpie_tools.py index c05dc80..b8bc904 100644 --- a/lib/charms/layer/magpie_tools.py +++ b/lib/charms/layer/magpie_tools.py @@ -1,32 +1,24 @@ #!/usr/bin/env python import os -import sys -import signal import subprocess import re from charmhelpers.core import hookenv from charmhelpers.core.host import get_nic_mtu from charmhelpers.fetch import apt_install -from charms.reactive import set_state, remove_state -from charms.reactive.bus import get_state -import threading -import time -# is there a better way to get these packages into the unit? 
-def install_iperf(): - apt_install("iperf") class Iperf(): """ Install and start a server automatically """ iperf_out = '/home/ubuntu/iperf_output.txt' - def __init__(self): - pass - + + def install_iperf(self): + apt_install("iperf") + def listen(self): - cmd = "iperf -s -m | tee " + self.iperf_out + " &" + cmd = "iperf -s -m -fm | tee " + self.iperf_out + " &" os.system(cmd) def mtu(self): @@ -34,35 +26,39 @@ class Iperf(): for line in f.readlines(): if "MTU" in line: match = line - return match.split('MTU', 4)[1].split(' ')[1] + try: + return match.split('MTU', 4)[1].split(' ')[1] + except UnboundLocalError: + return "no iperf test results: failed" def speed(self): with open(self.iperf_out) as f: for line in f.readlines(): if "bits" in line: match = line - return match.rsplit(' ', 2)[1] + try: + return match.rsplit(' ', 2)[1] + except UnboundLocalError: + return "no iperf test results: failed" - def stop_server(self): - return + def selfcheck(self): + subprocess.check_output(["iperf", "-c", "localhost", "-t", "1"]) -def iperf_selfcheck(): - subprocess.check_output(["iperf", "-c", "localhost", "-t", "1"]) + def hostcheck(self, nodes): + # Wait for other nodes to start their servers... + for node in nodes: + msg = "checking iperf on {}".format(node[1]) + hookenv.log(msg) + cmd = "iperf -t1 -c {}".format(node[1]) + os.system(cmd) -def iperf_hostcheck(nodes): - # safe_status('active', 'Leader is checking all other hosts...') - # Wait for other nodes to start their servers... 
- for node in nodes: - msg = "checking iperf on {}".format(node[1]) - hookenv.log(msg) - cmd = "iperf -t1 -c {}".format(node[1]) - os.system(cmd) def safe_status(workload, status): cfg = hookenv.config() if not cfg.get('supress_status'): hookenv.status_set(workload, status) + def ping(input, ping_time, ping_tries): ping_string = "ping -c {} -w {} {} > /dev/null 2>&1"\ .format(ping_tries, ping_time, input) @@ -90,36 +86,80 @@ def check_local_hostname(): return result, stderr +def check_local_mtu(required_mtu, iface_mtu): + if required_mtu == 0: + return 0 + elif 0 <= (int(iface_mtu) - int(required_mtu)) <= 12: + return 100 + else: + return 200 + + +def check_min_speed(min_speed, iperf_speed): + if min_speed == 0: + return 0 + elif min_speed <= iperf_speed: + return 100 + elif min_speed > iperf_speed: + return 200 + + def check_nodes(nodes, iperf_client=False): + cfg = hookenv.config() local_ip = hookenv.unit_private_ip() ip_prefix = '.'.join(local_ip.split('.')[0:3]) iface_line = subprocess.check_output(["ip", "route", "get", ip_prefix]) primary_iface = str(iface_line).split('dev')[1].split(' ')[1] iface_mtu = get_nic_mtu(primary_iface) + required_mtu = cfg.get('required_mtu') + min_speed = cfg.get('min_speed') msg = "MTU for iface: {} is {}".format(primary_iface, iface_mtu) hookenv.log(msg, 'INFO') + #if required_mtu != 0 and not 0 <= (int(iface_mtu) - int(required_mtu)) <= 12: + # iperf_status = ", local mtu check failed, required_mtu: {}, iface mtu: {}".format(required_mtu, iface_mtu) + #elif required_mtu == 0 or 0 <= (int(iface_mtu) - int(required_mtu)) <= 12: if not iperf_client: iperf = Iperf() mtu = iperf.mtu() speed = iperf.speed() - if iface_mtu == mtu: - iperf_status = ", mtu: {}, {} mbit/s".format(mtu, speed) + # Make space for 8 or 12 byte variable overhead (TCP options) + if "failed" not in mtu: + if 0 <= (int(iface_mtu) - int(mtu)) <= 12: + iperf_status = ", net mtu ok: {}".format(iface_mtu) + else: + iperf_status = ", net mtu failed, mismatch: {} 
packet vs {} on iface {}".format( + mtu, iface_mtu, primary_iface) else: - iperf_status = ", mtu mismatch: {} packet vs {} on iface {}, {} mbits/s".format(mtu, iface_mtu, primary_iface, speed) + iperf_status = ", network mtu check failed" + if "failed" not in speed: + if check_min_speed(min_speed, int(speed)) == 0: + iperf_status = iperf_status + ", {} mbit/s".format(speed) + if check_min_speed(min_speed, int(speed)) == 100: + iperf_status = iperf_status + ", speed ok: {} mbit/s".format(speed) + if check_min_speed(min_speed, int(speed)) == 200: + iperf_status = iperf_status + ", speed failed: {} < {} mbit/s".format(speed, str(min_speed)) + else: + iperf_status = iperf_status + ", iperf speed check failed" elif iperf_client: iperf_status = ", iperf leader, mtu: {}".format(iface_mtu) - iperf_hostcheck(nodes) + iperf = Iperf() + iperf.hostcheck(nodes) + if check_local_mtu(required_mtu, iface_mtu) == 100: + iperf_status = iperf_status + ", local mtu ok, required: {}".format(required_mtu) + elif check_local_mtu(required_mtu, iface_mtu) == 200: + iperf_status = iperf_status + ", local mtu failed, required: {}, iface: {}".format(required_mtu, iface_mtu) hookenv.log('doing other things after iperf', 'INFO') - cfg = hookenv.config() cfg_check_local_hostname = cfg.get('check_local_hostname') if cfg_check_local_hostname: no_hostname = check_local_hostname() if no_hostname[0] == '': no_hostname = ', local hostname ok' - hookenv.log('Local hostname lookup OK: {}'.format(str(no_hostname)), 'INFO') + hookenv.log('Local hostname lookup OK: {}'.format( + str(no_hostname)), 'INFO') else: no_hostname = ', local hostname failed' - hookenv.log('Local hostname lookup FAILED: {}'.format(str(no_hostname)), 'ERROR') + hookenv.log('Local hostname lookup FAILED: {}'.format( + str(no_hostname)), 'ERROR') no_ping = check_ping(nodes) no_dns = check_dns(nodes) @@ -157,9 +197,11 @@ def check_nodes(nodes, iperf_client=False): .format(dns_status, str(no_rev), str(no_fwd)) if 
cfg_check_local_hostname: - check_status = '{}{}{}{}'.format(no_ping, str(no_hostname), str(dns_status), str(iperf_status)) + check_status = '{}{}{}{}'.format(no_ping, str( + no_hostname), str(dns_status), str(iperf_status)) else: - check_status = '{}{}{}'.format(no_ping, str(dns_status), str(iperf_status)) + check_status = '{}{}{}'.format( + no_ping, str(dns_status), str(iperf_status)) if 'failed' in check_status: workload = 'blocked' diff --git a/metadata.yaml b/metadata.yaml index 7568b25..8bf6b16 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -2,14 +2,15 @@ name: magpie summary: Magpie layer to test networking - ICMP and DNS maintainer: Andrew McLeod description: | - Magpie will check ICMP and DNS between itself and any - peer units deployed - deploy more than one magpie unit - for meaningful results. + Magpie will check ICMP, DNS, MTU and rx/tx speed between itself and any + peer units deployed - deploy more than one magpie unit for meaningful results. tags: ["testing", "CI"] peers: magpie: interface: magpie series: + - artful + - zesty - xenial - trusty - precise diff --git a/reactive/magpie.py b/reactive/magpie.py index 17e8829..eb5e3b7 100644 --- a/reactive/magpie.py +++ b/reactive/magpie.py @@ -1,10 +1,8 @@ # pylint: disable=unused-argument from charms.reactive import when, when_not, set_state, remove_state -from charms.reactive.bus import get_state from charmhelpers.core import hookenv -from charms.layer.magpie_tools import check_nodes, safe_status, Iperf, install_iperf -from charmhelpers.core.unitdata import Storage -import threading +from charms.layer.magpie_tools import check_nodes, safe_status, Iperf + def _set_states(check_result): if 'fail' in check_result['icmp']: @@ -16,15 +14,19 @@ def _set_states(check_result): else: remove_state('magpie-dns.failed') + @when_not('iperf.installed') def install_iperf_pkg(): - install_iperf() + iperf = Iperf() + iperf.install_iperf() set_state('iperf.installed') + @when_not('magpie.joined') def no_peers(): 
safe_status('waiting', 'Waiting for peers...') + @when('magpie.joined') @when_not('leadership.is_leader', 'iperf.checked') def check_check_state(magpie): @@ -35,6 +37,7 @@ def check_check_state(magpie): (hookenv.local_unit() in magpie.get_iperf_checked()): set_state('iperf.checked') + @when('magpie.joined', 'leadership.is_leader') @when_not('iperf.servers.ready') def leader_wait_servers_ready(magpie): @@ -48,18 +51,19 @@ def leader_wait_servers_ready(magpie): else: remove_state('iperf.servers.ready') + @when('magpie.joined') @when_not('leadership.is_leader', 'iperf.listening') def listen_for_checks(magpie): ''' If im not the leader, and im not listening, then listen ''' - nodes = magpie.get_nodes() iperf = Iperf() iperf.listen() magpie.set_iperf_server_ready() set_state('iperf.listening') + @when('iperf.servers.ready', 'magpie.joined', 'leadership.is_leader') def client_check_hosts(magpie): ''' @@ -69,6 +73,7 @@ def client_check_hosts(magpie): _set_states(check_nodes(nodes, iperf_client=True)) magpie.set_iperf_checked() + @when('magpie.joined', 'iperf.checked') @when_not('leadership.is_leader') def check_all_node(magpie): @@ -77,4 +82,3 @@ def check_all_node(magpie): ''' nodes = magpie.get_nodes() _set_states(check_nodes(nodes)) -