charm-magpie/lib/charms/layer/magpie_tools.py

361 lines
13 KiB
Python

#!/usr/bin/env python
import os
import subprocess
import re
from charmhelpers.core import hookenv
from charmhelpers.core.host import get_nic_mtu
from charmhelpers.fetch import apt_install
class Iperf():
    """
    Install and start an iperf server, and read back its results.

    The server started by listen() tees its output into ``self.iperf_out``;
    mtu() and speed() parse that file.
    """

    # Returned by mtu()/speed() when no usable result is available.
    # check_nodes() looks for the substring "failed" in these values.
    _NO_RESULTS = "no iperf test results: failed"

    def __init__(self):
        self.iperf_out = '/home/ubuntu/iperf_output.' + hookenv.application_name() + '.txt'

    def install_iperf(self):
        """Install the iperf package via apt."""
        apt_install("iperf")

    def listen(self):
        """Start a background iperf server bound to the 'magpie' binding.

        The address used is the first address of the first bind-address
        reported by ``network_get('magpie')``.  Server output is piped
        through ``tee`` into ``self.iperf_out``.
        """
        ip = hookenv.network_get('magpie')['bind-addresses'][0]['addresses'][0]['address']
        cmd = "iperf -s -m -fm -B " + ip + " | tee " + self.iperf_out + " &"
        os.system(cmd)

    def _last_line_containing(self, marker):
        """Return the last line of the output file containing *marker*.

        Returns None when the file cannot be opened or no line matches.
        """
        found = None
        try:
            with open(self.iperf_out) as f:
                for line in f:
                    if marker in line:
                        found = line
        except OSError:
            # The server has not produced an output file (yet); report
            # this as "no results" instead of crashing the caller.
            return None
        return found

    def mtu(self):
        """Return the MTU from the last "MTU" line of the iperf output.

        The value is returned as a string (the token following "MTU").
        If there is no output file, no matching line, or the line is
        truncated, the "no iperf test results: failed" string is returned.
        """
        line = self._last_line_containing("MTU")
        if line is None:
            return self._NO_RESULTS
        try:
            return line.split('MTU', 4)[1].split(' ')[1]
        except IndexError:
            return self._NO_RESULTS

    def speed(self):
        """Return the speed from the last "bits" line of the iperf output.

        The value is the second-to-last space separated token of that
        line, returned as a string.  If there is no output file, no
        matching line, or the line is truncated, the
        "no iperf test results: failed" string is returned.
        """
        line = self._last_line_containing("bits")
        if line is None:
            return self._NO_RESULTS
        try:
            return line.rsplit(' ', 2)[1]
        except IndexError:
            return self._NO_RESULTS

    def selfcheck(self):
        """Run a one second iperf client test against localhost.

        Raises subprocess.CalledProcessError if iperf exits non-zero.
        """
        subprocess.check_output(["iperf", "-c", "localhost", "-t", "1"])

    def hostcheck(self, nodes):
        """Run an iperf client test against every node.

        Each node is indexable; ``node[1]`` is used as the target address.
        The exit status of the iperf client is not checked.
        """
        for node in nodes:
            msg = "checking iperf on {}".format(node[1])
            hookenv.log(msg)
            cmd = "iperf -t1 -c {}".format(node[1])
            os.system(cmd)
def safe_status(workload, status):
    """Set the unit's workload status unless status updates are suppressed.

    Nothing is set when the 'supress_status' config option is truthy.
    """
    suppressed = hookenv.config().get('supress_status')
    if suppressed:
        return
    hookenv.status_set(workload, status)
def ping(input, ping_time, ping_tries):
    """Ping *input* through the shell and report success as 0, failure as 1.

    *ping_tries* is passed to ``ping -c`` and *ping_time* to ``ping -w``.
    All ping output is discarded; only the exit status is inspected.
    """
    command = "ping -c {} -w {} {} > /dev/null 2>&1".format(
        ping_tries, ping_time, input)
    hookenv.log('Ping command: {}'.format(command), 'DEBUG')
    exit_status = os.system(command)
    # Collapse every non-zero exit status into a single failure value.
    return 0 if exit_status == 0 else 1
def check_local_hostname():
    """Check that the local hostname can be looked up with ``getent hosts``.

    Returns a ``(result, returncode)`` tuple:

    * ``('', 0)`` when the lookup succeeds;
    * ``(local_hostname, returncode)`` when ``getent`` exits non-zero.

    Raises subprocess.CalledProcessError if the ``hostname`` command
    itself fails.
    """
    local_hostname = subprocess.check_output('hostname', shell=True)\
        .decode('utf-8').rstrip()
    lookup_cmd = "getent hosts {}".format(local_hostname)
    hookenv.log('Looking up local hostname: {}'.format(local_hostname))
    try:
        # Only the exit status matters; the lookup output is not used.
        subprocess.check_output(lookup_cmd, shell=True)
    except subprocess.CalledProcessError as exc:
        return local_hostname, exc.returncode
    return '', 0
def check_local_mtu(required_mtu, iface_mtu):
    """Compare the interface MTU with the required MTU.

    Returns 0 when *required_mtu* equals 0 (no check performed), 100 when
    the interface MTU is between 0 and 12 (inclusive) above the required
    MTU, and 200 otherwise.
    """
    if required_mtu == 0:
        return 0
    headroom = int(iface_mtu) - int(required_mtu)
    if headroom < 0 or headroom > 12:
        return 200
    return 100
def check_min_speed(min_speed, iperf_speed):
    """Compare the measured iperf speed with the configured minimum.

    Returns 0 when *min_speed* equals 0 (no check performed), 100 when
    the measured speed reaches the minimum, 200 when it is below it, and
    None if neither comparison holds.
    """
    if min_speed == 0:
        return 0
    if min_speed <= iperf_speed:
        return 100
    if min_speed > iperf_speed:
        return 200
    return None
def _primary_iface_from_routes(route_output):
    """Extract an interface name from ``ip route show to match`` output.

    The first route line without a "via" token is preferred; if every
    line goes through a gateway, the last line naming a device is used.
    Raises ValueError when no line names a device.
    """
    fallback = None
    for line in route_output.splitlines():
        fields = line.split()
        # Token-based parsing: the interface is the word after "dev".
        if 'dev' not in fields[:-1]:
            continue
        iface = fields[fields.index('dev') + 1]
        if 'via' not in fields:
            return iface
        fallback = iface
    if fallback is None:
        raise ValueError(
            "no interface found in route output: {!r}".format(route_output))
    return fallback


def _format_dns_status(no_dns):
    """Render the (no_rev, no_fwd, no_match) lists as a status fragment.

    Returns ', dns ok' when all three lists are empty; otherwise one
    labelled ', ... dns failed: [...]' part per non-empty list.
    """
    no_rev, no_fwd, no_match = no_dns
    if not (no_rev or no_fwd or no_match):
        return ', dns ok'
    parts = []
    if no_match:
        parts.append(', match dns failed: ' + str(no_match))
    if no_rev:
        parts.append(', rev dns failed: ' + str(no_rev))
    if no_fwd:
        parts.append(', fwd dns failed: ' + str(no_fwd))
    return ''.join(parts)


def check_nodes(nodes, iperf_client=False):
    """Run the MTU, iperf, hostname, ICMP and DNS checks and set status.

    :param nodes: iterable of nodes; ``node[0]`` is used as the unit name
        and ``node[1]`` as its address by the individual checks.
    :param iperf_client: when true this unit runs the iperf client against
        *nodes*; otherwise it reads the results of its own iperf server.
    :returns: dict with the keys 'icmp' and 'dns' holding the respective
        status strings.

    The combined status message is published through safe_status(); the
    workload state is 'blocked' if the message contains 'failed', else
    'active'.
    """
    cfg = hookenv.config()
    local_ip = hookenv.unit_private_ip()
    route_output = subprocess.check_output(
        ["ip", "route", "show", "to", "match", local_ip]).decode()
    primary_iface = _primary_iface_from_routes(route_output)
    iface_mtu = get_nic_mtu(primary_iface)
    required_mtu = cfg.get('required_mtu')
    min_speed = cfg.get('min_speed')
    msg = "MTU for iface: {} is {}".format(primary_iface, iface_mtu)
    hookenv.log(msg, 'INFO')

    if not iperf_client:
        iperf = Iperf()
        mtu = iperf.mtu()
        speed = iperf.speed()
        # Make space for 8 or 12 byte variable overhead (TCP options)
        if "failed" not in mtu:
            if 0 <= (int(iface_mtu) - int(mtu)) <= 12:
                iperf_status = ", net mtu ok: {}".format(iface_mtu)
            else:
                iperf_status = ", net mtu failed, mismatch: {} packet vs {} on iface {}".format(
                    mtu, iface_mtu, primary_iface)
        else:
            iperf_status = ", network mtu check failed"
        if "failed" not in speed:
            speed_result = check_min_speed(min_speed, int(float(speed)))
            if speed_result == 0:
                iperf_status = iperf_status + ", {} mbit/s".format(speed)
            elif speed_result == 100:
                iperf_status = iperf_status + ", speed ok: {} mbit/s".format(speed)
            elif speed_result == 200:
                iperf_status = iperf_status + ", speed failed: {} < {} mbit/s".format(speed, str(min_speed))
        else:
            iperf_status = iperf_status + ", iperf speed check failed"
    else:
        iperf_status = ", iperf leader, mtu: {}".format(iface_mtu)
        iperf = Iperf()
        iperf.hostcheck(nodes)

    local_mtu_result = check_local_mtu(required_mtu, iface_mtu)
    if local_mtu_result == 100:
        iperf_status = iperf_status + ", local mtu ok, required: {}".format(required_mtu)
    elif local_mtu_result == 200:
        iperf_status = iperf_status + ", local mtu failed, required: {}, iface: {}".format(required_mtu, iface_mtu)
    hookenv.log('doing other things after iperf', 'INFO')

    cfg_check_local_hostname = cfg.get('check_local_hostname')
    if cfg_check_local_hostname:
        lookup_result = check_local_hostname()
        if lookup_result[0] == '':
            hostname_status = ', local hostname ok'
            hookenv.log('Local hostname lookup OK: {}'.format(
                str(hostname_status)), 'INFO')
        else:
            hostname_status = ', local hostname failed'
            # On failure the first element is the hostname that could
            # not be resolved; log that rather than the status text.
            hookenv.log('Local hostname lookup FAILED: {}'.format(
                str(lookup_result[0])), 'ERROR')

    no_ping = check_ping(nodes)
    no_dns = check_dns(nodes)
    hookenv.log("Units with DNS problems: " + str(no_dns))

    if not no_ping:
        no_ping = 'icmp ok'
    else:
        no_ping = 'icmp failed: ' + str(no_ping)

    dns_status = _format_dns_status(no_dns)

    if cfg_check_local_hostname:
        check_status = '{}{}{}{}'.format(no_ping, str(
            hostname_status), str(dns_status), str(iperf_status))
    else:
        check_status = '{}{}{}'.format(
            no_ping, str(dns_status), str(iperf_status))

    if 'failed' in check_status:
        workload = 'blocked'
    else:
        workload = 'active'
    safe_status(workload, check_status)
    reactive_state = {'icmp': no_ping, 'dns': dns_status}
    return reactive_state
def check_ping(nodes):
    """Ping every node and return the unit ids that did not answer.

    :param nodes: iterable of nodes; ``node[0]`` is a unit name of the
        form "<application>/<id>" and ``node[1]`` the address to ping.
    :returns: list of unit ids (the part after the "/") whose ping failed.

    Timeout and number of tries come from the 'ping_timeout' and
    'ping_tries' config options.
    """
    cfg = hookenv.config()
    ping_time = cfg.get('ping_timeout')
    ping_tries = cfg.get('ping_tries')
    unreachable = []
    for node in nodes:
        unit_id = node[0].split('/')[1]
        hookenv.log('Pinging unit_id: ' + str(unit_id), 'INFO')
        if ping(node[1], ping_time, ping_tries) == 1:
            hookenv.log('Ping FAILED for unit_id: ' + str(unit_id), 'ERROR')
            if unit_id not in unreachable:
                unreachable.append(unit_id)
        else:
            hookenv.log('Ping OK for unit_id: ' + str(unit_id), 'INFO')
            # A later successful ping for the same unit id clears an
            # earlier failure recorded in this run.
            if unit_id in unreachable:
                unreachable.remove(unit_id)
    return unreachable
def check_dns(nodes):
    """Run reverse and forward DNS checks for every node.

    :param nodes: iterable of nodes; ``node[0]`` is a unit name of the
        form "<application>/<id>" and ``node[1]`` its address (a dotted
        IPv4 address, or anything else, which is treated as a hostname
        and resolved first).
    :returns: tuple ``(norev, nofwd, nomatch)`` of unit id lists: units
        whose reverse lookup failed, whose forward lookup failed, and
        whose forward result did not match the original address.

    DNS server, tries and timeout come from the 'dns_server', 'dns_tries'
    and 'dns_time' config options.
    """
    cfg = hookenv.config()
    dns_server = cfg.get('dns_server')
    dns_tries = cfg.get('dns_tries')
    dns_time = cfg.get('dns_time')
    ipv4_re = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
    norev = []
    nofwd = []
    nomatch = []
    hookenv.log("DNS (ALL NODES): {}".format(nodes))
    for node in nodes:
        ip = node[1]
        if not ipv4_re.match(ip):
            hookenv.log("private-address appears to be a hostname: {},"
                        " attempting forward lookup...".format(ip), 'WARN')
            ip = forward_dns(ip, dns_server, dns_tries, dns_time)[0]
        else:
            hookenv.log('private-address appears to be an IP', 'INFO')
        unit_id = node[0].split('/')[1]
        hookenv.log("Reverse lookup for ip: {}, node: {},"
                    " unit_id: {}".format(ip, node[0], unit_id), 'INFO')
        reverse, r_stderr = reverse_dns(ip, dns_server, dns_tries, dns_time)
        hookenv.log("Reverse result for unit_id: {}, hostname: {},"
                    " exitcode: {}".format(unit_id, str(reverse),
                                           str(r_stderr)))
        if r_stderr:
            hookenv.log("Reverse FAILED for"
                        " unit_id: {}".format(unit_id), 'ERROR')
            if unit_id not in norev:
                norev.append(unit_id)
            # Without a reverse result there is nothing to look up forward.
            continue

        hookenv.log("Reverse OK for unit_id: {}".format(unit_id), 'INFO')
        if unit_id in norev:
            norev.remove(unit_id)
        hookenv.log("Forward lookup for hostname: {}, node: {},"
                    " unit_id: {}".format(str(reverse), node[0], unit_id),
                    'INFO')
        # The reverse lookup may return several names; try each in turn
        # and stop at the first one that resolves back to the address.
        for rev in reverse.split():
            forward, f_stderr = forward_dns(rev, dns_server,
                                            dns_tries, dns_time)
            hookenv.log("Forward result for unit_id: {}, ip: {},"
                        " exitcode: {}".format(unit_id, forward,
                                               str(f_stderr)))
            if f_stderr:
                hookenv.log("Forward FAILED for"
                            " unit_id: {}".format(unit_id), 'ERROR')
                if unit_id not in nofwd:
                    nofwd.append(unit_id)
            else:
                hookenv.log("Forward OK for"
                            " unit_id: {}".format(unit_id), 'INFO')
                if unit_id in nofwd:
                    nofwd.remove(unit_id)
                if ip != forward:
                    if not ipv4_re.match(forward):
                        forward = "Can not resolve hostname to IP {}"\
                            .format(repr(forward))
                    hookenv.log("Original IP and Forward MATCH FAILED for"
                                " unit_id: {}, Original: {}, Forward: {}"
                                .format(unit_id, ip, forward), 'ERROR')
                    if unit_id not in nomatch:
                        nomatch.append(unit_id)
                else:
                    hookenv.log("Original IP and Forward MATCH OK for unit_id:"
                                " {}, Original: {}, Forward: {}"
                                .format(unit_id, ip, forward),
                                'INFO')
                    if unit_id in nomatch:
                        nomatch.remove(unit_id)
                    break
    return norev, nofwd, nomatch
def reverse_dns(input, dns_server, tries, timeout):
    """Run a reverse DNS lookup for *input* with ``dig -x``.

    :param input: address to look up.
    :param dns_server: server to query; when falsy dig's default is used.
    :param tries: value for dig's ``+tries`` option.
    :param timeout: value for dig's ``+time`` option.
    :returns: ``(result, returncode)``.  On success *result* is dig's
        ``+short`` output and *returncode* is 0.  An empty answer yields
        ``('No reverse response', 1)``; a failing dig yields an error
        text and a non-zero code.
    """
    # An argument list (no shell) keeps the looked-up value from being
    # interpreted as shell syntax.
    argv = ['/usr/bin/dig', '-x', str(input), '+short',
            '+tries={}'.format(tries), '+time={}'.format(timeout)]
    if dns_server:
        argv.append('@{}'.format(dns_server))
    hookenv.log('DNS Reverse command: {}'.format(' '.join(argv)), 'DEBUG')
    try:
        result = subprocess.check_output(argv).decode('utf-8').rstrip()
        stderr = 0
    except subprocess.CalledProcessError as exc:
        # exc.output is bytes; decode it so the message is readable.
        output = (exc.output or b'').decode('utf-8', 'replace').rstrip()
        result = "Reverse DNS lookup error: " + output
        stderr = exc.returncode
    except OSError as exc:
        # dig could not be executed at all; report it as a failed lookup.
        result = "Reverse DNS lookup error: " + str(exc)
        stderr = exc.errno or 1
    if result == '':
        result = 'No reverse response'
        stderr = 1
    return result, stderr
def forward_dns(input, dns_server, tries, timeout):
    """Run a forward DNS lookup for *input* with ``dig``.

    :param input: name to look up.
    :param dns_server: server to query; when falsy dig's default is used.
    :param tries: value for dig's ``+tries`` option.
    :param timeout: value for dig's ``+time`` option.
    :returns: ``(result, returncode)``.  On success *result* is dig's
        ``+short`` output and *returncode* is 0.  An empty answer yields
        ``('No forward response', 1)``; a failing dig yields an error
        text and a non-zero code.
    """
    # An argument list (no shell) keeps the looked-up name, which may
    # come from a DNS answer, from being interpreted as shell syntax.
    argv = ['/usr/bin/dig', str(input), '+short',
            '+tries={}'.format(tries), '+time={}'.format(timeout)]
    if dns_server:
        argv.append('@{}'.format(dns_server))
    hookenv.log('DNS Forward command: {}'.format(' '.join(argv)), 'DEBUG')
    try:
        result = subprocess.check_output(argv).decode('utf-8').rstrip()
        stderr = 0
    except subprocess.CalledProcessError as exc:
        # exc.output is bytes; decode it so the message is readable.
        output = (exc.output or b'').decode('utf-8', 'replace').rstrip()
        result = "Forward DNS lookup error: " + output
        stderr = exc.returncode
    except OSError as exc:
        # dig could not be executed at all; report it as a failed lookup.
        result = "Forward DNS lookup error: " + str(exc)
        stderr = exc.errno or 1
    if result == '':
        result = 'No forward response'
        stderr = 1
    return result, stderr