Initial framework
This commit is contained in:
parent
04b37aa1b7
commit
60cd36e5de
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,3 +1,7 @@
|
|||||||
|
*~
|
||||||
|
*#
|
||||||
|
.stestr/
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
@ -13,9 +13,244 @@
|
|||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import configparser
|
||||||
|
import collections
|
||||||
|
import logging
|
||||||
|
import io
|
||||||
import sys
|
import sys
|
||||||
|
import subprocess
|
||||||
|
import re
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from enum import Enum
|
||||||
|
from prettytable import PrettyTable
|
||||||
|
|
||||||
|
# Parsed configuration shared module-wide. It stays None until main()
# replaces it with a configparser.RawConfigParser loaded from the config
# file given on the command line.
config = None
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Fileserver
|
||||||
|
#
|
||||||
|
|
||||||
|
class FileServerStatus(Enum):
    '''Possible states of a fileserver as determined by FileServerStats

    ``TEMPORARY_DISABLED`` is the canonical member for value 1 (it is the
    name reported by ``.name`` and by iteration); ``TEMPORARILY_DISABLED``
    is an alias for the same member so that either spelling can be used.
    '''
    NORMAL = 0
    TEMPORARY_DISABLED = 1
    # Alias with the correct spelling: a second name bound to an existing
    # value resolves to the very same Enum member.
    TEMPORARILY_DISABLED = 1
    DISABLED = 2
    UNKNOWN = 3
    NO_CONNECTION = 4
|
||||||
|
|
||||||
|
# Usage figures for a single fileserver partition: its name, the used,
# free and total sizes, and the used share as a percentage.
Partition = collections.namedtuple(
    'Partition',
    ['partition', 'used', 'free', 'total', 'percent_used'])
|
||||||
|
|
||||||
|
# Usage figures for a single volume: its name, numeric id (kept as the
# string parsed from the command output), permissions flag (e.g. 'RW' or
# 'RO'), used size, quota, and the used share of the quota as a percentage.
Volume = collections.namedtuple(
    'Volume', 'volume, id, perms, used, quota, percent_used')
|
||||||
|
|
||||||
|
class FileServerStats:
    '''AFS fileserver status

    Statistics are collected by running the ``bos``, ``vos`` and
    ``rxdebug`` command-line tools against ``hostname`` each time
    :meth:`get_stats` is called.

    Note most attributes are only set if ``status`` is NORMAL

    Attributes:
        hostname (:obj:`str`): fileserver the statistics refer to
        status (FileServerStatus): enum of possible status; ``None``
            until :meth:`get_stats` has been called
        timestamp (:obj:`datetime.datetime`): time statistics retrieved
        restart (:obj:`datetime.datetime`): time of last restart
        uptime (:obj:`datetime.timedelta`): current uptime
        partitions (:obj:`list`): list of :obj:`Partition` tuples for each
            partition on the server
        calls_waiting (:obj:`int`): number of calls waiting for a thread
        idle_threads (:obj:`int`): number of currently idle threads
        volumes (:obj:`list`): list of :obj:`Volume` tuples for each
            volume present on the server
        table (:obj:`PrettyTable`): a printable PrettyTable representation;
            ``None`` until :meth:`get_stats` has been called
    '''

    # Number of detail lines read after the "On-line" header line of each
    # volume entry in the ``vos listvol -long`` output.
    _VOLUME_DETAIL_LINES = 8

    # First line of a volume entry: name, id, RO/RW flag and used size.
    _VOLUME_RE = re.compile(
        r'^(?P<volume>[^\s]+)\s+(?P<id>\d+)\s(?P<perms>R[OW])\s+'
        r'(?P<used>\d+) K')
    _QUOTA_RE = re.compile(r'MaxQuota\s+(?P<quota>\d+) K')
    _WAITING_RE = re.compile(r'(?P<waiting>\d+) calls waiting for a thread')
    _IDLE_RE = re.compile(r'(?P<idle>\d+) threads are idle')
    _PARTITION_RE = re.compile(
        r'Free space on partition '
        r'/vicep(?P<partition>[a-z][a-z]?): '
        r'(?P<free>\d+) K blocks out of total (?P<total>\d+)')
    # Fields are separated with \s+ rather than a single space so that a
    # space-padded day of month ("Jan  6") is matched as well.
    _RESTART_RE = re.compile(
        r'last started at '
        r'(?P<date>\w+\s+\w+\s+\d+\s+\d+:\d+:\d+\s+\d+)')

    def __init__(self, hostname):
        self.hostname = hostname
        # Give every documented attribute a value up front so the object
        # can be inspected or printed before get_stats() has run.
        self.status = None
        self.timestamp = None
        self.restart = None
        self.uptime = None
        self.partitions = []
        self.volumes = []
        self.calls_waiting = None
        self.idle_threads = None
        self.table = None

    def __str__(self):
        if self.table is None:
            return '%s: no statistics collected' % self.hostname
        return str(self.table)

    @staticmethod
    def _run(cmd):
        '''Run ``cmd`` and return its combined stdout/stderr as text

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero
        '''
        logging.debug("Running: %s", cmd)
        return subprocess.check_output(
            cmd, stderr=subprocess.STDOUT).decode('ascii')

    @staticmethod
    def _percent(used, total):
        '''Return ``used`` as a percentage of ``total``, to 2 decimals

        A zero ``total`` (for example a volume whose MaxQuota is 0)
        yields 0.0 instead of raising ZeroDivisionError.
        '''
        if not total:
            return 0.0
        return round(float(used) / float(total) * 100, 2)

    @classmethod
    def _parse_restart(cls, output):
        '''Return the "last started at" time in ``output``, or None'''
        m = cls._RESTART_RE.search(output)
        if not m:
            return None
        # Collapse runs of whitespace before handing the date to strptime.
        stamp = ' '.join(m.group('date').split())
        return datetime.strptime(stamp, '%a %b %d %H:%M:%S %Y')

    def _parse_volumes(self, output):
        '''Append a Volume to ``self.volumes`` for each entry in ``output``'''
        stream = io.StringIO(output)
        for line in iter(stream.readline, ''):
            if "On-line" not in line:  # entries start with this
                continue
            # The header line plus the detail lines that follow it make
            # up the chunk describing one volume.
            chunk = line + ''.join(
                stream.readline() for _ in range(self._VOLUME_DETAIL_LINES))
            # todo: there's a bunch more we could extract...
            m = self._VOLUME_RE.search(chunk)
            q = self._QUOTA_RE.search(chunk)
            if not (m and q):
                # Entries whose header or quota line does not match are
                # skipped rather than aborting the whole collection.
                logging.debug("Skipping unparseable volume entry: %r", chunk)
                continue
            used = int(m.group('used'))
            quota = int(q.group('quota'))
            self.volumes.append(Volume(
                m.group('volume'), m.group('id'), m.group('perms'),
                used, quota, self._percent(used, quota)))

    def _parse_calls_waiting(self, output):
        '''Set ``calls_waiting`` and ``idle_threads`` from ``output``'''
        for line in output.split('\n'):
            m = self._WAITING_RE.search(line)
            if m:
                self.calls_waiting = int(m.group('waiting'))
            m = self._IDLE_RE.search(line)
            if m:
                self.idle_threads = int(m.group('idle'))

    def _parse_partitions(self, output):
        '''Append a Partition to ``self.partitions`` for each match'''
        for line in output.split('\n'):
            m = self._PARTITION_RE.search(line)
            if not m:
                continue
            free = int(m.group('free'))
            total = int(m.group('total'))
            used = total - free
            self.partitions.append(Partition(
                'vicep%s' % m.group('partition'), used, free, total,
                self._percent(used, total)))

    def _parse_fs_status(self, output):
        '''Set ``status`` (and restart/uptime when NORMAL) from ``output``'''
        if 'currently running normally' in output:
            self.status = FileServerStatus.NORMAL
            self.restart = self._parse_restart(output)
            if self.restart is None:
                logging.debug("No restart time found in bos output")
            else:
                self.uptime = self.timestamp - self.restart
        # The "temporarily" test must come first: the plain "disabled"
        # phrase below is a substring of it.
        elif 'temporarily disabled, currently shutdown' in output:
            self.status = FileServerStatus.TEMPORARY_DISABLED
        elif 'disabled, currently shutdown' in output:
            self.status = FileServerStatus.DISABLED
        else:
            logging.debug(output)
            self.status = FileServerStatus.UNKNOWN

    def _get_volumes(self):
        '''Collect per-volume usage via ``vos listvol``'''
        self._parse_volumes(self._run(
            ["vos", "listvol", "-long", "-server", self.hostname]))

    def _get_calls_waiting(self):
        '''Collect waiting-call and idle-thread counts via ``rxdebug``'''
        self._parse_calls_waiting(self._run(
            ["rxdebug", self.hostname, "7000", "-rxstats", "-noconns"]))

    def _get_partition_stats(self):
        '''Collect per-partition usage via ``vos partinfo``'''
        self._parse_partitions(self._run(
            ["vos", "partinfo", self.hostname, "-noauth"]))

    def _get_fs_stats(self):
        '''Determine the server status via ``bos status``

        A non-zero exit from ``bos`` is recorded as NO_CONNECTION.
        '''
        cmd = ["bos", "status", self.hostname, "-long", "-noauth"]
        try:
            output = self._run(cmd)
        except subprocess.CalledProcessError:
            logging.debug(" ... failed!")
            self.status = FileServerStatus.NO_CONNECTION
            return
        self._parse_fs_status(output)

    def get_stats(self):
        '''Get the complete stats set for the fileserver'''
        self.timestamp = datetime.now()

        # Reset everything so a repeated call never reports stale data.
        self.restart = None
        self.uptime = None
        self.partitions = []
        self.volumes = []
        self.calls_waiting = None
        self.idle_threads = None

        self._get_fs_stats()
        # The remaining tools are only queried on a healthy server.
        if self.status == FileServerStatus.NORMAL:
            self._get_partition_stats()
            self._get_calls_waiting()
            self._get_volumes()

        self.table = PrettyTable()
        self.table.field_names = ["Metric", "Value"]
        self.table.align["Metric"] = "l"
        self.table.align["Value"] = "l"
        self.table.add_row(["Hostname", self.hostname])
        self.table.add_row(["Timestamp", self.timestamp])
        self.table.add_row(["Status", self.status])
        self.table.add_row(["Uptime", self.uptime])
        self.table.add_row(["Last Restart", self.restart])
        self.table.add_row(["Calls Waiting", self.calls_waiting])
        self.table.add_row(["Idle Threads", self.idle_threads])
        for p in self.partitions:
            n = "/%s" % p.partition
            self.table.add_row(["%s used" % n, p.used])
            self.table.add_row(["%s free" % n, p.free])
            self.table.add_row(["%s total" % n, p.total])
            self.table.add_row(["%s %%used" % n,
                                "%s%%" % p.percent_used])
        # Only read-write volumes are listed in the table.
        for v in self.volumes:
            if v.perms == 'RW':
                n = v.volume
                self.table.add_row(["%s used" % n, v.used])
                self.table.add_row(["%s quota" % n, v.quota])
                self.table.add_row(["%s %%used" % n,
                                    "%s%%" % v.percent_used])
|
||||||
|
|
||||||
|
#
|
||||||
|
# Volume
|
||||||
|
#
|
||||||
|
|
||||||
|
def get_fs_addresses(cell):
    '''Get the fileservers associated with a cell

    Runs ``vos listaddrs`` for ``cell`` and returns one entry per
    non-blank line of its output.

    Args:
        cell (str): name of the cell to query

    Returns:
        list: fileserver addresses with surrounding whitespace removed;
        an empty list if the command exits with an error.
    '''
    cmd = ["vos", "listaddrs", "-noauth", "-cell", cell]
    logging.debug("Running: %s", cmd)
    try:
        output = subprocess.check_output(
            cmd, stderr=subprocess.STDOUT).decode('ascii')
    except subprocess.CalledProcessError:
        logging.debug(" ... failed!")
        return []

    # Strip each entry (as get_volumes does) so stray whitespace or a
    # trailing carriage return never becomes part of a hostname that is
    # later passed on to other commands.
    return [line.strip() for line in output.split('\n') if line.strip()]
|
||||||
|
|
||||||
|
def get_volumes(cell):
    '''Get the volumes in a cell

    Runs ``vos listvldb`` for ``cell``; returns an empty list if the
    command exits with an error.
    '''
    command = ["vos", "listvldb", "-quiet", "-noauth",
               "-noresolve", "-nosort", "-cell", cell]
    logging.debug("Running: %s" % command)
    try:
        raw = subprocess.check_output(command, stderr=subprocess.STDOUT)
        listing = raw.decode('ascii')
    except subprocess.CalledProcessError:
        logging.debug(" ... failed!")
        return []

    # Per-volume details are indented in the output, so every non-empty
    # line that does not start with a space is taken as a volume name.
    return [entry.strip()
            for entry in listing.split('\n')
            if entry and not entry.startswith(' ')]
|
||||||
|
|
||||||
|
|
||||||
def main(args=None):
    '''Command-line entry point: print statistics for each fileserver

    Parses the command line, loads the config file into the module-level
    ``config``, looks up the fileservers of the configured cell and prints
    a statistics table for each one, then exits with status 0.

    Args:
        args (list): command-line arguments; defaults to ``sys.argv[1:]``
    '''
    global config

    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(
        description='An AFS monitoring tool')
    parser.add_argument("config", help="Path to config file")
    parser.add_argument("-d", '--debug', action="store_true")

    # Kept under a separate name so the ``args`` parameter is not shadowed.
    opts = parser.parse_args(args)

    if opts.debug:
        logging.basicConfig(level=logging.DEBUG)
        logging.debug("Debugging enabled")

    config = configparser.RawConfigParser()
    # read() silently skips files it cannot open and returns the list of
    # files it did parse; report that here instead of failing later with
    # an unrelated "No section: 'main'" error.
    if not config.read(opts.config):
        parser.error("Unable to read config file: %s" % opts.config)

    cell = config.get('main', 'cell').strip()

    # NOTE: listing the cell's volumes via get_volumes(cell) is not used yet.

    fileservers = get_fs_addresses(cell)
    print(fileservers)

    for fileserver in fileservers:
        logging.debug("Finding stats for: %s", fileserver)

        fs = FileServerStats(fileserver)
        fs.get_stats()
        print(fs)

    sys.exit(0)
|
||||||
|
@ -4,3 +4,4 @@
|
|||||||
|
|
||||||
pbr!=2.1.0,>=2.0.0 # Apache-2.0
|
pbr!=2.1.0,>=2.0.0 # Apache-2.0
|
||||||
Babel!=2.4.0,>=2.3.4 # BSD
|
Babel!=2.4.0,>=2.3.4 # BSD
|
||||||
|
PrettyTable<0.8 # BSD
|
||||||
|
4
sample.cfg
Normal file
4
sample.cfg
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
[main]
|
||||||
|
cell = openstack.org
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user