Added tool to capture L3 agent status

Change-Id: Ica6be14059dfd38662993265164817661d4a68d2
This commit is contained in:
Mohammed Naser 2020-09-06 22:00:34 -04:00
parent c697f1da8c
commit 14e08c59fe
5 changed files with 84 additions and 0 deletions

2
bindep.txt Normal file
View File

@ -0,0 +1,2 @@
gcc [compile]
libc-dev [compile]

View File

@ -0,0 +1,80 @@
# Copyright (c) 2020 CLOUD&HEAT GmbH https://www.cloudandheat.com
# Copyright 2020 VEXXHOST, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate data for all routers and their L3 HA states.
There are scenarios where an L3 HA router can end up being active in many
different L3 agents. This can be tricky to find and cause chaos in the system.
While effort should be made to find the root cause of this, this tool will
help alert on and catch any occurrences of it.
"""
import argparse
import glob
import time
import os
import psutil
def _collect_metrics(state, metric):
    """Build the metric lines for every router with a running keepalived.

    :param state: Neutron state path; PID files are looked up in its
        ``ha_confs`` sub-directory.
    :param metric: Name of the metric to emit.
    :returns: Text with one ``<metric>{router_id="<id>"} <0|1>`` line per
        router whose recorded keepalived PID exists; the value is 1 when
        the router's ``state`` file contains ``master`` and 0 otherwise.
    """
    ha_conf_dir = os.path.join(state, 'ha_confs')
    pid_glob = os.path.join(ha_conf_dir, '*.pid.keepalived-vrrp')

    lines = []
    for pid_file in glob.glob(pid_glob):
        # A file may vanish between the glob and the read, or be read while
        # it does not hold a number; skip that router instead of crashing.
        try:
            with open(pid_file) as pid_fd:
                pid = int(pid_fd.read())
        except (OSError, ValueError):
            continue

        # Check if the process is _actually_ running
        if not psutil.pid_exists(pid):
            continue

        state_path = pid_file.replace('.pid.keepalived-vrrp', '')
        router_id = os.path.basename(state_path)
        try:
            with open(os.path.join(state_path, 'state')) as state_fd:
                master = 1 if 'master' in state_fd.read() else 0
        except OSError:
            # Same race as above: no readable state, nothing to report.
            continue

        lines.append('%s{router_id="%s"} %d\n' % (metric, router_id, master))

    return ''.join(lines)


def _write_atomically(path, content):
    """Write *content* to *path* so readers never observe a partial file.

    The data goes to ``<path>.tmp`` first and is then renamed over *path*.
    """
    tmp_path = path + '.tmp'
    with open(tmp_path, 'w') as tmp_fd:
        tmp_fd.write(content)
    os.replace(tmp_path, path)


def main():
    """Entry-point for script.

    Parses the command line, collects the metrics and prints them to
    STDOUT. When ``--output`` is given the metrics are additionally written
    to that file. With ``--loop N`` this repeats every N seconds; otherwise
    it runs once and returns.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--metric", default="node_openstack_l3_router_master",
                        help="Name of metric")
    parser.add_argument("--state", default="/var/lib/neutron",
                        help="Neutron state path")
    parser.add_argument("--loop", type=int, help="Loop every N seconds")
    parser.add_argument("--output", help="Output file (default to STDOUT)")
    args = parser.parse_args()

    while True:
        output = _collect_metrics(args.state, args.metric)

        if args.output:
            _write_atomically(args.output, output)
        print(output)

        if not args.loop:
            break
        time.sleep(args.loop)

View File

@ -1 +1,2 @@
ovs ovs
psutil

View File

@ -25,4 +25,5 @@ packages =
[entry_points] [entry_points]
console_scripts = console_scripts =
openstack-prometheus-routers-l3-ha = openstack_tools.cmd.prometheus.routers_l3_ha:main
openstack-cleanup-openvswitch = openstack_tools.cmd.cleanup.openvswitch:main openstack-cleanup-openvswitch = openstack_tools.cmd.cleanup.openvswitch:main