Fix diagnostic tool for Fuel 9

The "fuel nodes" command output on Fuel 9 differs from Fuel 8 for the online
column.

This patch also sets the timeout to 5 minutes for the MCO remote execution
used by the diagnostic script.

And the patch removes the scripts used for 0.8.0 -> 0.8.1 migration.

Change-Id: Ib9537f8cd68a42e08f11721d200bb84231481469
This commit is contained in:
Swann Croiset 2016-06-10 14:05:19 +02:00
parent 24c6782d0a
commit 359a6fd209
5 changed files with 11 additions and 88 deletions

View File

@ -1,11 +0,0 @@
# Description
Scripts and tools for running diagnostics or actions to manage an environment
deployed with StackLight.
remove_heka_package.sh
: This can be used to remove Heka package on all nodes that are up and ready.
stop_services.sh
: This is used to stop hekad and collectd on all nodes that are up and ready.

View File

@ -21,4 +21,8 @@ function check_fuel_nodes_file {
fi
}
# Print the IP address of every node that is both ready and online, one per
# line, based on the output of the 'fuel nodes' command.
#
# Globals:   none
# Arguments: none
# Outputs:   one IP address per line on stdout (nothing if no node matches)
function get_ready_nodes {
    # The "fuel nodes" output differs between Fuel 8 and Fuel 9 for the online
    # column: Fuel 8 prints True/False while Fuel 9 prints 1/0.
    #
    # Columns are '|'-separated: $2 is the status, $5 the IP, $9 the online
    # flag. The status and online columns are compared exactly (after
    # stripping the padding) so that a node which merely has "ready" in its
    # name, or any other column, is not selected by accident.
    fuel nodes | awk -F '|' '
        {
            status = $2
            ip = $5
            online = $9
            gsub(/[ \t]/, "", status)
            gsub(/[ \t]/, "", ip)
            gsub(/[ \t]/, "", online)
        }
        status == "ready" && (online == "1" || online == "True") { print ip }
    '
}

10
contrib/tools/diagnostic.sh Executable file → Normal file
View File

@ -15,18 +15,20 @@
# NOTE(review): this hunk seems to show removed and added lines interleaved
# without +/- markers (DIAG_DIR, node_list and the mco call each appear
# twice) -- confirm against the real diagnostic.sh before running it as-is.
# Stop at the first command that exits with a non-zero status.
set -e
FUEL_NODES_FILE=/tmp/nodes
DIAG_DIR=/var/lma_diagnostics
# Source common.sh from the directory holding this script (symlinks resolved).
. "$(dirname "$(readlink -f "$0")")"/common.sh
DIAG_DIR=/var/lma_diagnostics
# get_ready_nodes is not defined in this script; presumably it is provided by
# the common.sh sourced above -- verify.
node_list=$(get_ready_nodes)
# Start from a fresh, empty local diagnostics directory.
rm -rf "$DIAG_DIR"
mkdir $DIAG_DIR
# Run /usr/local/bin/lma_diagnostics through the MCO execute_shell_command
# agent; stdout and stderr are both captured in outputs.log.
mco rpc --verbose --display all --agent execute_shell_command --action execute --argument cmd="/usr/local/bin/lma_diagnostics" > "$DIAG_DIR/outputs.log" 2>&1
echo "Running lma_diagnostic tool on all nodes which are ready and online (this can take several minutes)"
# Same call with an explicit --timeout of 300 seconds (5 minutes).
mco rpc --timeout 300 --verbose --display all --agent execute_shell_command --action execute --argument cmd="/usr/local/bin/lma_diagnostics" > "$DIAG_DIR/outputs.log" 2>&1
# Node list taken from $FUEL_NODES_FILE: field 5 of the '|'-separated lines
# that contain both "True" and "ready".
node_list=$(grep True $FUEL_NODES_FILE | grep ready | awk -F '|' '{print $5}')
# Copy each node's $DIAG_DIR into a per-node sub-directory. A failed rsync
# only prints a message: the '||' branch keeps 'set -e' from aborting the loop.
for n in $node_list; do
echo "Downloading diagnostic for node $n"
rsync -arz "$n:$DIAG_DIR" "$DIAG_DIR/$n/" || echo "Fail to retrieve diagnostic from $n"
done

View File

@ -1,28 +0,0 @@
#!/bin/bash
# Copyright 2016 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FUEL_NODES_FILE=/tmp/nodes
. "$(dirname "$(readlink -f "$0")")"/common.sh

check_fuel_nodes_file "${FUEL_NODES_FILE}"

# Heka is removed because of an upstream issue with the versioning of the
# heka package: https://github.com/mozilla-services/heka/issues/1892
echo "** Remove Heka package"

# Targets are field 5 of the '|'-separated lines of the nodes file that
# contain both "True" and "ready".
targets=$(grep True $FUEL_NODES_FILE \
    | grep ready \
    | awk -F '|' '{print $5}')

for host in $targets; do
    echo "$host"
    ssh "$host" 'apt-get remove -y heka'
done

View File

@ -1,44 +0,0 @@
#!/bin/bash
# Copyright 2016 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FUEL_NODES_FILE=/tmp/nodes
. "$(dirname "$(readlink -f "$0")")"/common.sh

check_fuel_nodes_file "${FUEL_NODES_FILE}"

# collectd processes may be wedged: ask the init script to stop the service,
# then kill whatever is still running.
# see https://bugs.launchpad.net/lma-toolchain/+bug/1560946
echo "** Stopping collectd"
all_nodes=$(grep True $FUEL_NODES_FILE \
    | grep ready \
    | awk -F '|' '{print $5}')
for host in $all_nodes; do
    echo "$host"
    ssh "$host" '/etc/init.d/collectd stop; pkill -9 collectd'
done

# More than one hekad process may be running on the non-controller nodes:
# stop the service, send TERM, and fall back to KILL after 5 seconds.
# see https://bugs.launchpad.net/lma-toolchain/+bug/1561109
echo "** Stopping hekad"
other_nodes=$(grep -v controller $FUEL_NODES_FILE \
    | grep True \
    | grep ready \
    | awk -F '|' '{print $5}')
for host in $other_nodes; do
    echo "$host"
    ssh "$host" 'service lma_collector stop; pkill -TERM hekad; sleep 5; pkill -9 hekad;'
done

# hekad is stopped on the controllers during the upgrade to avoid losing logs
# and notifications (elasticsearch will be stopped and hekad 0.8.0 does not
# buffer data). Only the last matching controller is contacted (tail -n 1).
echo "** Stopping Heka on controller(s)"
last_controller=$(grep controller $FUEL_NODES_FILE \
    | grep True \
    | grep ready \
    | awk -F '|' '{print $5}' \
    | tail -n 1)
for host in $last_controller; do
    echo "$host"
    ssh "$host" 'crm resource stop lma_collector'
done