Split the collect logs task

This split by type in the collect logs is to enable thirdy party who
want's to use collect logs, to choose which type of log they want to
collect. For example, some don't have interest in dstat, or only cares
about openstack logs, or is sure that there isn't container running.

Change-Id: I8e681eef35084a81c7530f51cd6fae0187c6116d
This commit is contained in:
Arx Cruz 2019-08-12 11:57:43 +02:00
parent 9dc533daf2
commit 4bef1611b5
8 changed files with 375 additions and 332 deletions

View File

@ -51,6 +51,9 @@ Collection related
- ``artcl_gzip_only``: false/true When true, gathered files are
gzipped one by one in ``artcl_collect_dir``, when false, a tar.gz
file will contain all the logs.
- ``collect_log_types`` - A list of which type of logs will be collected,
such as openstack logs, network logs, system logs, etc.
Acceptable values are system, monitoring, network, openstack and container.
Documentation generation related
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -205,6 +205,14 @@ ara_tasks_map:
"overcloud-prep-images : Prepare the overcloud images for deploy": prepare_images.seconds
"validate-simple : Validate the overcloud": overcloud.ping_test.seconds
"validate-tempest : Execute tempest": overcloud.tempest.seconds
collect_log_types:
- system
- monitoring
- network
- openstack
- container
# InfluxDB module settings
influxdb_only_successful_tasks: true
influxdb_measurement: test

View File

@ -14,347 +14,27 @@
- name: Prepare directory with extra logs
file: dest=/var/log/extra state=directory
- name: rpm -qa
shell: rpm -qa | sort -f >/var/log/extra/rpm-list.txt
- name: package list installed
shell: "{{ ansible_pkg_mgr }} list installed >/var/log/extra/package-list-installed.txt"
- name: list enabled repositories
shell: "{{ ansible_pkg_mgr }} repolist -v >/var/log/extra/repolist.txt"
- name: Collecting /proc/cpuinfo|meminfo|swaps
shell: "cat /proc/{{ item }} &> /var/log/extra/{{ item }}.txt"
with_items:
- cpuinfo
- meminfo
- swaps
- name: Collect installed cron jobs
shell: |
for user in $(cut -f1 -d':' /etc/passwd); do \
echo $user; crontab -u $user -l | grep -v '^$\|^\s*\#\|^\s*PATH'; done \
&> /var/log/extra/installed_crons.txt
# used by OSP Release Engineering to import into internal builds
- name: package import delorean
shell: |
repoquery --disablerepo='*' --enablerepo='delorean'\
-a --qf '%{sourcerpm}'|sort -u|sed 's/.src.rpm//g' >> /var/log/extra/import-delorean.txt
# used by OSP Release Engineering to import into internal builds
- name: package import delorean-testing
shell: |
repoquery --disablerepo='*' --enablerepo='delorean-*-deps'\
-a --qf '%{sourcerpm}'|sort -u|sed 's/.src.rpm//g' >> /var/log/extra/import-delorean-deps.txt
- name: Collect logs from all failed systemd services
shell: >
systemctl -t service --failed --no-legend | awk '{print $1}'
| xargs -r -n1 journalctl -u > /var/log/extra/failed_services.txt 2>&1
- name: Collect network status info
shell: >
echo "netstat" > /var/log/extra/network.txt;
netstat -i &> /var/log/extra/network.txt;
for ipv in 4 6; do
echo "### IPv${ipv} addresses" >> /var/log/extra/network.txt;
ip -${ipv} a &>> /var/log/extra/network.txt;
echo "### IPv${ipv} routing" >> /var/log/extra/network.txt;
ip -${ipv} r &>> /var/log/extra/network.txt;
echo "### IPTables (IPv${ipv})" &>> /var/log/extra/network.txt;
test $ipv -eq 4 && iptables-save &>> /var/log/extra/network.txt;
test $ipv -eq 6 && ip6tables-save &>> /var/log/extra/network.txt;
done;
(for NS in $(ip netns list); do
for ipv in 4 6; do
echo "==== $NS (${ipv})====";
echo "### IPv${ipv} addresses";
ip netns exec $NS ip -${ipv} a;
echo "### IPv${ipv} routing";
ip netns exec $NS ip -${ipv} r;
echo "### IPTables (IPv${ipv})";
test $ipv -eq 4 && ip netns exec $NS ip iptables-save;
test $ipv -eq 6 && ip netns exec $NS ip ip6tables-save;
done
PIDS="$(ip netns pids $NS)";
[[ ! -z "$PIDS" ]] && ps --no-headers -f --pids "$PIDS";
echo "";
done) &>> /var/log/extra/network-netns;
(for NB in $(ovs-vsctl show | grep Bridge |awk '{print $2}'); do
echo "==== Bridge name - $NB ====";
ovs-ofctl show $NB;
ovs-ofctl dump-flows $NB;
echo "";
done;
ovsdb-client dump) &> /var/log/extra/network-bridges;
- name: lsof -P -n
shell: "lsof -P -n &> /var/log/extra/lsof.txt"
- name: pstree -p
shell: "pstree -p &> /var/log/extra/pstree.txt"
- name: sysctl -a
shell: "sysctl -a &> /var/log/extra/sysctl.txt"
- name: netstat -lnp
shell: "netstat -lnp &> /var/log/extra/netstat.txt"
- name: openstack-status
shell: "which openstack-status &> /dev/null && (. ~/keystonerc_admin; openstack-status &> /var/log/extra/openstack-status.txt)"
when: "'controller' in inventory_hostname"
- name: List nova servers on undercloud
shell: >
if [[ -e {{ working_dir }}/stackrc ]]; then
source {{ working_dir }}/stackrc;
nova list &> /var/log/extra/nova_list.txt;
fi
- name: Get haproxy stats
shell: >
pgrep haproxy && \
test -S /var/lib/haproxy/stats && \
echo 'show info;show stat;show table' | socat /var/lib/haproxy/stats stdio &> /var/log/extra/haproxy-stats.txt || \
echo "No HAProxy or no socket on host" > /var/log/extra/haproxy-stats.txt
- name: lsmod
shell: "lsmod &> /var/log/extra/lsmod.txt"
- name: lspci
shell: "lspci &> /var/log/extra/lspci.txt"
- name: pip list
shell: "pip list &> /var/log/extra/pip.txt"
- name: lvm debug
shell: "(vgs; pvs; lvs) &> /var/log/extra/lvm.txt"
- name: Collect services status
shell: |
systemctl list-units --full --all &> /var/log/extra/services.txt
systemctl status "*" &>> /var/log/extra/services.txt
- name: check if ODL is enabled via docker
shell: docker ps | grep opendaylight_api
register: odl_container_enabled
- name: check if ODL is enabled via podman
shell: podman ps | grep opendaylight_api
register: odl_container_enabled
when: odl_container_enabled.rc != 0
- name: check if ODL is enabled via rpm
shell: rpm -qa | grep opendaylight
register: odl_rpm_enabled
- name: Create ODL log directory
file: dest="{{ odl_extra_log_dir }}" state=directory
when: (odl_rpm_enabled.rc == 0) or (odl_container_enabled.rc == 0)
- name: Create rsync filter file
template:
src: "odl_extra_logs.j2"
dest: "/tmp/odl_extra_logs.sh"
- name: Collect OVS outputs for ODL
shell: "bash /tmp/odl_extra_logs.sh"
when: (odl_rpm_enabled.rc == 0) or (odl_container_enabled.rc == 0)
# Change the collect_log_types if you don't want to collect
# some specific logs
- import_tasks: collect/system.yml
when: "'system' in collect_log_types"
- name: Collect ODL info and logs (RPM deployment)
shell: >
cp /opt/opendaylight/data/log/* /var/log/extra/odl/;
journalctl -u opendaylight > /var/log/extra/odl/odl_journal.log
when: odl_rpm_enabled.rc == 0
- import_tasks: collect/network.yml
when: "'network' in collect_log_types"
- name: Generate human-readable SAR logs
shell: "[[ -f /usr/lib64/sa/sa2 ]] && /usr/lib64/sa/sa2 -A"
- import_tasks: collect/container.yml
when: "'container' in collect_log_types"
- name: check for dstat log file
stat: path=/var/log/extra/dstat-csv.log
register: dstat_logfile
- import_tasks: collect/monitoring.yml
when: "'monitoring' in collect_log_types"
- name: kill dstat
shell: "pkill dstat"
become: true
when: dstat_logfile.stat.exists
- name: Get dstat_graph tool
git:
repo: "https://github.com/Dabz/dstat_graph.git"
dest: "/tmp/dstat_graph"
version: master
when: dstat_logfile.stat.exists
- name: Generate HTML dstat graphs if it exists
shell: "/tmp/dstat_graph/generate_page.sh /var/log/extra/dstat-csv.log > /var/log/extra/dstat.html"
when: dstat_logfile.stat.exists
args:
chdir: "/tmp/dstat_graph"
- name: Search for AVC denied
shell: >
grep -i denied /var/log/audit/audit* &&
grep -i denied /var/log/audit/audit* > /var/log/extra/denials.txt
- name: Search for segfaults in logs
shell: >
grep -v ansible-command /var/log/messages | grep segfault &&
grep -v ansible-command /var/log/messages | grep segfault > /var/log/extra/segfaults.txt
- name: Search for oom-killer instances in logs
shell: >
grep -v ansible-command /var/log/messages | grep oom-killer &&
grep -v ansible-command /var/log/messages | grep oom-killer > /var/log/extra/oom-killers.txt
- name: Ensure sos package is installed when collect sosreport(s)
package:
name: sos
state: present
when: artcl_collect_sosreport|bool
- name: Collect sosreport
command: >
sosreport {{ artcl_sosreport_options }}
when: artcl_collect_sosreport|bool
- name: Collect delorean logs
shell: >
if [[ -e /home/{{ undercloud_user }}/DLRN/data/repos ]]; then
rm -rf /tmp/delorean_logs && mkdir /tmp/delorean_logs;
find /home/{{ undercloud_user }}/DLRN/data/repos/ -name '*.log' -exec cp --parents \{\} /tmp/delorean_logs/ \; ;
find /tmp/delorean_logs -name '*.log' -exec gzip \{\} \; ;
find /tmp/delorean_logs -name '*.log.gz' -exec sh -c 'x="{}"; mv "$x" "${x%.log.gz}.log.txt.gz"' \; ;
rm -rf {{ artcl_collect_dir }}/delorean_logs && mkdir {{ artcl_collect_dir }}/delorean_logs;
mv /tmp/delorean_logs/home/{{ undercloud_user }}/DLRN/data/repos/* {{ artcl_collect_dir }}/delorean_logs/;
fi
- name: Collect container info and logs
shell: >
for engine in docker podman; do
if [ $engine = 'docker' ]; then
(command -v docker && systemctl is-active docker) || continue
# container_cp CONTAINER SRC DEST
container_cp() {
docker cp ${1}:${2} $3
}
# NOTE(ykarel) podman inspect returns 'source' while docker inspect returns 'Source'
source_mount=Source
fi
if [ $engine = 'podman' ]; then
command -v podman || continue
# NOTE(cjeanner): podman has no "cp" subcommand, we hence have to mount the container, copy,
# umount it. More info: https://www.mankier.com/1/podman-cp
# See also: https://github.com/containers/libpod/issues/613
container_cp() {
mnt=$(podman mount $1)
cp -rT ${mnt}${2} $3
podman umount $1
}
# NOTE(ykarel) podman inspect returns 'source' while docker inspect returns 'Source'
source_mount=source
fi
BASE_CONTAINER_EXTRA=/var/log/extra/${engine};
mkdir -p $BASE_CONTAINER_EXTRA;
ALL_FILE=$BASE_CONTAINER_EXTRA/${engine}_allinfo.log;
CONTAINER_INFO_CMDS=(
"${engine} ps --all --size"
"${engine} images"
"${engine} stats --all --no-stream"
"${engine} version"
"${engine} info"
);
if [ $engine = 'docker' ]; then
CONTAINER_INFO_CMDS+=("${engine} volume ls")
fi
for cmd in "${CONTAINER_INFO_CMDS[@]}"; do
echo "+ $cmd" >> $ALL_FILE;
$cmd >> $ALL_FILE;
echo "" >> $ALL_FILE;
echo "" >> $ALL_FILE;
done;
# Get only failed containers, in a dedicated file
${engine} ps -a | grep -vE ' (IMAGE|Exited \(0\)|Up) ' &>> /var/log/extra/failed_containers.log;
for cont in $(${engine} ps | awk {'print $NF'} | grep -v NAMES); do
INFO_DIR=$BASE_CONTAINER_EXTRA/containers/${cont};
mkdir -p $INFO_DIR;
(
set -x;
if [ $engine = 'docker' ]; then
${engine} top $cont auxw;
# NOTE(cjeanner): `podman top` does not support `ps` options.
elif [ $engine = 'podman' ]; then
${engine} top $cont;
fi
${engine} exec $cont top -bwn1;
${engine} exec $cont bash -c "\$(command -v dnf || command -v yum) list installed";
${engine} inspect $cont;
) &> $INFO_DIR/${engine}_info.log;
container_cp $cont /var/lib/kolla/config_files/config.json $INFO_DIR/config.json;
# NOTE(flaper87): This should go away. Services should be
# using a `logs` volume
# NOTE(mandre) Do not copy logs if the containers is bind mounting /var/log directory
if ! ${engine} inspect $cont | jq .[0].Mounts[].$source_mount | grep -x '"/var/log[/]*"' 2>1 > /dev/null; then
container_cp $cont /var/log $INFO_DIR/log;
fi;
# Delete symlinks because they break log collection and are generally
# not useful
find $INFO_DIR -type l -delete;
done;
# NOTE(cjeanner) previous loop cannot have the "-a" flag because of the
# "exec" calls. So we just loop a second time, over ALL containers,
# in order to get all the logs we can. For instance, the previous loop
# would not allow to know why a container is "Exited (1)", preventing
# efficient debugging.
for cont in $(${engine} ps -a | awk {'print $NF'} | grep -v NAMES); do
INFO_DIR=$BASE_CONTAINER_EXTRA/containers/${cont};
mkdir -p $INFO_DIR;
${engine} logs $cont &> $INFO_DIR/stdout.log;
done;
# NOTE(flaper87) Copy contents from the logs volume. We can expect this
# volume to exist in a containerized environment.
# NOTE(cjeanner): Rather test the eXistenZ of the volume, as podman does not
# have such thing
if [ -d /var/lib/docker/volumes/logs/_data ]; then
cp -r /var/lib/docker/volumes/logs/_data $BASE_CONTAINER_EXTRA/logs;
fi
done
- name: Collect config-data
shell: cp -r /var/lib/config-data/puppet-generated /var/log/config-data
- name: Collect text version of the journal from last four hours
shell: journalctl --since=-4h --lines=100000 > /var/log/journal.txt
- name: Collect errors and rename if more than 10 MB
shell: >
grep -rE '^[-0-9]+ [0-9:\.]+ [0-9 ]*ERROR ' /var/log/ |
sed "s/\(.*\)\(20[0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9]\+\)\(.*\)/\2 ERROR \1\3/g" > /tmp/errors.txt;
if (( $(stat -c "%s" /tmp/errors.txt) > 10485760 )); then
ERR_NAME=big-errors.txt;
else
ERR_NAME=errors.txt;
fi;
mv /tmp/errors.txt /var/log/extra/${ERR_NAME}.txt
- name: Create a index file for logstash
shell: >
for i in {{ artcl_logstash_files|default([])|join(" ") }}; do
cat $i; done | grep "^20.*|" | sort -sk1,2 |
sed "s/\(20[0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.*[0-9]*\)\(.*\)/\1 INFO \2/g" > /var/log/extra/logstash.txt
- import_tasks: collect/openstack.yml
when: "'openstack' in collect_log_types"
- name: Set default collect list
set_fact:

133
tasks/collect/container.yml Normal file
View File

@ -0,0 +1,133 @@
---
- become: true
ignore_errors: true
block:
- name: check if ODL is enabled via docker
shell: docker ps | grep opendaylight_api
register: odl_container_enabled
- name: check if ODL is enabled via podman
shell: podman ps | grep opendaylight_api
register: odl_container_enabled
when: odl_container_enabled.rc != 0
- name: check if ODL is enabled via rpm
shell: rpm -qa | grep opendaylight
register: odl_rpm_enabled
- name: Create ODL log directory
file: dest="{{ odl_extra_log_dir }}" state=directory
when: (odl_rpm_enabled.rc == 0) or (odl_container_enabled.rc == 0)
- name: Collect OVS outputs for ODL
shell: "bash /tmp/odl_extra_logs.sh"
when: (odl_rpm_enabled.rc == 0) or (odl_container_enabled.rc == 0)
- name: Collect ODL info and logs (RPM deployment)
shell: >
cp /opt/opendaylight/data/log/* /var/log/extra/odl/;
journalctl -u opendaylight > /var/log/extra/odl/odl_journal.log
when: odl_rpm_enabled.rc == 0
- name: Collect container info and logs
shell: >
for engine in docker podman; do
if [ $engine = 'docker' ]; then
(command -v docker && systemctl is-active docker) || continue
# container_cp CONTAINER SRC DEST
container_cp() {
docker cp ${1}:${2} $3
}
# NOTE(ykarel) podman inspect returns 'source' while docker inspect returns 'Source'
source_mount=Source
fi
if [ $engine = 'podman' ]; then
command -v podman || continue
# NOTE(cjeanner): podman has no "cp" subcommand, we hence have to mount the container, copy,
# umount it. More info: https://www.mankier.com/1/podman-cp
# See also: https://github.com/containers/libpod/issues/613
container_cp() {
mnt=$(podman mount $1)
cp -rT ${mnt}${2} $3
podman umount $1
}
# NOTE(ykarel) podman inspect returns 'source' while docker inspect returns 'Source'
source_mount=source
fi
BASE_CONTAINER_EXTRA=/var/log/extra/${engine};
mkdir -p $BASE_CONTAINER_EXTRA;
ALL_FILE=$BASE_CONTAINER_EXTRA/${engine}_allinfo.log;
CONTAINER_INFO_CMDS=(
"${engine} ps --all --size"
"${engine} images"
"${engine} stats --all --no-stream"
"${engine} version"
"${engine} info"
);
if [ $engine = 'docker' ]; then
CONTAINER_INFO_CMDS+=("${engine} volume ls")
fi
for cmd in "${CONTAINER_INFO_CMDS[@]}"; do
echo "+ $cmd" >> $ALL_FILE;
$cmd >> $ALL_FILE;
echo "" >> $ALL_FILE;
echo "" >> $ALL_FILE;
done;
# Get only failed containers, in a dedicated file
${engine} ps -a | grep -vE ' (IMAGE|Exited \(0\)|Up) ' &>> /var/log/extra/failed_containers.log;
for cont in $(${engine} ps | awk {'print $NF'} | grep -v NAMES); do
INFO_DIR=$BASE_CONTAINER_EXTRA/containers/${cont};
mkdir -p $INFO_DIR;
(
set -x;
if [ $engine = 'docker' ]; then
${engine} top $cont auxw;
# NOTE(cjeanner): `podman top` does not support `ps` options.
elif [ $engine = 'podman' ]; then
${engine} top $cont;
fi
${engine} exec $cont top -bwn1;
${engine} exec $cont bash -c "\$(command -v dnf || command -v yum) list installed";
${engine} inspect $cont;
) &> $INFO_DIR/${engine}_info.log;
container_cp $cont /var/lib/kolla/config_files/config.json $INFO_DIR/config.json;
# NOTE(flaper87): This should go away. Services should be
# using a `logs` volume
# NOTE(mandre) Do not copy logs if the containers is bind mounting /var/log directory
if ! ${engine} inspect $cont | jq .[0].Mounts[].$source_mount | grep -x '"/var/log[/]*"' 2>1 > /dev/null; then
container_cp $cont /var/log $INFO_DIR/log;
fi;
# Delete symlinks because they break log collection and are generally
# not useful
find $INFO_DIR -type l -delete;
done;
# NOTE(cjeanner) previous loop cannot have the "-a" flag because of the
# "exec" calls. So we just loop a second time, over ALL containers,
# in order to get all the logs we can. For instance, the previous loop
# would not allow to know why a container is "Exited (1)", preventing
# efficient debugging.
for cont in $(${engine} ps -a | awk {'print $NF'} | grep -v NAMES); do
INFO_DIR=$BASE_CONTAINER_EXTRA/containers/${cont};
mkdir -p $INFO_DIR;
${engine} logs $cont &> $INFO_DIR/stdout.log;
done;
# NOTE(flaper87) Copy contents from the logs volume. We can expect this
# volume to exist in a containerized environment.
# NOTE(cjeanner): Rather test the eXistenZ of the volume, as podman does not
# have such thing
if [ -d /var/lib/docker/volumes/logs/_data ]; then
cp -r /var/lib/docker/volumes/logs/_data $BASE_CONTAINER_EXTRA/logs;
fi
done

View File

@ -0,0 +1,39 @@
---
- become: true
ignore_errors: true
block:
- name: check for dstat log file
stat: path=/var/log/extra/dstat-csv.log
register: dstat_logfile
- name: kill dstat
shell: "pkill dstat"
become: true
when: dstat_logfile.stat.exists
- name: Get dstat_graph tool
git:
repo: "https://github.com/Dabz/dstat_graph.git"
dest: "/tmp/dstat_graph"
version: master
when: dstat_logfile.stat.exists
- name: Generate HTML dstat graphs if it exists
shell: "/tmp/dstat_graph/generate_page.sh /var/log/extra/dstat-csv.log > /var/log/extra/dstat.html"
when: dstat_logfile.stat.exists
args:
chdir: "/tmp/dstat_graph"
- name: Generate human-readable SAR logs
shell: "[[ -f /usr/lib64/sa/sa2 ]] && /usr/lib64/sa/sa2 -A"
- name: Ensure sos package is installed when collect sosreport(s)
package:
name: sos
state: present
when: artcl_collect_sosreport|bool
- name: Collect sosreport
command: >
sosreport {{ artcl_sosreport_options }}
when: artcl_collect_sosreport|bool

42
tasks/collect/network.yml Normal file
View File

@ -0,0 +1,42 @@
---
- become: true
ignore_errors: true
block:
- name: netstat -lnp
shell: "netstat -lnp &> /var/log/extra/netstat.txt"
- name: Collect network status info
shell: >
echo "netstat" > /var/log/extra/network.txt;
netstat -i &> /var/log/extra/network.txt;
for ipv in 4 6; do
echo "### IPv${ipv} addresses" >> /var/log/extra/network.txt;
ip -${ipv} a &>> /var/log/extra/network.txt;
echo "### IPv${ipv} routing" >> /var/log/extra/network.txt;
ip -${ipv} r &>> /var/log/extra/network.txt;
echo "### IPTables (IPv${ipv})" &>> /var/log/extra/network.txt;
test $ipv -eq 4 && iptables-save &>> /var/log/extra/network.txt;
test $ipv -eq 6 && ip6tables-save &>> /var/log/extra/network.txt;
done;
(for NS in $(ip netns list); do
for ipv in 4 6; do
echo "==== $NS (${ipv})====";
echo "### IPv${ipv} addresses";
ip netns exec $NS ip -${ipv} a;
echo "### IPv${ipv} routing";
ip netns exec $NS ip -${ipv} r;
echo "### IPTables (IPv${ipv})";
test $ipv -eq 4 && ip netns exec $NS ip iptables-save;
test $ipv -eq 6 && ip netns exec $NS ip ip6tables-save;
done
PIDS="$(ip netns pids $NS)";
[[ ! -z "$PIDS" ]] && ps --no-headers -f --pids "$PIDS";
echo "";
done) &>> /var/log/extra/network-netns;
(for NB in $(ovs-vsctl show | grep Bridge |awk '{print $2}'); do
echo "==== Bridge name - $NB ====";
ovs-ofctl show $NB;
ovs-ofctl dump-flows $NB;
echo "";
done;
ovsdb-client dump) &> /var/log/extra/network-bridges;

View File

@ -0,0 +1,14 @@
---
- become: true
ignore_errors: true
block:
- name: openstack-status
shell: "which openstack-status &> /dev/null && (. ~/keystonerc_admin; openstack-status &> /var/log/extra/openstack-status.txt)"
when: "'controller' in inventory_hostname"
- name: List nova servers on undercloud
shell: >
if [[ -e {{ working_dir }}/stackrc ]]; then
source {{ working_dir }}/stackrc;
nova list &> /var/log/extra/nova_list.txt;
fi

124
tasks/collect/system.yml Normal file
View File

@ -0,0 +1,124 @@
---
- become: true
ignore_errors: true
block:
- name: rpm -qa
shell: rpm -qa | sort -f >/var/log/extra/rpm-list.txt
- name: package list installed
shell: "{{ ansible_pkg_mgr }} list installed >/var/log/extra/package-list-installed.txt"
- name: list enabled repositories
shell: "{{ ansible_pkg_mgr }} repolist -v >/var/log/extra/repolist.txt"
- name: Collecting /proc/cpuinfo|meminfo|swaps
shell: "cat /proc/{{ item }} &> /var/log/extra/{{ item }}.txt"
with_items:
- cpuinfo
- meminfo
- swaps
- name: Collect installed cron jobs
shell: |
for user in $(cut -f1 -d':' /etc/passwd); do \
echo $user; crontab -u $user -l | grep -v '^$\|^\s*\#\|^\s*PATH'; done \
&> /var/log/extra/installed_crons.txt
# used by OSP Release Engineering to import into internal builds
- name: package import delorean
shell: |
repoquery --disablerepo='*' --enablerepo='delorean'\
-a --qf '%{sourcerpm}'|sort -u|sed 's/.src.rpm//g' >> /var/log/extra/import-delorean.txt
# used by OSP Release Engineering to import into internal builds
- name: package import delorean-testing
shell: |
repoquery --disablerepo='*' --enablerepo='delorean-*-deps'\
-a --qf '%{sourcerpm}'|sort -u|sed 's/.src.rpm//g' >> /var/log/extra/import-delorean-deps.txt
- name: Collect logs from all failed systemd services
shell: >
systemctl -t service --failed --no-legend | awk '{print $1}'
| xargs -r -n1 journalctl -u > /var/log/extra/failed_services.txt 2>&1
- name: lsof -P -n
shell: "lsof -P -n &> /var/log/extra/lsof.txt"
- name: pstree -p
shell: "pstree -p &> /var/log/extra/pstree.txt"
- name: sysctl -a
shell: "sysctl -a &> /var/log/extra/sysctl.txt"
- name: Get haproxy stats
shell: >
pgrep haproxy && \
test -S /var/lib/haproxy/stats && \
echo 'show info;show stat;show table' | socat /var/lib/haproxy/stats stdio &> /var/log/extra/haproxy-stats.txt || \
echo "No HAProxy or no socket on host" > /var/log/extra/haproxy-stats.txt
- name: lsmod
shell: "lsmod &> /var/log/extra/lsmod.txt"
- name: lspci
shell: "lspci &> /var/log/extra/lspci.txt"
- name: pip list
shell: "pip list &> /var/log/extra/pip.txt"
- name: lvm debug
shell: "(vgs; pvs; lvs) &> /var/log/extra/lvm.txt"
- name: Collect services status
shell: |
systemctl list-units --full --all &> /var/log/extra/services.txt
systemctl status "*" &>> /var/log/extra/services.txt
- name: Search for AVC denied
shell: >
grep -i denied /var/log/audit/audit* &&
grep -i denied /var/log/audit/audit* > /var/log/extra/denials.txt
- name: Search for segfaults in logs
shell: >
grep -v ansible-command /var/log/messages | grep segfault &&
grep -v ansible-command /var/log/messages | grep segfault > /var/log/extra/segfaults.txt
- name: Search for oom-killer instances in logs
shell: >
grep -v ansible-command /var/log/messages | grep oom-killer &&
grep -v ansible-command /var/log/messages | grep oom-killer > /var/log/extra/oom-killers.txt
- name: Collect delorean logs
shell: >
if [[ -e /home/{{ undercloud_user }}/DLRN/data/repos ]]; then
rm -rf /tmp/delorean_logs && mkdir /tmp/delorean_logs;
find /home/{{ undercloud_user }}/DLRN/data/repos/ -name '*.log' -exec cp --parents \{\} /tmp/delorean_logs/ \; ;
find /tmp/delorean_logs -name '*.log' -exec gzip \{\} \; ;
find /tmp/delorean_logs -name '*.log.gz' -exec sh -c 'x="{}"; mv "$x" "${x%.log.gz}.log.txt.gz"' \; ;
rm -rf {{ artcl_collect_dir }}/delorean_logs && mkdir {{ artcl_collect_dir }}/delorean_logs;
mv /tmp/delorean_logs/home/{{ undercloud_user }}/DLRN/data/repos/* {{ artcl_collect_dir }}/delorean_logs/;
fi
- name: Collect config-data
shell: cp -r /var/lib/config-data/puppet-generated /var/log/config-data
- name: Collect text version of the journal from last four hours
shell: journalctl --since=-4h --lines=100000 > /var/log/journal.txt
- name: Collect errors and rename if more than 10 MB
shell: >
grep -rE '^[-0-9]+ [0-9:\.]+ [0-9 ]*ERROR ' /var/log/ |
sed "s/\(.*\)\(20[0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9]\+\)\(.*\)/\2 ERROR \1\3/g" > /tmp/errors.txt;
if (( $(stat -c "%s" /tmp/errors.txt) > 10485760 )); then
ERR_NAME=big-errors.txt;
else
ERR_NAME=errors.txt;
fi;
mv /tmp/errors.txt /var/log/extra/${ERR_NAME}.txt
- name: Create a index file for logstash
shell: >
for i in {{ artcl_logstash_files|default([])|join(" ") }}; do
cat $i; done | grep "^20.*|" | sort -sk1,2 |
sed "s/\(20[0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.*[0-9]*\)\(.*\)/\1 INFO \2/g" > /var/log/extra/logstash.txt