# File-viewer residue (not playbook content): 431 lines, 16 KiB, YAML
---
# Best-effort diagnostics collection. Every task in this block runs with
# privilege escalation (become) and a failing task never aborts the play
# (ignore_errors), so one missing tool does not stop the remaining
# collection steps. Most output is written below /var/log/extra.
- become: true
  ignore_errors: true
  block:
    - name: Ensure required rpms for logging are installed
      package:
        state: present
        name:
          - gzip
          - rsync
          - socat
          - tar

    - name: Prepare directory with extra logs
      file:
        path: /var/log/extra
        state: directory

    - name: rpm -qa
      shell: rpm -qa | sort -f >/var/log/extra/rpm-list.txt

    - name: package list installed
      shell: "{{ ansible_pkg_mgr }} list installed >/var/log/extra/package-list-installed.txt"

    - name: Collecting /proc/cpuinfo|meminfo|swaps
      shell: "cat /proc/{{ item }} &> /var/log/extra/{{ item }}.txt"
      with_items:
        - cpuinfo
        - meminfo
        - swaps

    # One section per account listed in /etc/passwd; blank lines, comment
    # lines and PATH lines are filtered out of each crontab.
    - name: Collect installed cron jobs
      shell: |
        for user in $(cut -f1 -d':' /etc/passwd); do
          echo $user
          crontab -u $user -l | grep -v '^$\|^\s*\#\|^\s*PATH'
        done &> /var/log/extra/installed_crons.txt

    # used by OSP Release Engineering to import into internal builds
    # NOTE: the space before the trailing backslash matters; without it the
    # shell glues the repo name and "-a" into a single word.
    - name: package import delorean
      shell: |
        repoquery --disablerepo='*' --enablerepo='delorean' \
          -a --qf '%{sourcerpm}' | sort -u | sed 's/\.src\.rpm$//' >> /var/log/extra/import-delorean.txt

    # used by OSP Release Engineering to import into internal builds
    - name: package import delorean-testing
      shell: |
        repoquery --disablerepo='*' --enablerepo='delorean-*-testing' \
          -a --qf '%{sourcerpm}' | sort -u | sed 's/\.src\.rpm$//' >> /var/log/extra/import-delorean-testing.txt

    # --plain keeps the unit name in the first column (no status bullet),
    # which is what awk extracts for journalctl.
    - name: Collect logs from all failed systemd services
      shell: >-
        systemctl -t service --failed --no-legend --plain | awk '{print $1}'
        | xargs -r -n1 journalctl -u > /var/log/extra/failed_services.txt 2>&1

    # Writes three files: network.txt (interfaces, addresses, routes,
    # iptables), network-netns (the same per network namespace, plus the
    # processes living in it) and network-bridges (OVS bridges and flows).
    - name: Collect network status info
      shell: |
        echo "netstat" > /var/log/extra/network.txt;
        # Append, so the header written above is not truncated away.
        netstat -i &>> /var/log/extra/network.txt;
        for ipv in 4 6; do
          echo "### IPv${ipv} addresses" >> /var/log/extra/network.txt;
          ip -${ipv} a &>> /var/log/extra/network.txt;
          echo "### IPv${ipv} routing" >> /var/log/extra/network.txt;
          ip -${ipv} r &>> /var/log/extra/network.txt;
          echo "### IPTables (IPv${ipv})" &>> /var/log/extra/network.txt;
          test $ipv -eq 4 && iptables-save &>> /var/log/extra/network.txt;
          test $ipv -eq 6 && ip6tables-save &>> /var/log/extra/network.txt;
        done;
        # Keep only the first column: some iproute2 versions print an
        # "(id: N)" suffix after each namespace name.
        (for NS in $(ip netns list | awk '{print $1}'); do
          for ipv in 4 6; do
            echo "==== $NS (${ipv})====";
            echo "### IPv${ipv} addresses";
            ip netns exec $NS ip -${ipv} a;
            echo "### IPv${ipv} routing";
            ip netns exec $NS ip -${ipv} r;
            echo "### IPTables (IPv${ipv})";
            test $ipv -eq 4 && ip netns exec $NS iptables-save;
            test $ipv -eq 6 && ip netns exec $NS ip6tables-save;
          done
          PIDS="$(ip netns pids $NS)";
          [[ ! -z "$PIDS" ]] && ps --no-headers -f --pids "$PIDS";
          echo "";
        done) &>> /var/log/extra/network-netns;
        (for NB in $(ovs-vsctl show | grep Bridge | awk '{print $2}'); do
          echo "==== Bridge name - $NB ====";
          ovs-ofctl show $NB;
          ovs-ofctl dump-flows $NB;
          echo "";
        done;
        ovsdb-client dump) &> /var/log/extra/network-bridges;

    - name: lsof -P -n
      shell: "lsof -P -n &> /var/log/extra/lsof.txt"

    - name: pstree -p
      shell: "pstree -p &> /var/log/extra/pstree.txt"

    - name: sysctl -a
      shell: "sysctl -a &> /var/log/extra/sysctl.txt"

    - name: netstat -lnp
      shell: "netstat -lnp &> /var/log/extra/netstat.txt"

    - name: openstack-status
      shell: "which openstack-status &> /dev/null && (. ~/keystonerc_admin; openstack-status &> /var/log/extra/openstack-status.txt)"
      when: "'controller' in inventory_hostname"

    - name: List nova servers on undercloud
      shell: |
        if [[ -e {{ working_dir }}/stackrc ]]; then
          source {{ working_dir }}/stackrc;
          nova list &> /var/log/extra/nova_list.txt;
        fi

    # Lines end with && / || so the shell continues the command list across
    # newlines; no backslash continuations are needed.
    - name: Get haproxy stats
      shell: |
        pgrep haproxy &&
          test -S /var/lib/haproxy/stats &&
          echo 'show info;show stat;show table' | socat /var/lib/haproxy/stats stdio &> /var/log/extra/haproxy-stats.txt ||
          echo "No HAProxy or no socket on host" > /var/log/extra/haproxy-stats.txt

    - name: lsmod
      shell: "lsmod &> /var/log/extra/lsmod.txt"

    - name: lspci
      shell: "lspci &> /var/log/extra/lspci.txt"

    - name: pip list
      shell: "pip list &> /var/log/extra/pip.txt"

    - name: lvm debug
      shell: "(vgs; pvs; lvs) &> /var/log/extra/lvm.txt"

    - name: Collect services status
      shell: |
        systemctl list-units --full --all &> /var/log/extra/services.txt
        systemctl status "*" &>> /var/log/extra/services.txt

    - name: check if ODL is enabled via docker
      shell: docker ps | grep opendaylight_api
      register: odl_container_enabled

    # Runs only when the docker check found nothing. When this task is
    # skipped, Ansible still overwrites odl_container_enabled with a
    # "skipped" result that has no rc; the conditions below therefore read
    # rc with default(0) (skipped means docker already found ODL).
    - name: check if ODL is enabled via podman
      shell: podman ps | grep opendaylight_api
      register: odl_container_enabled
      when: odl_container_enabled.rc != 0

    - name: check if ODL is enabled via rpm
      shell: rpm -qa | grep opendaylight
      register: odl_rpm_enabled

    - name: Create ODL log directory
      file:
        path: "{{ odl_extra_log_dir }}"
        state: directory
      when: (odl_rpm_enabled.rc == 0) or (odl_container_enabled.rc | default(0) == 0)

    # The rendered script is only executed by the next task, so it is
    # created under the same condition.
    - name: Create ODL extra logs collection script
      template:
        src: "odl_extra_logs.j2"
        dest: "/tmp/odl_extra_logs.sh"
      when: (odl_rpm_enabled.rc == 0) or (odl_container_enabled.rc | default(0) == 0)

    - name: Collect OVS outputs for ODL
      shell: "bash /tmp/odl_extra_logs.sh"
      when: (odl_rpm_enabled.rc == 0) or (odl_container_enabled.rc | default(0) == 0)

    # NOTE(review): the target is hard-coded to /var/log/extra/odl/ while the
    # directory created above is odl_extra_log_dir - confirm they match.
    - name: Collect ODL info and logs (RPM deployment)
      shell: |
        cp /opt/opendaylight/data/log/* /var/log/extra/odl/;
        journalctl -u opendaylight > /var/log/extra/odl/odl_journal.log
      when: odl_rpm_enabled.rc == 0

    - name: Generate human-readable SAR logs
      shell: "[[ -f /usr/lib64/sa/sa2 ]] && /usr/lib64/sa/sa2 -A"

    - name: check for dstat log file
      stat:
        path: /var/log/extra/dstat-csv.log
      register: dstat_logfile

    - name: kill dstat
      shell: "pkill dstat"
      become: true
      when: dstat_logfile.stat.exists

    - name: Get dstat_graph tool
      git:
        repo: "https://github.com/Dabz/dstat_graph.git"
        dest: "/tmp/dstat_graph"
        version: master
      when: dstat_logfile.stat.exists

    - name: Generate HTML dstat graphs if it exists
      shell: "/tmp/dstat_graph/generate_page.sh /var/log/extra/dstat-csv.log > /var/log/extra/dstat.html"
      when: dstat_logfile.stat.exists
      args:
        chdir: "/tmp/dstat_graph"

    # The first grep acts as a guard so the output file is only created when
    # there is at least one match (same pattern in the next two tasks).
    - name: Search for AVC denied
      shell: |
        grep -i denied /var/log/audit/audit* &&
          grep -i denied /var/log/audit/audit* > /var/log/extra/denials.txt

    - name: Search for segfaults in logs
      shell: |
        grep -v ansible-command /var/log/messages | grep segfault &&
          grep -v ansible-command /var/log/messages | grep segfault > /var/log/extra/segfaults.txt

    - name: Search for oom-killer instances in logs
      shell: |
        grep -v ansible-command /var/log/messages | grep oom-killer &&
          grep -v ansible-command /var/log/messages | grep oom-killer > /var/log/extra/oom-killers.txt

    - name: Ensure sos package is installed when collect sosreport(s)
      package:
        name: sos
        state: present
      when: artcl_collect_sosreport|bool

    - name: Collect sosreport
      command: >-
        sosreport {{ artcl_sosreport_options }}
      when: artcl_collect_sosreport|bool

    # Copies DLRN *.log files (keeping their directory layout), gzips them,
    # renames *.log.gz to *.log.txt.gz and moves the tree into
    # artcl_collect_dir/delorean_logs.
    - name: Collect delorean logs
      shell: |
        if [[ -e /home/{{ undercloud_user }}/DLRN/data/repos ]]; then
          rm -rf /tmp/delorean_logs && mkdir /tmp/delorean_logs;
          find /home/{{ undercloud_user }}/DLRN/data/repos/ -name '*.log' -exec cp --parents \{\} /tmp/delorean_logs/ \; ;
          find /tmp/delorean_logs -name '*.log' -exec gzip \{\} \; ;
          find /tmp/delorean_logs -name '*.log.gz' -exec sh -c 'x="{}"; mv "$x" "${x%.log.gz}.log.txt.gz"' \; ;
          rm -rf {{ artcl_collect_dir }}/delorean_logs && mkdir {{ artcl_collect_dir }}/delorean_logs;
          mv /tmp/delorean_logs/home/{{ undercloud_user }}/DLRN/data/repos/* {{ artcl_collect_dir }}/delorean_logs/;
        fi

    # Literal block: the script contains shell comments and multi-line
    # function definitions, so every newline must reach the shell as-is.
    - name: Collect container info and logs
      shell: |
        for engine in docker podman; do

          if [ $engine = 'docker' ]; then
            (command -v docker && systemctl is-active docker) || continue
            # container_cp CONTAINER SRC DEST
            container_cp() {
              docker cp ${1}:${2} $3
            }
          fi

          if [ $engine = 'podman' ]; then
            command -v podman || continue
            # NOTE(cjeanner): podman has no "cp" subcommand, we hence have to mount the container, copy,
            # umount it. More info: https://www.mankier.com/1/podman-cp
            # See also: https://github.com/containers/libpod/issues/613
            container_cp() {
              mnt=$(podman mount $1)
              cp -rT ${mnt}${2} $3
              podman umount $1
            }
          fi

          BASE_CONTAINER_EXTRA=/var/log/extra/${engine};
          mkdir -p $BASE_CONTAINER_EXTRA;
          ALL_FILE=$BASE_CONTAINER_EXTRA/${engine}_allinfo.log;

          CONTAINER_INFO_CMDS=(
            "${engine} ps --all --size"
            "${engine} images"
            "${engine} stats --all --no-stream"
            "${engine} version"
            "${engine} info"
          );
          if [ $engine = 'docker' ]; then
            CONTAINER_INFO_CMDS+=("${engine} volume ls")
          fi
          for cmd in "${CONTAINER_INFO_CMDS[@]}"; do
            echo "+ $cmd" >> $ALL_FILE;
            $cmd >> $ALL_FILE;
            echo "" >> $ALL_FILE;
            echo "" >> $ALL_FILE;
          done;

          # Get only failed containers, in a dedicated file
          ${engine} ps -a | grep -vE ' (IMAGE|Exited \(0\)|Up) ' &>> /var/log/extra/failed_containers.log;

          for cont in $(${engine} ps | awk '{print $NF}' | grep -v NAMES); do
            INFO_DIR=$BASE_CONTAINER_EXTRA/containers/${cont};
            mkdir -p $INFO_DIR;
            (
              set -x;
              if [ $engine = 'docker' ]; then
                ${engine} top $cont auxw;
              # NOTE(cjeanner): `podman top` does not support `ps` options.
              elif [ $engine = 'podman' ]; then
                ${engine} top $cont;
              fi
              ${engine} exec $cont top -bwn1;
              ${engine} exec $cont bash -c "\$(command -v dnf || command -v yum) list installed";
              ${engine} inspect $cont;
            ) &> $INFO_DIR/${engine}_info.log;

            container_cp $cont /var/lib/kolla/config_files/config.json $INFO_DIR/config.json;

            # NOTE(flaper87): This should go away. Services should be
            # using a `logs` volume
            # NOTE(mandre) Do not copy logs if the container is bind mounting /var/log directory
            # Both stdout and stderr of the probe are discarded ("2>1" would
            # instead create a stray file named "1").
            if ! ${engine} exec $cont stat $BASE_CONTAINER_EXTRA > /dev/null 2>&1; then
              container_cp $cont /var/log $INFO_DIR/log;
            fi;

            # Delete symlinks because they break log collection and are generally
            # not useful
            find $INFO_DIR -type l -delete;
          done;

          # NOTE(cjeanner) previous loop cannot have the "-a" flag because of the
          # "exec" calls. So we just loop a second time, over ALL containers,
          # in order to get all the logs we can. For instance, the previous loop
          # would not allow to know why a container is "Exited (1)", preventing
          # efficient debugging.
          for cont in $(${engine} ps -a | awk '{print $NF}' | grep -v NAMES); do
            INFO_DIR=$BASE_CONTAINER_EXTRA/containers/${cont};
            mkdir -p $INFO_DIR;
            ${engine} logs $cont &> $INFO_DIR/stdout.log;
          done;

          # NOTE(flaper87) Copy contents from the logs volume. We can expect this
          # volume to exist in a containerized environment.
          # NOTE(cjeanner): Rather test the existence of the volume, as podman does not
          # have such thing
          if [ -d /var/lib/docker/volumes/logs/_data ]; then
            cp -r /var/lib/docker/volumes/logs/_data $BASE_CONTAINER_EXTRA/logs;
          fi
        done

    - name: Collect config-data
      shell: cp -r /var/lib/config-data/puppet-generated /var/log/config-data

    - name: Collect text version of the journal from last four hours
      shell: journalctl --since=-4h --lines=100000 > /var/log/journal.txt

    # Result is /var/log/extra/errors.txt, or big-errors.txt when the file is
    # larger than 10 MiB (10485760 bytes). ERR_NAME holds the base name only;
    # the extension is appended once by the final mv.
    - name: Collect errors and rename if more than 10 MB
      shell: |
        grep -rE '^[-0-9]+ [0-9:\.]+ [0-9 ]*ERROR ' /var/log/ |
          sed "s/\(.*\)\(20[0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9]\+\)\(.*\)/\2 ERROR \1\3/g" > /tmp/errors.txt;
        if (( $(stat -c "%s" /tmp/errors.txt) > 10485760 )); then
          ERR_NAME=big-errors;
        else
          ERR_NAME=errors;
        fi;
        mv /tmp/errors.txt /var/log/extra/${ERR_NAME}.txt

    # Concatenates the files named in artcl_logstash_files, keeps lines that
    # start with "20" and contain "|", sorts them on the first two fields and
    # inserts " INFO " after the timestamp.
    - name: Create a index file for logstash
      shell: |
        for i in {{ artcl_logstash_files|default([])|join(" ") }}; do
          cat $i;
        done | grep "^20.*|" | sort -sk1,2 |
          sed "s/\(20[0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.*[0-9]*\)\(.*\)/\1 INFO \2/g" > /var/log/extra/logstash.txt

# Build the list of paths to collect: the base list plus the optional
# artcl_collect_list_append. The concatenation happens inside a single Jinja
# expression so the fact is a real list rather than a "[...] + [...]" string
# that depends on Ansible re-evaluating the rendered text.
- name: Set default collect list
  set_fact:
    collect_list: "{{ artcl_collect_list + (artcl_collect_list_append | default([])) }}"

# When artcl_collect_override has an entry keyed by this host's inventory
# name, that entry replaces collect_list.
- name: Override collect list
  when:
    - artcl_collect_override is defined
    - artcl_collect_override[inventory_hostname] is defined
  set_fact:
    collect_list: "{{ artcl_collect_override[inventory_hostname] }}"

# Per-host staging directory that the rsync step copies the logs into.
- name: Create temp directory before gathering logs
  file:
    state: directory
    path: "/tmp/{{ inventory_hostname }}"

# Render the rsync filter rules for this host from rsync-filter.j2; the
# gathering step passes this file to rsync via --filter.
- name: Create rsync filter file
  template:
    dest: "/tmp/{{ inventory_hostname }}-rsync-filter"
    src: "rsync-filter.j2"

# Copy everything selected by the rsync filter from / into the per-host
# staging directory, then normalise it: directories 755, files 644, anything
# that is neither a regular file nor a directory removed, and ownership
# handed to ansible_user.
- name: Gather the logs to /tmp
  become: true
  shell: |
    set -o pipefail &&
      rsync --quiet --recursive --copy-links --prune-empty-dirs \
        --filter '. /tmp/{{ inventory_hostname }}-rsync-filter' / /tmp/{{ inventory_hostname }}
    find /tmp/{{ inventory_hostname }} -type d -print0 | xargs -0 chmod 755
    find /tmp/{{ inventory_hostname }} -type f -print0 | xargs -0 chmod 644
    find /tmp/{{ inventory_hostname }} -not -type f -not -type d -delete
    chown -R {{ ansible_user }}: /tmp/{{ inventory_hostname }}

# Pack the staging directory into /tmp/<inventory_hostname>.tar.gz.
# The working directory is set through args.chdir instead of a free-form
# "chdir=" token embedded in the command text.
- name: Compress logs to tar.gz
  shell: tar czf {{ inventory_hostname }}.tar.gz {{ inventory_hostname }}
  args:
    chdir: /tmp
  when: artcl_tar_gz|bool

# gzip every file in the staging directory in place, then bundle the tree
# into an uncompressed /tmp/<inventory_hostname>.tar.
# The working directory is set through args.chdir instead of a free-form
# "chdir=" token embedded in the command text.
- name: gzip logs individually and tar them
  shell: |
    gzip -r ./{{ inventory_hostname }}
    tar cf {{ inventory_hostname }}.tar {{ inventory_hostname }}
  args:
    chdir: /tmp
  when: artcl_gzip_only|bool

# Pull the compressed archive into artcl_collect_dir; flat keeps the
# destination path exactly as given, and checksum validation is disabled.
- name: Fetch log archive (tar.gz)
  when: artcl_tar_gz|bool
  fetch:
    flat: true
    validate_checksum: false
    src: "/tmp/{{ inventory_hostname }}.tar.gz"
    dest: "{{ artcl_collect_dir }}/{{ inventory_hostname }}.tar.gz"

# Pull the uncompressed tar (of individually gzipped logs) into
# artcl_collect_dir; flat keeps the destination path exactly as given, and
# checksum validation is disabled.
- name: Fetch log archive (tar)
  when: artcl_gzip_only|bool
  fetch:
    flat: true
    validate_checksum: false
    src: "/tmp/{{ inventory_hostname }}.tar"
    dest: "{{ artcl_collect_dir }}/{{ inventory_hostname }}.tar"

# Remove the per-host staging directory; a failure here is ignored.
- name: Delete temporary log directory after collection
  ignore_errors: true
  file:
    state: absent
    path: "/tmp/{{ inventory_hostname }}"

# Runs on localhost, only in gzip-only mode: unpack the fetched tar inside
# artcl_collect_dir and then delete the tar file.
- delegate_to: localhost
  when: artcl_gzip_only|bool
  block:
    # The working directory is set through args.chdir instead of a free-form
    # "chdir=" token embedded in the command text.
    - name: Extract the logs
      shell: tar xf {{ inventory_hostname }}.tar
      args:
        chdir: "{{ artcl_collect_dir }}"

    - name: delete the tar file after extraction
      file:
        path: "{{ artcl_collect_dir }}/{{ inventory_hostname }}.tar"
        state: absent