tripleo-quickstart-extras/roles/collect-logs/tasks/collect.yml

---
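# Gather diagnostic command output and log files from each host into
# /var/log/extra, then archive everything and fetch it back to the
# collect directory (artcl_collect_dir) on the control node.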
- become: true
  ignore_errors: true
  block:
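    # NOTE: ignore_errors is set on the whole block so that a single failing
    # diagnostic command does not abort the rest of the collection.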
    - name: Ensure required rpms for logging are installed
      package:
        state: present
        name:
          - gzip
          - rsync
          - socat
          - tar
    - name: Prepare directory with extra logs
      file: dest=/var/log/extra state=directory
    - name: rpm -qa
      shell: rpm -qa | sort -f > /var/log/extra/rpm-list.txt
    - name: package list installed
      shell: "{{ ansible_pkg_mgr }} list installed > /var/log/extra/package-list-installed.txt"
    - name: Collecting /proc/cpuinfo|meminfo|swaps
      shell: "cat /proc/{{ item }} &> /var/log/extra/{{ item }}.txt"
      with_items:
        - cpuinfo
        - meminfo
        - swaps
    - name: Collect installed cron jobs
      shell: |
        for user in $(cut -f1 -d':' /etc/passwd); do \
        echo $user; crontab -u $user -l | grep -v '^$\|^\s*\#\|^\s*PATH'; done \
        &> /var/log/extra/installed_crons.txt
    # used by OSP Release Engineering to import into internal builds
    - name: package import delorean
      shell: |
        repoquery --disablerepo='*' --enablerepo='delorean' \
        -a --qf '%{sourcerpm}' | sort -u | sed 's/.src.rpm//g' >> /var/log/extra/import-delorean.txt
    # used by OSP Release Engineering to import into internal builds
    - name: package import delorean-testing
      shell: |
        repoquery --disablerepo='*' --enablerepo='delorean-*-testing' \
        -a --qf '%{sourcerpm}' | sort -u | sed 's/.src.rpm//g' >> /var/log/extra/import-delorean-testing.txt
    - name: Collect logs from all failed systemd services
      shell: >
        systemctl -t service --failed --no-legend | awk '{print $1}'
        | xargs -r -n1 journalctl -u > /var/log/extra/services.txt 2>&1
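    # Dump the network state three ways: the main namespace (interfaces,
    # addresses, routes, firewall rules), every network namespace, and the
    # OVS bridges (ports, flows, full ovsdb dump).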
    - name: Collect network status info
      shell: >
        echo "netstat" > /var/log/extra/network.txt;
        netstat -i &>> /var/log/extra/network.txt;
        for ipv in 4 6; do
          echo "### IPv${ipv} addresses" >> /var/log/extra/network.txt;
          ip -${ipv} a &>> /var/log/extra/network.txt;
          echo "### IPv${ipv} routing" >> /var/log/extra/network.txt;
          ip -${ipv} r &>> /var/log/extra/network.txt;
          echo "### IPTables (IPv${ipv})" >> /var/log/extra/network.txt;
          test $ipv -eq 4 && iptables-save &>> /var/log/extra/network.txt;
          test $ipv -eq 6 && ip6tables-save &>> /var/log/extra/network.txt;
        done;
        (for NS in $(ip netns list); do
          for ipv in 4 6; do
            echo "==== $NS (IPv${ipv}) ====";
            echo "### IPv${ipv} addresses";
            ip netns exec $NS ip -${ipv} a;
            echo "### IPv${ipv} routing";
            ip netns exec $NS ip -${ipv} r;
            echo "### IPTables (IPv${ipv})";
            test $ipv -eq 4 && ip netns exec $NS iptables-save;
            test $ipv -eq 6 && ip netns exec $NS ip6tables-save;
          done
          PIDS="$(ip netns pids $NS)";
          [[ ! -z "$PIDS" ]] && ps --no-headers -f --pids "$PIDS";
          echo "";
        done) &>> /var/log/extra/network-netns;
        (for NB in $(ovs-vsctl show | grep Bridge | awk '{print $2}'); do
          echo "==== Bridge name - $NB ====";
          ovs-ofctl show $NB;
          ovs-ofctl dump-flows $NB;
          echo "";
        done;
        ovsdb-client dump) &> /var/log/extra/network-bridges;
    - name: lsof -P -n
      shell: "lsof -P -n &> /var/log/extra/lsof.txt"
    - name: pstree -p
      shell: "pstree -p &> /var/log/extra/pstree.txt"
    - name: sysctl -a
      shell: "sysctl -a &> /var/log/extra/sysctl.txt"
    - name: netstat -lnp
      shell: "netstat -lnp &> /var/log/extra/netstat.txt"
    - name: openstack-status
      shell: "which openstack-status &> /dev/null && (. ~/keystonerc_admin; openstack-status &> /var/log/extra/openstack-status.txt)"
      when: "'controller' in inventory_hostname"
    - name: List nova servers on undercloud
      shell: >
        if [[ -e {{ working_dir }}/stackrc ]]; then
          source {{ working_dir }}/stackrc;
          nova list &> /var/log/extra/nova_list.txt;
        fi
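    # HAProxy exposes a stats socket; query it via socat when both the
    # process and the socket exist, otherwise record why stats are missing.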
    - name: Get haproxy stats
      shell: >
        pgrep haproxy &&
        test -S /var/lib/haproxy/stats &&
        echo 'show info;show stat;show table' | socat /var/lib/haproxy/stats stdio &> /var/log/extra/haproxy-stats.txt ||
        echo "No HAProxy or no socket on host" > /var/log/extra/haproxy-stats.txt
    - name: lsmod
      shell: "lsmod &> /var/log/extra/lsmod.txt"
    - name: lspci
      shell: "lspci &> /var/log/extra/lspci.txt"
    - name: pip list
      shell: "pip list &> /var/log/extra/pip.txt"
    - name: lvm debug
      shell: "(vgs; pvs; lvs) &> /var/log/extra/lvm.txt"
    - name: check if ODL is enabled via docker
      shell: docker ps | grep opendaylight_api
      register: odl_container_enabled
    - name: check if ODL is enabled via podman
      shell: podman ps | grep opendaylight_api
      register: odl_container_enabled
      when: odl_container_enabled.rc != 0
    - name: check if ODL is enabled via rpm
      shell: rpm -qa | grep opendaylight
      register: odl_rpm_enabled
    - name: Create ODL log directory
      file: dest="{{ odl_extra_log_dir }}" state=directory
      when: (odl_rpm_enabled.rc == 0) or (odl_container_enabled.rc == 0)
    - name: Create rsync filter file
      template:
        src: "odl_extra_logs.j2"
        dest: "/tmp/odl_extra_logs.sh"
    - name: Collect OVS outputs for ODL
      shell: "bash /tmp/odl_extra_logs.sh"
      when: (odl_rpm_enabled.rc == 0) or (odl_container_enabled.rc == 0)
    - name: Collect ODL info and logs (RPM deployment)
      shell: >
        cp /opt/opendaylight/data/log/* /var/log/extra/odl/;
        journalctl -u opendaylight > /var/log/extra/odl/odl_journal.log
      when: odl_rpm_enabled.rc == 0
    - name: Generate human-readable SAR logs
      shell: "[[ -f /usr/lib64/sa/sa2 ]] && /usr/lib64/sa/sa2 -A"
    - name: check for dstat log file
      stat: path=/var/log/extra/dstat-csv.log
      register: dstat_logfile
    - name: kill dstat
      shell: "pkill dstat"
      become: true
      when: dstat_logfile.stat.exists
    - name: Get dstat_graph tool
      git:
        repo: "https://github.com/Dabz/dstat_graph.git"
        dest: "/tmp/dstat_graph"
        version: master
      when: dstat_logfile.stat.exists
    - name: Generate HTML dstat graphs if the log exists
      shell: "/tmp/dstat_graph/generate_page.sh /var/log/extra/dstat-csv.log > /var/log/extra/dstat.html"
      when: dstat_logfile.stat.exists
      args:
        chdir: "/tmp/dstat_graph"
    - name: Search for AVC denied
      shell: >
        grep -i denied /var/log/audit/audit* &&
        grep -i denied /var/log/audit/audit* > /var/log/extra/denials.txt
    - name: Search for segfaults in logs
      shell: >
        grep -v ansible-command /var/log/messages | grep segfault &&
        grep -v ansible-command /var/log/messages | grep segfault > /var/log/extra/segfaults.txt
    - name: Search for oom-killer instances in logs
      shell: >
        grep -v ansible-command /var/log/messages | grep oom-killer &&
        grep -v ansible-command /var/log/messages | grep oom-killer > /var/log/extra/oom-killers.txt
    - name: Ensure sos package is installed when collecting sosreport(s)
      package:
        name: sos
        state: present
      when: artcl_collect_sosreport|bool
    - name: Collect sosreport
      command: >
        sosreport {{ artcl_sosreport_options }}
      when: artcl_collect_sosreport|bool
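    # DLRN build logs: copy them with their directory structure preserved
    # (cp --parents), gzip them, and rename to *.log.txt.gz so they are
    # served as viewable text.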
    - name: Collect delorean logs
      shell: >
        if [[ -e /home/{{ undercloud_user }}/DLRN/data/repos ]]; then
          rm -rf /tmp/delorean_logs && mkdir /tmp/delorean_logs;
          find /home/{{ undercloud_user }}/DLRN/data/repos/ -name '*.log' -exec cp --parents \{\} /tmp/delorean_logs/ \; ;
          find /tmp/delorean_logs -name '*.log' -exec gzip \{\} \; ;
          find /tmp/delorean_logs -name '*.log.gz' -exec sh -c 'x="{}"; mv "$x" "${x%.log.gz}.log.txt.gz"' \; ;
          rm -rf {{ artcl_collect_dir }}/delorean_logs && mkdir {{ artcl_collect_dir }}/delorean_logs;
          mv /tmp/delorean_logs/home/{{ undercloud_user }}/DLRN/data/repos/* {{ artcl_collect_dir }}/delorean_logs/;
        fi
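    # Collect engine-wide info (ps, images, stats, version, info) plus
    # per-container process lists, installed packages, inspect output,
    # config.json and logs for docker and/or podman, whichever is present.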
    - name: Collect container info and logs
      shell: >
        for engine in docker podman; do
          if [ $engine = 'docker' ]; then
            (command -v docker && systemctl is-active docker) || continue
            # container_cp CONTAINER SRC DEST
            container_cp() {
              docker cp ${1}:${2} $3
            }
          fi
          if [ $engine = 'podman' ]; then
            command -v podman || continue
            # NOTE(cjeanner): podman has no "cp" subcommand, we hence have to mount
            # the container, copy, then umount it.
            # More info: https://www.mankier.com/1/podman-cp
            # See also: https://github.com/containers/libpod/issues/613
            container_cp() {
              mnt=$(podman mount $1)
              cp -rT ${mnt}${2} $3
              podman umount $1
            }
          fi
          BASE_CONTAINER_EXTRA=/var/log/extra/${engine};
          mkdir -p $BASE_CONTAINER_EXTRA;
          ALL_FILE=$BASE_CONTAINER_EXTRA/${engine}_allinfo.log;
          CONTAINER_INFO_CMDS=(
            "${engine} ps --all --size"
            "${engine} images"
            "${engine} stats --all --no-stream"
            "${engine} version"
            "${engine} info"
          );
          if [ $engine = 'docker' ]; then
            CONTAINER_INFO_CMDS+=("${engine} volume ls")
          fi
          for cmd in "${CONTAINER_INFO_CMDS[@]}"; do
            echo "+ $cmd" >> $ALL_FILE;
            $cmd >> $ALL_FILE;
            echo "" >> $ALL_FILE;
            echo "" >> $ALL_FILE;
          done;
          # Get only failed containers, in a dedicated file
          ${engine} ps -a | grep -vE ' (IMAGE|Exited \(0\)|Up) ' &>> /var/log/extra/failed_containers.log;
          for cont in $(${engine} ps | awk '{print $NF}' | grep -v NAMES); do
            INFO_DIR=$BASE_CONTAINER_EXTRA/containers/${cont};
            mkdir -p $INFO_DIR;
            (
              set -x;
              if [ $engine = 'docker' ]; then
                ${engine} top $cont auxw;
              # NOTE(cjeanner): `podman top` does not support `ps` options.
              elif [ $engine = 'podman' ]; then
                ${engine} top $cont;
              fi
              ${engine} exec $cont top -bwn1;
              ${engine} exec $cont bash -c "\$(command -v dnf || command -v yum) list installed";
              ${engine} inspect $cont;
            ) &> $INFO_DIR/${engine}_info.log;
            container_cp $cont /var/lib/kolla/config_files/config.json $INFO_DIR/config.json;
            # NOTE(flaper87): This should go away. Services should be
            # using a `logs` volume.
            # NOTE(mandre): Do not copy logs if the container is bind-mounting
            # the /var/log directory.
            if ! ${engine} exec $cont stat $BASE_CONTAINER_EXTRA > /dev/null 2>&1; then
              container_cp $cont /var/log $INFO_DIR/log;
            fi;
            # Delete symlinks because they break log collection and are generally
            # not useful
            find $INFO_DIR -type l -delete;
          done;
          # NOTE(cjeanner): the previous loop cannot use the "-a" flag because of
          # the "exec" calls. So we loop a second time, over ALL containers, in
          # order to get all the logs we can. For instance, the previous loop
          # would not tell us why a container is "Exited (1)", preventing
          # efficient debugging.
          for cont in $(${engine} ps -a | awk '{print $NF}' | grep -v NAMES); do
            INFO_DIR=$BASE_CONTAINER_EXTRA/containers/${cont};
            mkdir -p $INFO_DIR;
            ${engine} logs $cont &> $INFO_DIR/stdout.log;
          done;
          # NOTE(flaper87): Copy contents from the logs volume. We can expect this
          # volume to exist in a containerized environment.
          # NOTE(cjeanner): Rather test the existence of the volume, as podman does
          # not have such a thing.
          if [ -d /var/lib/docker/volumes/logs/_data ]; then
            cp -r /var/lib/docker/volumes/logs/_data $BASE_CONTAINER_EXTRA/logs;
          fi
        done
    - name: Collect config-data
      shell: cp -r /var/lib/config-data/puppet-generated /var/log/config-data
    - name: Collect text version of the journal from the last four hours
      shell: journalctl --since=-4h --lines=100000 > /var/log/journal.txt
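    # Build a consolidated error index: grep ERROR lines from everything
    # under /var/log and move the timestamp to the front of each line so
    # the result sorts chronologically.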
    - name: Collect errors
      shell: >
        grep -rE '^[-0-9]+ [0-9:\.]+ [0-9 ]*ERROR ' /var/log/ |
        sed "s/\(.*\)\(20[0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9]\+\)\(.*\)/\2 ERROR \1\3/g" > /tmp/errors.txt;
        mv /tmp/errors.txt /var/log/extra/errors.txt
    - name: Create an index file for logstash
      shell: >
        for i in {{ artcl_logstash_files|default([])|join(" ") }}; do
        cat $i; done | grep "^20.*|" | sort -sk1,2 |
        sed "s/\(20[0-9][0-9]-[0-9][0-9]-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.*[0-9]*\)\(.*\)/\1 INFO \2/g" > /var/log/extra/logstash.txt
    - name: Set default collect list
      set_fact:
        collect_list: "{{ artcl_collect_list }} + {{ artcl_collect_list_append|default([]) }}"
    - name: Override collect list
      set_fact:
        collect_list: "{{ artcl_collect_override[inventory_hostname] }}"
      when:
        - artcl_collect_override is defined
        - artcl_collect_override[inventory_hostname] is defined
    - name: Create temp directory before gathering logs
      file:
        dest: "/tmp/{{ inventory_hostname }}"
        state: directory
    - name: Create rsync filter file
      template:
        src: "rsync-filter.j2"
        dest: "/tmp/{{ inventory_hostname }}-rsync-filter"
    - name: Gather the logs to /tmp
      become: true
      shell: >
        set -o pipefail &&
        rsync --quiet --recursive --copy-links --prune-empty-dirs
        --filter '. /tmp/{{ inventory_hostname }}-rsync-filter' / /tmp/{{ inventory_hostname }};
        find /tmp/{{ inventory_hostname }} -type d -print0 | xargs -0 chmod 755;
        find /tmp/{{ inventory_hostname }} -type f -print0 | xargs -0 chmod 644;
        find /tmp/{{ inventory_hostname }} -not -type f -not -type d -delete;
        chown -R {{ ansible_user }}: /tmp/{{ inventory_hostname }};
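    # Two archive modes: a single tar.gz (artcl_tar_gz), or files gzipped
    # individually and wrapped in a plain tar (artcl_gzip_only) so a log
    # server can serve each file directly.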
    - name: Compress logs to tar.gz
      shell: >
        chdir=/tmp
        tar czf {{ inventory_hostname }}.tar.gz {{ inventory_hostname }};
      when: artcl_tar_gz|bool
    - name: gzip logs individually and tar them
      shell: >
        chdir=/tmp
        gzip -r ./{{ inventory_hostname }};
        tar cf {{ inventory_hostname }}.tar {{ inventory_hostname }};
      when: artcl_gzip_only|bool
    - name: Fetch log archive (tar.gz)
      fetch:
        src: "/tmp/{{ inventory_hostname }}.tar.gz"
        dest: "{{ artcl_collect_dir }}/{{ inventory_hostname }}.tar.gz"
        flat: true
        validate_checksum: false
      when: artcl_tar_gz|bool
    - name: Fetch log archive (tar)
      fetch:
        src: "/tmp/{{ inventory_hostname }}.tar"
        dest: "{{ artcl_collect_dir }}/{{ inventory_hostname }}.tar"
        flat: true
        validate_checksum: false
      when: artcl_gzip_only|bool
    - name: Delete temporary log directory after collection
      file:
        path: "/tmp/{{ inventory_hostname }}"
        state: absent
      ignore_errors: true
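# In gzip-only mode, unpack the tar on the Ansible control node so the
# individually gzipped logs sit directly under the collect directory.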
- delegate_to: localhost
  when: artcl_gzip_only|bool
  block:
    - name: Extract the logs
      shell: >
        chdir={{ artcl_collect_dir }}
        tar xf {{ inventory_hostname }}.tar;
    - name: Delete the tar file after extraction
      file:
        path: "{{ artcl_collect_dir }}/{{ inventory_hostname }}.tar"
        state: absent