ansible-role-collect-logs/roles/collect_logs/defaults/main.yml

448 lines
14 KiB
YAML

---
# formerly in tq common and tqe extras-common
# zuul does not allow use of the lookup env plugin (security), so we cannot
# use them as defaults, but we can load non-zuul values from vars.
# Base working directory: ".quickstart" under zuul_work_dir, or under "~"
# when zuul_work_dir is not defined.
local_working_dir: "{{ zuul_work_dir | default('~') }}/.quickstart"
# "collected_files" sub-directory of local_working_dir.
artcl_collect_dir: "{{ local_working_dir }}/collected_files"
# Home directory path built from undercloud_user (defined just below).
working_dir: "/home/{{ undercloud_user }}"
undercloud_user: stack
# Falls back to an empty string through default('').
artcl_build_tag: "{{ zuul.build | default('') }}"  # overridden by vars/unsecure.yml
# NOTE(review): presumably the switch that enables log collection - confirm
# against the role's tasks.
artcl_collect: true
# Paths and glob patterns of the files and directories to collect.
# Entries ending in "/" name whole directories; "*" is a glob wildcard.
artcl_collect_list:
  - /var/lib/heat-config/
  - /var/lib/kolla/config_files
  - /var/lib/mistral/
  - /var/lib/oooq-images/*/*.log
  - /var/lib/oooq-images/*/*.sh
  - /var/lib/pacemaker/cib/cib*
  - /var/lib/pacemaker/pengine/pe-input*
  - /var/log/atop*
  - /var/log/dmesg.txt
  - /var/log/host_info.txt
  - /var/log/journal.txt
  - /var/log/postci.txt
  - /var/log/secure
  - /var/log/bootstrap-subnodes.log
  - /var/log/unbound.log
  - /var/log/{{ ansible_pkg_mgr }}.log
  - /var/log/cloud-init*.log
  - /var/log/aodh/
  - /var/log/audit/
  - /var/log/barbican/
  - /var/log/ceilometer/
  - /var/log/ceph/
  - /var/log/cinder/
  - /var/log/cloudkitty/
  - /var/log/cluster/
  - /var/log/config-data/
  - /var/log/congress/
  - /var/log/containers/
  - /var/log/deployed-server-enable-ssh-admin.log
  - /var/log/deployed-server-os-collect-config.log
  - /var/log/designate/
  - /var/log/dmesg/
  - /var/log/extra/
  - /var/log/ec2api/
  - /var/log/glance/
  - /var/log/gnocchi/
  - /var/log/heat/
  - /var/log/heat-launcher/
  - /var/log/horizon/
  - /var/log/httpd/
  - /var/log/ironic/
  - /var/log/ironic-inspector/
  - /var/log/libvirt/
  - /var/log/keystone/
  - /var/log/manila/
  - /var/log/mariadb/
  - /var/log/mistral/
  - /var/log/monasca/
  - /var/log/murano/
  - /var/log/neutron/
  - /var/log/nova/
  - /var/log/novajoin/
  - /var/log/octavia/
  - /var/log/openvswitch/
  - /var/log/ovn/
  - /var/log/pacemaker/
  - /var/log/panko/
  - /var/log/qdr/
  - /var/log/rabbitmq/
  - /var/log/redis/
  - /var/log/sahara/
  - /var/log/sensu/
  - /var/log/swift/
  - /var/log/tacker/
  - /var/log/tempest/
  - /var/log/trove/
  - /var/log/tripleo-container-image-prepare.log
  - /var/log/vitrage/
  - /var/log/watcher/
  - /var/log/zaqar/
  - /var/tmp/sosreport*
  - /etc/
  - /home/*/undercloud-ansible-*
  - /home/*/.instack/install-undercloud.log
  - /home/*/*rc
  - /home/*/*rc.v3
  - /home/*/*.log
  - /home/*/*.json
  - /home/*/*.conf
  - /home/*/*.yml
  - /home/*/*.yaml
  - /home/*/*.sh
  - /home/*/*.rst
  - /home/*/*.pem
  - /home/*/network-environment.yaml
  - /home/*/skip_file
  - /home/*/*.subunit
  - /home/*/tempest/*.xml
  - /home/*/tempest/*.html
  - /home/*/tempest/*.log
  - /home/*/tempest/etc/*.conf
  - /home/*/tempest/*.subunit
  - /home/*/tempest/*.json
  - /home/*/tripleo-heat-installer-templates/
  - /home/*/local_tht/
  - /home/*/gating_repo.tar.gz
  - /home/*/browbeat/
  - /usr/share/openstack-tripleo-heat-templates/
  - /home/*/tripleo-heat-templates/
  - /tmp/tripleoclient*
  # Temporary: the two entries below are only needed until config-download
  # is executed from a Mistral workflow (WIP in Queens).
  - /home/*/inventory
  - /home/*/tripleo-config-download/
# Paths and name patterns left out of the collection. Absolute entries name
# specific files or directories; the bare entries are name patterns.
artcl_exclude_list:
  - /etc/udev/hwdb.bin
  - /etc/puppet/modules
  - /etc/project-config
  - /etc/services
  - /etc/selinux/targeted
  - /etc/pki/ca-trust/extracted
  - /etc/alternatives
  - /var/log/journal
  - overlay*
  - root
  - console*primary.log
  - anaconda*
# If true, an rsync filter file is generated for rsync to collect files;
# if false, find is used to generate the list of files for rsync to collect.
artcl_rsync_collect_list: true
# NOTE(review): presumably the depth limit given to find in the find-based
# mode - confirm against the role's tasks.
artcl_find_maxdepth: 4
# Size in MB.
artcl_find_max_size: 256
# OS-specific values are loaded from tasks/main.yaml.
artcl_collect_pkg_list: []
# In upstream logs the compression is handled by the storage servers
# themselves, so this can be false. With other storage servers the role
# must compress the files itself.
artcl_gzip: false
# Publishing-related vars.
artcl_publish: false
artcl_env: default
# README locations: an HTML file under working_dir and a README.html inside
# artcl_collect_dir.
artcl_readme_path: "{{ working_dir }}/src/opendev.org/openstack/tripleo-ci/docs/tripleo-quickstart-logs.html"
artcl_readme_file: "{{ artcl_collect_dir }}/README.html"
artcl_txt_rename: false
# Give up the log upload after 30 minutes (value is in seconds).
artcl_publish_timeout: 1800
# Base artifact URL; the full URL appends artcl_build_tag and a trailing "/".
artcl_artifact_url: "file://{{ local_working_dir }}"
artcl_full_artifact_url: "{{ artcl_artifact_url }}/{{ artcl_build_tag }}/"
artcl_use_rsync: false
artcl_rsync_use_daemon: false
artcl_container_collect_timeout: 1800  # 30 mins
artcl_use_swift: false
# Clean up the logs after 31 days (31 * 86400 seconds).
artcl_swift_delete_after: 2678400
artcl_swift_container: logs
artcl_use_zuul_swift_upload: false
artcl_zuul_swift_upload_path: /usr/local/bin
artcl_collect_sosreport: false
artcl_sosreport_options: "--batch"
# User-defined commands to be executed, combined with the default ones.
# Empty by default.
artcl_commands_extras: {}
# Selects which ignore_errors strategy the role uses. Defaults to true, but
# for testing purposes it can be set to false to avoid false positives.
artcl_ignore_errors: true
# Implicit commands executed by the role. Keep the dict sorted.
#
# Layout: <log type> -> <command name> -> options. The log types are the
# same five names listed in collect_log_types. Options used below:
#   cmd:             shell snippet to run
#   capture_file:    output path for the command
#                    (NOTE(review): presumably overrides a default location
#                    derived from the command name - confirm in tasks)
#   capture_disable: NOTE(review): presumably turns off output capture for
#                    commands that write their own files - confirm in tasks
#   when:            condition that must hold for the command to run
artcl_commands:
  system:
    cpuinfo:
      cmd: |
        cat /proc/cpuinfo
        echo ""
        grep -s -H '' /sys/module/{kvm_intel,kvm_amd}/parameters/nested
      capture_file: /var/log/extra/cpuinfo.txt
    dmesg:
      cmd: dmesg
    meminfo:
      cmd: cat /proc/meminfo
      capture_file: /var/log/extra/meminfo.txt
    pcs:
      cmd: |
        if type pcs &>/dev/null; then
          echo "+ pcs status"
          pcs status
          echo "+ pcs config"
          pcs config
          echo "+ pcs cluster cib"
          pcs cluster cib
        fi
    swaps:
      cmd: cat /proc/swaps
      capture_file: /var/log/extra/swaps.txt
    vmstat:
      cmd: vmstat -s
    ps:
      cmd: ps axfo %mem,size,rss,vsz,pid,args
    rpm-list:
      cmd: rpm -qa | sort -f
    package-list-installed:
      cmd: "{{ ansible_pkg_mgr }} list installed"
    repolist:
      cmd: "{{ ansible_pkg_mgr }} repolist -v"
    dnf-module-list:
      cmd: "{{ ansible_pkg_mgr }} module list"
      when: ansible_distribution_major_version|int >= 8
    dnf-module-list-enabled:
      cmd: "{{ ansible_pkg_mgr }} module list --enabled"
      when: ansible_distribution_major_version|int >= 8
    record_available_packages:
      cmd: |
        for i in `{{ ansible_pkg_mgr }} repolist enabled -v | grep Repo-id | awk '{print $3}' | awk -F / '{print $1}'`;do
          repoquery --repoid $i -a
        done
      capture_file: /var/log/extra/all_available_packages.txt
    selinux:
      cmd: |
        /usr/sbin/sestatus -v
        /usr/sbin/sestatus -b
    installed_crons:
      cmd: |
        for user in $(cut -f1 -d':' /etc/passwd); do
          echo $user; crontab -u $user -l | grep -v '^$\|^\s*\#\|^\s*PATH'
        done
    import-delorean:
      # used by OSP Release Engineering to import into internal builds
      cmd: >
        repoquery --disablerepo='*' --enablerepo='delorean'
        -a --qf '%{sourcerpm}'|sort -u|sed 's/.src.rpm//g'
    import-delorean-deps:
      # used by OSP Release Engineering to import into internal builds
      cmd: >
        repoquery --disablerepo='*' --enablerepo='delorean-*-deps'
        -a --qf '%{sourcerpm}'|sort -u|sed 's/.src.rpm//g'
    failed_services:
      cmd: >
        systemctl -t service --failed --no-legend | awk '{print $1}'
        | xargs -r -n1 journalctl -u
    lsof:
      cmd: >
        lsof -P -n &> /var/log/extra/lsof.txt
    pstree:
      cmd: pstree -p
    sysctl:
      cmd: sysctl -a
    haproxy-stats:
      # The folded scalar already joins these lines with single spaces, so no
      # trailing backslashes are used: a backslash in front of the folded
      # space would escape it and make the shell look for a command whose
      # name starts with a space.
      cmd: >
        pgrep haproxy &&
        test -S /var/lib/haproxy/stats &&
        echo 'show info;show stat;show table' | socat /var/lib/haproxy/stats stdio ||
        echo "No HAProxy or no socket on host"
    lsmod:
      cmd: lsmod
    lspci:
      cmd: lspci
    pip:
      cmd: "{{ ansible_python.executable }} -m pip list"
    lvm:
      cmd: |
        vgs
        pvs
        lvs
    disk:
      cmd: |
        df -h
    shell_variables:
      cmd: |
        set
    services:
      cmd: |
        systemctl list-units --full --all
        systemctl status "*"
    selinux_denials:
      cmd: >
        grep -i denied /var/log/audit/audit*
    selinux_denials_detail:
      # Reads the selinux_denials.txt file under /var/log/extra.
      cmd: >
        sealert -a /var/log/extra/selinux_denials.txt
    # NOTE(review): the key is spelled "seqfaults" although the command greps
    # for "segfault". Left unchanged because the key may determine the name of
    # the capture file - confirm before renaming.
    seqfaults:
      cmd: >
        grep -v ansible-command /var/log/messages | grep segfault
    oom-killers.txt:
      cmd: |
        grep -v ansible-command /var/log/messages | grep oom-killer
    delorean-logs:
      # Folded into a single shell line; every statement is terminated by ";".
      # Copies the DLRN repo logs and projects.ini to /tmp/delorean_logs,
      # gzips and renames the logs to *.log.txt.gz, then moves everything into
      # artcl_collect_dir/delorean_logs.
      cmd: >
        if [[ -e /home/{{ undercloud_user }}/DLRN/data/repos ]]; then
        rm -rf /tmp/delorean_logs && mkdir /tmp/delorean_logs;
        find /home/{{ undercloud_user }}/DLRN/data/repos/ -name '*.log' -exec cp --parents \{\} /tmp/delorean_logs/ \; ;
        find /home/{{ undercloud_user }}/DLRN/ -name 'projects.ini' -exec cp \{\} /tmp/delorean_logs/ \; ;
        find /tmp/delorean_logs -name '*.log' -exec gzip \{\} \; ;
        find /tmp/delorean_logs -name '*.log.gz' -exec sh -c 'x="{}"; mv "$x" "${x%.log.gz}.log.txt.gz"' \; ;
        rm -rf {{ artcl_collect_dir }}/delorean_logs && mkdir {{ artcl_collect_dir }}/delorean_logs;
        mv /tmp/delorean_logs/home/{{ undercloud_user }}/DLRN/data/repos/* {{ artcl_collect_dir }}/delorean_logs/;
        mv /tmp/delorean_logs/projects.ini {{ artcl_collect_dir }}/delorean_logs/;
        fi
      capture_disable: true
    journal:
      cmd: journalctl --since=-4h --lines=100000
    journal_errors:
      cmd: journalctl --since=-4h -p err --output=short-iso
  monitoring: {}
  network:
    ovn:
      # Section separators use a separate `echo ""` (as cpuinfo does) because
      # bash's echo prints "\n" literally unless -e is given.
      cmd: |
        if type ovs-vsctl &>/dev/null; then
          function sbctl() {
            SBDB=$(sudo ovs-vsctl get open . external_ids:ovn-remote | sed -e 's/\"//g');
            timeout -k 10 5 sudo podman exec ovn_controller ovn-sbctl --db=$SBDB $1
          }
          function nbctl() {
            NBDB=$(sudo ovs-vsctl get open . external_ids:ovn-remote | sed -e 's/\"//g' | sed -e 's/6642/6641/g');
            timeout -k 10 5 sudo podman exec ovn_controller ovn-nbctl --db=$NBDB $1
          }
          echo "Output of ovn-sbctl show"
          sbctl show
          echo ""
          echo "Output of ovn-nbctl show"
          nbctl show
          echo ""
          echo "Output of ovn-sbctl lflow-list"
          sbctl lflow-list
        fi
  openstack:
    baremetal_list:
      cmd: |
        if [[ -e {{ working_dir }}/stackrc ]]; then
          source {{ working_dir }}/stackrc
          openstack baremetal node list --long
        fi
    nova_list:
      cmd: |
        if [[ -e {{ working_dir }}/stackrc ]]; then
          source {{ working_dir }}/stackrc
          openstack server list --long
        fi
    openstack-status:
      cmd: |
        if type openstack-status &> /dev/null; then
          . ~/keystonerc_admin
          openstack-status
        fi
      when: "'controller' in inventory_hostname"
  container: {}
# Doc generation specific vars.
artcl_gen_docs: false
# Payload for doc creation: three lists, all empty by default. They are
# nested under the payload key; left at top level the payload itself would
# be null.
artcl_create_docs_payload:
  included_deployment_scripts: []
  included_static_docs: []
  table_of_contents: []
# Docs source lives under local_working_dir; the build output goes under
# artcl_collect_dir.
artcl_docs_source_dir: "{{ local_working_dir }}/share/ansible/roles/collect-logs/docs/source"
artcl_docs_build_dir: "{{ artcl_collect_dir }}/docs/build"
artcl_verify_sphinx_build: false
# Log files selected for logstash, given as paths and glob patterns.
# NOTE(review): how the role consumes this list is not visible here - confirm
# against the tasks.
artcl_logstash_files:
  - /home/*/container_image_build.log
  - /home/*/deployed_server_prepare.txt
  - /home/*/docker_journalctl.log
  - /home/*/failed_deployment_list.log
  - /home/*/hostname.sh.log
  - /home/*/install_built_repo.log
  - /home/*/install_packages.sh.log
  - /home/*/install-undercloud.log
  - /home/*/ironic-python-agent.log
  - /home/*/nova_actions_check.log
  - /home/*/overcloud_create_ssl_cert.log
  - /home/*/overcloud_custom_tht_script.log
  - /home/*/overcloud_delete.log
  - /home/*/overcloud_deploy.log
  - /home/*/overcloud_deploy_post.log
  - /home/*/overcloud_failed_prepare_resources.log
  - /home/*/overcloud-full.log
  - /home/*/build-err.log
  - /home/*/overcloud_image_build.log
  - /home/*/overcloud_image_upload.log
  - /home/*/overcloud_import_nodes.log
  - /home/*/overcloud_introspect.log
  - /home/*/overcloud_prep_containers.log
  - /home/*/overcloud_prep_images.log
  - /home/*/overcloud_prep_network.log
  - /home/*/overcloud_validate.log
  - /home/*/pkg_mgr_mirror_error.log
  - /home/*/pkg_mgr_mirror.log
  - /home/*/repo_setup.log
  - /home/*/repo_setup.sh.*.log
  - /home/*/standalone_deploy.log
  - /home/*/tempest.log
  - /home/*/undercloud_custom_tht_script.log
  - /home/*/undercloud_install.log
  - /home/*/undercloud_reinstall.log
  - /home/*/*update*.log
  - /home/*/*upgrade*.log
  - /home/*/upgrade-undercloud-repo.sh.log
  - /home/*/validate-overcloud-ipmi-connection.log
  - /home/*/vxlan_networking.sh.log
  - /home/*/workload_launch.log
  - /var/log/bootstrap-subnodes.log
  - /var/log/ipaserver-install.log
  - /var/log/tripleo-container-image-prepare.log
  - /var/log/extra/journal_errors.txt
  - /var/log/ceph/cephadm.log
# ara_graphite_server: graphite.tripleo.org
# If ara_enabled is false, no ara tasks will be executed.
ara_enabled: true
ara_overcloud_db_path: "/var/lib/mistral/overcloud/ara_overcloud.sqlite"
ara_generate_html: true
ara_only_successful_tasks: true
# Maps "<role> : <task name>" strings to dotted metric names. The entries
# are nested under the map key; left at top level the map itself would be
# null. Keys stay quoted because they contain spaces and " : ".
ara_tasks_map:
  "overcloud-deploy : Deploy the overcloud": overcloud.deploy.seconds
  "undercloud-deploy : Install the undercloud": undercloud.install.seconds
  "build-images : run the image build script (direct)": overcloud.images.seconds
  "overcloud-prep-images : Prepare the overcloud images for deploy": prepare_images.seconds
  "validate-simple : Validate the overcloud": overcloud.ping_test.seconds
  "validate-tempest : Execute tempest": overcloud.tempest.seconds
# Log types to collect. The names are the same as the top-level groups of
# artcl_commands.
collect_log_types:
  - system
  - monitoring
  - network
  - openstack
  - container
# InfluxDB module settings.
influxdb_only_successful_tasks: true
influxdb_measurement: test
# influxdb_url:
influxdb_port: 8086
# No credentials by default; written as explicit nulls rather than bare
# empty values (same parsed value).
influxdb_user: null
influxdb_password: null
influxdb_dbname: testdb
# Data file location under local_working_dir.
influxdb_data_file_path: "{{ local_working_dir }}/influxdb_data"
influxdb_create_data_file: true
# Directory for the extra ODL logs, and the odl_info.log file inside it.
odl_extra_log_dir: /var/log/extra/odl
odl_extra_info_log: "{{ odl_extra_log_dir }}/odl_info.log"