system-config/playbooks/periodic/404.yaml

38 lines
1.3 KiB
YAML

# Bootstrap play: runs on the local executor only to register the remote
# web server in the in-memory inventory so the next play can target it.
- hosts: localhost
  tasks:
    - name: Add static.opendev.org to inventory
      add_host:
        # Inventory name the following play refers to in its `hosts:` line.
        name: static.opendev.org
        # Connection details: plain SSH to the host as the zuul user.
        ansible_connection: ssh
        ansible_host: static.opendev.org
        ansible_port: 22
        ansible_user: zuul
# NOTE(ianw): 2020-02-25 For initial testing, we just run this against a
# single log in a simple-minded way; more sites can be scraped later.
# Overall, we expect this to be replaced with a better analysis tool, see
# https://review.opendev.org/709236
# Reporting play: prints a ranked list of the request paths that returned
# 404 in the docs.openstack.org access log (current file plus the previous
# rotation, if present). The report goes to the task's stdout.
- hosts: static.opendev.org
  tasks:
    - name: Run 404 scraping script
      # The script reads a log file under /var/log, so it runs with become.
      become: true
      shell: |
        SOURCE_FILE=/var/log/docs.openstack.org_access.log
        INTERMEDIATE_FILE=$(mktemp)
        # Remove the scratch file however the script ends, including when it
        # is interrupted part-way through.
        trap 'rm -f "$INTERMEDIATE_FILE"' EXIT

        # Print the request path of every GET in log file $1 that was
        # answered with a 404. grep pre-filters candidate lines; sed keeps
        # only the path between "GET " and " HTTP/1.x".
        extract_404_paths() {
            grep ' 404 ' "$1" | sed -n -e 's/.*"GET \(\/.*\) HTTP\/1\.." 404 .*/\1/p'
        }

        extract_404_paths "$SOURCE_FILE" > "$INTERMEDIATE_FILE"
        if [ -f "$SOURCE_FILE.1" ] ; then
            # We get roughly the last day's worth of logs by looking at the
            # last two log files.
            extract_404_paths "$SOURCE_FILE.1" >> "$INTERMEDIATE_FILE"
        fi

        # Process those 404s to count them and return sorted by count,
        # keeping only paths that contain "html" or end in "/".
        sort "$INTERMEDIATE_FILE" | uniq -c | sort -rn | grep '\(html\|\/$\)'

        # grep exits non-zero when nothing matched; an empty report is not a
        # task failure, so always finish successfully.
        exit 0
      args:
        # The script is run with bash rather than the default shell.
        executable: /bin/bash