- hosts: localhost
  tasks:
    - name: Add static.opendev.org to inventory
      add_host:
        name: static.opendev.org
        ansible_connection: ssh
        ansible_host: static.opendev.org
        ansible_port: 22
        ansible_user: zuul

# NOTE(ianw): 2020-02-25 just for initial testing run this for one log
# in a dumb way.  We can scrape a few more sites.  Overall, we expect
# this to be replaced with a better analysis tool, see
#  https://review.opendev.org/709236
- hosts: static.opendev.org
  tasks:
    - name: Run 404 scraping script
      become: yes
      shell: |
        SOURCE_FILE=/var/log/docs.openstack.org_access.log
        INTERMEDIATE_FILE=$(mktemp)

        # Get just the lines with 404s in them
        grep ' 404 ' $SOURCE_FILE | sed -n -e 's/.*"GET \(\/.*\) HTTP\/1\.." 404 .*/\1/p' > $INTERMEDIATE_FILE

        if [ -f "$SOURCE_FILE.1" ] ; then
          # We get roughly the last days worth of logs by looking at the last two
          # log files.
          grep ' 404 ' $SOURCE_FILE.1 | sed -n -e 's/.*"GET \(\/.*\) HTTP\/1\.." 404 .*/\1/p' >> $INTERMEDIATE_FILE
        fi

        # Process those 404s to count them and return sorted by count
        sort $INTERMEDIATE_FILE | uniq -c | sort -rn | grep '\(html\|\/$\)'

        rm ${INTERMEDIATE_FILE}
      args:
        executable: /bin/bash