Generate list of 404s for docs.o.o

This generates a list of 404s, sorted by count, for docs.openstack.org.
The list will be hosted at https://files.openstack.org/docs-404s
and can be used by projects to update their rewrite rules so that
old URLs redirect properly to current URLs.

Change-Id: Ic4c184b59eab3e5c6080452f8526a26892abe52e
This commit is contained in:
Clark Boylan 2017-08-02 12:42:56 -07:00 committed by Clark Boylan
parent 694540c103
commit c25e91f496
3 changed files with 50 additions and 0 deletions

View File

@ -0,0 +1,21 @@
#!/bin/bash
# Summarise 404 responses from an Apache access log.
#
# Usage: 404s.sh SOURCE_FILE OUTPUT_DIR
#
#   SOURCE_FILE  access log to scan; SOURCE_FILE.1 is scanned too when present
#   OUTPUT_DIR   existing directory that receives <YYYY-MM-DD>.txt
#
# The report lists each requested path that returned 404, prefixed by its
# hit count and sorted by descending count. Only paths containing "html" or
# ending in "/" are kept. Reports older than 30 days are pruned from
# OUTPUT_DIR.

# Refuse to run without both arguments: an empty SOURCE_FILE would make grep
# read stdin, and an empty OUTPUT_DIR would make the final find/-delete
# operate on the current directory.
if [ -z "$1" ] || [ -z "$2" ] ; then
    echo "Usage: $0 SOURCE_FILE OUTPUT_DIR" >&2
    exit 1
fi
SOURCE_FILE=$1
OUTPUT_DIR=$2
if [ ! -d "$OUTPUT_DIR" ] ; then
    echo "$0: output directory '$OUTPUT_DIR' does not exist" >&2
    exit 1
fi

INTERMEDIATE_FILE=$(mktemp) || exit 1
# Remove the scratch file however the script exits.
trap 'rm -f "$INTERMEDIATE_FILE"' EXIT

# Print the request path of every GET in log file $1 that was answered
# with a 404.
extract_404_paths() {
    grep ' 404 ' "$1" | sed -n -e 's/.*"GET \(\/.*\) HTTP\/1\.." 404 .*/\1/p'
}

# Get just the lines with 404s in them
extract_404_paths "$SOURCE_FILE" > "$INTERMEDIATE_FILE"
if [ -f "$SOURCE_FILE.1" ] ; then
    # We get roughly the last day's worth of logs by looking at the last two
    # log files.
    extract_404_paths "$SOURCE_FILE.1" >> "$INTERMEDIATE_FILE"
fi

# Process those 404s to count them and return sorted by count
sort "$INTERMEDIATE_FILE" | uniq -c | sort -rn | grep '\(html\|\/$\)' > "$OUTPUT_DIR/$(date +%F).txt"

# cleanup: prune reports older than 30 days (the scratch file is removed by
# the EXIT trap).
find "$OUTPUT_DIR" -type f -name '*.txt' -mtime +30 -delete

View File

@ -97,6 +97,28 @@ class openstack_project::files (
notify => Service['httpd'],
}
# Install the 404 report script as a root-owned executable.
file { '/usr/local/bin/404s.sh':
  ensure => present,
  owner  => 'root',
  group  => 'root',
  mode   => '0755',
  # Must be `source`, not `content`: `content` would write the puppet:///
  # URL string itself into the file instead of fetching the script.
  source => 'puppet:///modules/openstack_project/files/404s.sh',
}
# Root-owned, world-readable directory at /var/www/docs-404s.
file { '/var/www/docs-404s':
  ensure => directory,
  mode   => '0755',
  owner  => 'root',
  group  => 'root',
}
# Daily job that runs 404s.sh against the docs.openstack.org access log and
# writes its report into /var/www/docs-404s/.
cron { 'generate_docs_404s':
  # This seems to be about half an hour after apache rotates logs.
  hour        => '7',
  minute      => '0',
  # /usr/local/bin is on PATH so the bare `404s.sh` command name resolves.
  environment => 'PATH=/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin',
  command     => '404s.sh /var/log/apache2/docs.openstack.org_access.log /var/www/docs-404s/',
  # The job needs both the script and the directory it writes into.
  require     => [
    File['/usr/local/bin/404s.sh'],
    File['/var/www/docs-404s'],
  ],
}
###########################################################
# docs.openstack.org
@ -132,6 +154,7 @@ class openstack_project::files (
before => File['/etc/ssl/certs/docs.openstack.org.pem'],
}
###########################################################
# developer.openstack.org

View File

@ -23,6 +23,12 @@ NameVirtualHost <%= @vhost_name %>:<%= @port %>
Require all granted
</Directory>
# Map the /docs-404s URL path onto /var/www/docs-404s on disk.
Alias /docs-404s /var/www/docs-404s
<Directory "/var/www/docs-404s">
# +Indexes turns on directory listings for this directory.
Options +Indexes
# Allow access from all clients.
Require all granted
</Directory>
ErrorLog /var/log/<%= scope.lookupvar("httpd::params::apache_name") %>/<%= @name %>_error.log
LogLevel warn
CustomLog /var/log/<%= scope.lookupvar("httpd::params::apache_name") %>/<%= @name %>_access.log combined