Support disallowing robots

Add a disallow_robots parameter which can be used to disallow
indexing of all site content by robots. This is particularly useful
for test deployments where you don't want extra (often stale) copies
of your content to show up in search engines.
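
For example, a test deployment could opt in when declaring the class.
This is only a sketch (other parameters omitted; the role and sitename
values are placeholders):

  class { '::mediawiki':
    role            => 'all',
    wg_sitename     => 'Test Wiki',
    disallow_robots => true,
  }

The parameter defaults to false, so existing deployments are unaffected.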

Change-Id: Ic62a72555315bd344db338809920a3605f17c8c6
Author: Jeremy Stanley
Date: 2016-09-07 20:45:25 +00:00
parent 43b131e06d
commit d79c672d41
3 changed files with 19 additions and 0 deletions

@@ -0,0 +1,2 @@
User-agent: *
Disallow: /

@@ -24,6 +24,7 @@ class mediawiki(
  $wg_sitename = undef,
  $wg_logo = undef,
  $wg_openidforcedprovider = 'https://login.launchpad.net/+openid',
  $disallow_robots = false,
) {
  if ($role == 'app' or $role == 'all') {
@@ -46,6 +47,17 @@ class mediawiki(
      require => File['/srv/mediawiki'],
    }
    if $disallow_robots == true {
      file { '/srv/mediawiki/robots.txt':
        ensure  => file,
        group   => 'root',
        mode    => '0444',
        owner   => 'root',
        source  => 'puppet:///modules/mediawiki/disallow_robots.txt',
        require => File['/srv/mediawiki'],
      }
    }
    include ::httpd
    include ::mediawiki::php
    include ::mediawiki::app

@@ -95,6 +95,11 @@
Alias /w <%= scope['mediawiki::mediawiki_location'] %>
Alias /wiki <%= scope['mediawiki::mediawiki_location'] %>/index.php
<% if scope['mediawiki::disallow_robots'] == true %>
# Request that search engines not index this site
Alias /robots.txt /srv/mediawiki/robots.txt
<% end %>
# Redirect old /Article_Name urls
RewriteEngine on
RewriteCond %{REQUEST_URI} !^/w/
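
With disallow_robots enabled, the rendered vhost gains the extra alias next
to the existing ones, so a crawler requesting /robots.txt receives the
blanket disallow above. A sketch of the relevant rendered lines, assuming
mediawiki_location resolves to /srv/mediawiki/w:

  Alias /w /srv/mediawiki/w
  Alias /wiki /srv/mediawiki/w/index.php
  # Request that search engines not index this site
  Alias /robots.txt /srv/mediawiki/robots.txt

When the parameter is left at its default of false, the ERB condition skips
the block and the vhost is unchanged.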