Add robots.txt to Zuul web

This adds a robots.txt that kindly asks bots to not crawl anything on
zuul. We've seen some bots crawling which leads to them trawling the
build logs which seems like overkill and increases bandwidth usage in
our donor clouds. Ask them to stop and quiet everything down a bit.

Change-Id: I88d85c7a51159b5b020aa179e24acec55fb42931
This commit is contained in:
Clark Boylan 2024-04-03 09:57:36 -07:00
parent 6ca2b9a7d5
commit 7ad3125814
5 changed files with 32 additions and 0 deletions

View File

@ -0,0 +1,2 @@
User-agent: *
Disallow: /

View File

@ -35,6 +35,22 @@
command: a2enconf log-combined-cache command: a2enconf log-combined-cache
when: _log_combined_cache.changed when: _log_combined_cache.changed
# Dedicated directory that holds robots.txt; the vhost rewrite rule for
# ^/robots.txt$ and its <Directory> grant both reference /var/www/robots.
- name: Create robots.txt location dir
file:
path: /var/www/robots
state: directory
owner: root
group: root
# Root-owned; readable and traversable by all users.
mode: '0755'
# Copy robots.txt from `src` to the exact path the vhost rewrite rule
# points at (/var/www/robots/robots.txt).
- name: Copy the robots.txt
copy:
src: robots.txt
dest: /var/www/robots/robots.txt
owner: root
group: root
# Root-owned; world-readable, writable only by root.
mode: '0644'
- name: Copy apache config - name: Copy apache config
template: template:
src: zuul.vhost.j2 src: zuul.vhost.j2

View File

@ -47,6 +47,7 @@
RewriteEngine on RewriteEngine on
RewriteRule ^/robots.txt$ /var/www/robots/robots.txt [L]
RewriteRule ^/api/connection/(.*)$ http://127.0.0.1:9000/api/connection/$1 [P,L] RewriteRule ^/api/connection/(.*)$ http://127.0.0.1:9000/api/connection/$1 [P,L]
RewriteRule ^/api/console-stream ws://127.0.0.1:9000/api/tenant/openstack/console-stream [P,L] RewriteRule ^/api/console-stream ws://127.0.0.1:9000/api/tenant/openstack/console-stream [P,L]
RewriteRule ^/api/(.*)$ http://127.0.0.1:9000/api/tenant/openstack/$1 [P,L] RewriteRule ^/api/(.*)$ http://127.0.0.1:9000/api/tenant/openstack/$1 [P,L]
@ -74,4 +75,7 @@
CacheEnable disk CacheEnable disk
</Location> </Location>
<Directory "/var/www/robots">
Require all granted
</Directory>
</VirtualHost> </VirtualHost>

View File

@ -47,6 +47,7 @@
RewriteEngine on RewriteEngine on
RewriteRule ^/robots.txt$ /var/www/robots/robots.txt [L]
RewriteRule ^/api/tenant/(.*)/console-stream ws://127.0.0.1:9000/api/tenant/$1/console-stream [P,L] RewriteRule ^/api/tenant/(.*)/console-stream ws://127.0.0.1:9000/api/tenant/$1/console-stream [P,L]
RewriteRule ^/(.*)$ http://127.0.0.1:9000/$1 [P,L] RewriteRule ^/(.*)$ http://127.0.0.1:9000/$1 [P,L]
@ -72,4 +73,7 @@
CacheEnable disk CacheEnable disk
</Location> </Location>
<Directory "/var/www/robots">
Require all granted
</Directory>
</VirtualHost> </VirtualHost>

View File

@ -26,3 +26,9 @@ def test_iptables(host):
for rule in rules: for rule in rules:
assert '--dport 7900' not in rule assert '--dport 7900' not in rule
def test_zuul_robots(host):
cmd = host.run('curl --insecure '
'--resolve zuul.opendev.org:443:127.0.0.1 '
'https://zuul.opendev.org/robots.txt')
assert 'Disallow: /' in cmd.stdout