From 7ad3125814f3ffe2652e9c8069e63bce7c407f30 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Wed, 3 Apr 2024 09:57:36 -0700 Subject: [PATCH] Add robots.txt to Zuul web This adds a robots.txt that kindly asks bots to not crawl anything on zuul. We've seen some bots crawling which leads to them trolling the build logs which seems like overkill and increases bandwidth usage in our donor clouds. Ask them to stop and quiet everything down a bit. Change-Id: I88d85c7a51159b5b020aa179e24acec55fb42931 --- playbooks/roles/zuul-web/files/robots.txt | 2 ++ playbooks/roles/zuul-web/tasks/main.yaml | 16 ++++++++++++++++ .../roles/zuul-web/templates/openstack.vhost.j2 | 4 ++++ playbooks/roles/zuul-web/templates/zuul.vhost.j2 | 4 ++++ testinfra/test_zuul_scheduler.py | 6 ++++++ 5 files changed, 32 insertions(+) create mode 100644 playbooks/roles/zuul-web/files/robots.txt diff --git a/playbooks/roles/zuul-web/files/robots.txt b/playbooks/roles/zuul-web/files/robots.txt new file mode 100644 index 0000000000..1f53798bb4 --- /dev/null +++ b/playbooks/roles/zuul-web/files/robots.txt @@ -0,0 +1,2 @@ +User-agent: * +Disallow: / diff --git a/playbooks/roles/zuul-web/tasks/main.yaml b/playbooks/roles/zuul-web/tasks/main.yaml index 59c9f1403b..943b9e6224 100644 --- a/playbooks/roles/zuul-web/tasks/main.yaml +++ b/playbooks/roles/zuul-web/tasks/main.yaml @@ -35,6 +35,22 @@ command: a2enconf log-combined-cache when: _log_combined_cache.changed +- name: Create robots.txt location dir + file: + path: /var/www/robots + state: directory + owner: root + group: root + mode: '0755' + +- name: Copy the robots.txt + copy: + src: robots.txt + dest: /var/www/robots/robots.txt + owner: root + group: root + mode: '0644' + - name: Copy apache config template: src: zuul.vhost.j2 diff --git a/playbooks/roles/zuul-web/templates/openstack.vhost.j2 b/playbooks/roles/zuul-web/templates/openstack.vhost.j2 index 521e798b48..d55019bd70 100644 --- a/playbooks/roles/zuul-web/templates/openstack.vhost.j2 +++ 
b/playbooks/roles/zuul-web/templates/openstack.vhost.j2 @@ -47,6 +47,7 @@ RewriteEngine on + RewriteRule ^/robots.txt$ /var/www/robots/robots.txt [L] RewriteRule ^/api/connection/(.*)$ http://127.0.0.1:9000/api/connection/$1 [P,L] RewriteRule ^/api/console-stream ws://127.0.0.1:9000/api/tenant/openstack/console-stream [P,L] RewriteRule ^/api/(.*)$ http://127.0.0.1:9000/api/tenant/openstack/$1 [P,L] @@ -74,4 +75,7 @@ CacheEnable disk + + Require all granted + diff --git a/playbooks/roles/zuul-web/templates/zuul.vhost.j2 b/playbooks/roles/zuul-web/templates/zuul.vhost.j2 index d921230ef9..338d8e3a28 100644 --- a/playbooks/roles/zuul-web/templates/zuul.vhost.j2 +++ b/playbooks/roles/zuul-web/templates/zuul.vhost.j2 @@ -47,6 +47,7 @@ RewriteEngine on + RewriteRule ^/robots.txt$ /var/www/robots/robots.txt [L] RewriteRule ^/api/tenant/(.*)/console-stream ws://127.0.0.1:9000/api/tenant/$1/console-stream [P,L] RewriteRule ^/(.*)$ http://127.0.0.1:9000/$1 [P,L] @@ -72,4 +73,7 @@ CacheEnable disk + + Require all granted + diff --git a/testinfra/test_zuul_scheduler.py b/testinfra/test_zuul_scheduler.py index 33930f6366..deba3a0323 100644 --- a/testinfra/test_zuul_scheduler.py +++ b/testinfra/test_zuul_scheduler.py @@ -26,3 +26,9 @@ def test_iptables(host): for rule in rules: assert '--dport 7900' not in rule + +def test_zuul_robots(host): + cmd = host.run('curl --insecure ' + '--resolve zuul.opendev.org:443:127.0.0.1 ' + 'https://zuul.opendev.org/robots.txt') + assert 'Disallow: /' in cmd.stdout