From ec51256e6cb59cf0aa1ae02379e961864ea942ac Mon Sep 17 00:00:00 2001 From: Juan Hernandez Date: Tue, 6 Aug 2013 16:30:50 +0200 Subject: [PATCH] Configurable external robots.txt file Currently the robots.txt file used by the application is inside the .war file and thus difficult to modify for users. This patch adds a new optional configuration parameter that allows the user to specify an external robots file, so that it is easy to modify and easy to preserve during upgrades of the application. If no configuration change is made the original robots file is used, only if the following is added to the gerrit.config file will the external file be used: [httpd] robotsFile = etc/myrobots.txt If the file indicated by this parameter is relative then it will be resolved as sub directory of the site directory, if it is absolute it will be used as is. If the file doesn't exist or can't be read a message will be written to the log and the default file will be used. Bug: issue 1968 Change-Id: Iad02dbd97633e9c45dbce15d1f227f3931255e0a Signed-off-by: Juan Hernandez --- Documentation/config-gerrit.txt | 10 ++ .../com/google/gerrit/httpd/UrlModule.java | 3 + .../gerrit/httpd/raw/RobotsServlet.java | 101 ++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 gerrit-httpd/src/main/java/com/google/gerrit/httpd/raw/RobotsServlet.java diff --git a/Documentation/config-gerrit.txt b/Documentation/config-gerrit.txt index e49ff770b5..2a9c350562 100644 --- a/Documentation/config-gerrit.txt +++ b/Documentation/config-gerrit.txt @@ -1747,6 +1747,16 @@ a trusted username in the `TRUSTED_USER` HTTP Header: filterClass = org.anyorg.MySecureFilter ---- +[[httpd.robotsFile]]httpd.robotsFile:: ++ +Location of an external robots.txt file to be used instead of the one +bundled with the .war of the application. ++ +If not absolute, the path is resolved relative to `$site_path`. 
++ +If the file doesn't exist or can't be read the default robots.txt file +bundled with the .war will be used instead. + [[ldap]]Section ldap ~~~~~~~~~~~~~~~~~~~~ diff --git a/gerrit-httpd/src/main/java/com/google/gerrit/httpd/UrlModule.java b/gerrit-httpd/src/main/java/com/google/gerrit/httpd/UrlModule.java index 05b059b8cf..bf39bfb893 100644 --- a/gerrit-httpd/src/main/java/com/google/gerrit/httpd/UrlModule.java +++ b/gerrit-httpd/src/main/java/com/google/gerrit/httpd/UrlModule.java @@ -21,6 +21,7 @@ import com.google.gerrit.common.PageLinks; import com.google.gerrit.httpd.raw.CatServlet; import com.google.gerrit.httpd.raw.HostPageServlet; import com.google.gerrit.httpd.raw.LegacyGerritServlet; +import com.google.gerrit.httpd.raw.RobotsServlet; import com.google.gerrit.httpd.raw.SshInfoServlet; import com.google.gerrit.httpd.raw.StaticServlet; import com.google.gerrit.httpd.raw.ToolServlet; @@ -112,6 +113,8 @@ class UrlModule extends ServletModule { if (cfg.deprecatedQuery) { serve("/query").with(DeprecatedChangeQueryServlet.class); } + + serve("/robots.txt").with(RobotsServlet.class); } private Key notFound() { diff --git a/gerrit-httpd/src/main/java/com/google/gerrit/httpd/raw/RobotsServlet.java b/gerrit-httpd/src/main/java/com/google/gerrit/httpd/raw/RobotsServlet.java new file mode 100644 index 0000000000..d19a0ce867 --- /dev/null +++ b/gerrit-httpd/src/main/java/com/google/gerrit/httpd/raw/RobotsServlet.java @@ -0,0 +1,101 @@ +// Copyright (C) 2013 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.gerrit.httpd.raw; + +import com.google.common.io.ByteStreams; +import com.google.gerrit.server.config.GerritServerConfig; +import com.google.gerrit.server.config.SitePaths; +import com.google.inject.Inject; +import com.google.inject.Singleton; + +import org.eclipse.jgit.lib.Config; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +/** + * This class provides a mechanism to use a configurable robots.txt file, + * outside of the .war of the application. In order to configure it add the + * following to the httpd section of the gerrit.config + * file: + * + *
+ * [httpd]
+ *         robotsFile = etc/myrobots.txt
+ * 
+ * + * If the specified file name is relative it will resolved as a sub directory of + * the site directory, if it is absolute it will be used as is. + * + * If the specified file doesn't exist or isn't readable the servlet will + * default to the robots.txt file bundled with the .war file of the + * application. + */ +@SuppressWarnings("serial") +@Singleton +public class RobotsServlet extends HttpServlet { + private static final Logger log = + LoggerFactory.getLogger(RobotsServlet.class); + + private final File robotsFile; + + @Inject + RobotsServlet(@GerritServerConfig final Config config, final SitePaths sitePaths) { + File file = sitePaths.resolve( + config.getString("httpd", null, "robotsFile")); + if (file != null && (!file.exists() || !file.canRead())) { + log.warn("Cannot read httpd.robotsFile, using default"); + file = null; + } + robotsFile = file; + } + + @Override + protected void doGet(final HttpServletRequest req, final HttpServletResponse rsp) + throws IOException { + rsp.setContentType("text/plain"); + InputStream in = openRobotsFile(); + try { + OutputStream out = rsp.getOutputStream(); + try { + ByteStreams.copy(in, out); + } finally { + out.close(); + } + } finally { + in.close(); + } + } + + private InputStream openRobotsFile() { + if (robotsFile != null) { + try { + return new FileInputStream(robotsFile); + } catch (IOException e) { + log.warn("Cannot read " + robotsFile + "; using default", e); + } + } + return getServletContext().getResourceAsStream("/robots.txt"); + } +}