Start parsing links from inbound email

Implement parsing text contained in <a> tags enabling users to also
include links in their comment replies posted using inbound email.

Historically, we were holding back on this for two reasons:
* It makes parsing email more complex and fragile
* Spammers can use this to post spammy links on Gerrit

Inbound email is live on gerrit-review.googlesource.com for almost a
year now and we have seen little to no spam. It is enabled on the
majority of Gerrit sites hosted at Google and we have only seen a single
instance of spammy comments. Therefore, the second concern is gone.

This commit adapts the inbound HTML parser and makes some parts of it
more strict to mitigate the first concern. It also adapts some of the
tests as the HTML was incorrect which is surfaced by the enforcement of
stricter parsing rules.

We parse the text contained in the <a> tag, because we expect users to
just copy and paste a link into their email response and have their
email client turn it into a link automatically rather than have them
mingle with a word that they turn into a link manually.

Bug: Issue 7553
Change-Id: I70ca1977554d9f218280abe188d670ea41daa1f4
This commit is contained in:
Patrick Hiesel
2017-11-27 10:48:19 +01:00
parent a499726ace
commit 78fe705caa
6 changed files with 91 additions and 36 deletions

View File

@@ -71,7 +71,12 @@ public class HtmlParser {
for (Element e : d.body().getAllElements()) { for (Element e : d.body().getAllElements()) {
String elementName = e.tagName(); String elementName = e.tagName();
boolean isInBlockQuote = boolean isInBlockQuote =
e.parents().stream().filter(p -> p.tagName().equals("blockquote")).findAny().isPresent(); e.parents()
.stream()
.anyMatch(
p ->
p.tagName().equals("blockquote")
|| MAIL_PROVIDER_EXTRAS.contains(p.className()));
if (elementName.equals("a")) { if (elementName.equals("a")) {
String href = e.attr("href"); String href = e.attr("href");
@@ -96,17 +101,33 @@ public class HtmlParser {
lastEncounteredComment = perspectiveComment; lastEncounteredComment = perspectiveComment;
iter.next(); iter.next();
} }
continue;
} else if (ParserUtil.isCommentUrl(href, changeUrl, perspectiveComment)) { } else if (ParserUtil.isCommentUrl(href, changeUrl, perspectiveComment)) {
// This is a regular inline comment // This is a regular inline comment
lastEncounteredComment = perspectiveComment; lastEncounteredComment = perspectiveComment;
iter.next(); iter.next();
continue;
} }
} else if (!isInBlockQuote }
&& elementName.equals("div")
&& !MAIL_PROVIDER_EXTRAS.contains(e.className())) { if (isInBlockQuote) {
// There is no user-input in quoted text
continue;
}
if (!elementName.equals("div") && !elementName.equals("a")) {
// We only accept div and a since these tags contain user input
continue;
}
if (elementName.equals("a") && e.attr("href").startsWith("mailto:")) {
// We don't accept mailto: links in general as they often appear in reply-to lines
// (User<user@gmail.com> wrote: ...)
continue;
}
// This is a comment typed by the user // This is a comment typed by the user
// Replace non-breaking spaces and trim string // Replace non-breaking spaces and trim string
String content = e.ownText().replace('\u00a0', ' ').trim(); String content = e.ownText().replace('\u00a0', ' ').trim();
boolean isLink = elementName.equals("a");
if (!Strings.isNullOrEmpty(content)) { if (!Strings.isNullOrEmpty(content)) {
if (lastEncounteredComment == null && lastEncounteredFileName == null) { if (lastEncounteredComment == null && lastEncounteredFileName == null) {
// Remove quotation line, email signature and // Remove quotation line, email signature and
@@ -115,23 +136,31 @@ public class HtmlParser {
// TODO(hiesel) Add more sanitizer // TODO(hiesel) Add more sanitizer
if (!Strings.isNullOrEmpty(content)) { if (!Strings.isNullOrEmpty(content)) {
ParserUtil.appendOrAddNewComment( ParserUtil.appendOrAddNewComment(
new MailComment(content, null, null, MailComment.CommentType.CHANGE_MESSAGE), new MailComment(
content, null, null, MailComment.CommentType.CHANGE_MESSAGE, isLink),
parsedComments); parsedComments);
} }
} else if (lastEncounteredComment == null) { } else if (lastEncounteredComment == null) {
ParserUtil.appendOrAddNewComment( ParserUtil.appendOrAddNewComment(
new MailComment( new MailComment(
content, lastEncounteredFileName, null, MailComment.CommentType.FILE_COMMENT), content,
lastEncounteredFileName,
null,
MailComment.CommentType.FILE_COMMENT,
isLink),
parsedComments); parsedComments);
} else { } else {
ParserUtil.appendOrAddNewComment( ParserUtil.appendOrAddNewComment(
new MailComment( new MailComment(
content, null, lastEncounteredComment, MailComment.CommentType.INLINE_COMMENT), content,
null,
lastEncounteredComment,
MailComment.CommentType.INLINE_COMMENT,
isLink),
parsedComments); parsedComments);
} }
} }
} }
}
return parsedComments; return parsedComments;
} }
} }

View File

@@ -29,14 +29,17 @@ public class MailComment {
Comment inReplyTo; Comment inReplyTo;
String fileName; String fileName;
String message; String message;
boolean isLink;
public MailComment() {} public MailComment() {}
public MailComment(String message, String fileName, Comment inReplyTo, CommentType type) { public MailComment(
String message, String fileName, Comment inReplyTo, CommentType type, boolean isLink) {
this.message = message; this.message = message;
this.fileName = fileName; this.fileName = fileName;
this.inReplyTo = inReplyTo; this.inReplyTo = inReplyTo;
this.type = type; this.type = type;
this.isLink = isLink;
} }
/** /**

View File

@@ -95,8 +95,9 @@ public class ParserUtil {
MailComment lastComment = Iterables.getLast(comments); MailComment lastComment = Iterables.getLast(comments);
if (comment.isSameCommentPath(lastComment)) { if (comment.isSameCommentPath(lastComment)) {
// Merge the two comments // Merge the two comments. Links should just be appended, while regular text that came from
lastComment.message += "\n\n" + comment.message; // different <div> elements should be separated by a paragraph.
lastComment.message += (comment.isLink ? " " : "\n\n") + comment.message;
return; return;
} }

View File

@@ -96,7 +96,8 @@ public class AbstractParserTest {
comments.add(newComment("c1", "gerrit-server/test.txt", "comment", 0)); comments.add(newComment("c1", "gerrit-server/test.txt", "comment", 0));
comments.add(newComment("c2", "gerrit-server/test.txt", "comment", 2)); comments.add(newComment("c2", "gerrit-server/test.txt", "comment", 2));
comments.add(newComment("c3", "gerrit-server/test.txt", "comment", 3)); comments.add(newComment("c3", "gerrit-server/test.txt", "comment", 3));
comments.add(newRangeComment("c4", "gerrit-server/readme.txt", "comment", 3)); comments.add(newComment("c3", "gerrit-server/test.txt", "comment", 115));
comments.add(newRangeComment("c5", "gerrit-server/readme.txt", "comment", 3));
return comments; return comments;
} }
} }

View File

@@ -26,7 +26,7 @@ public class GmailHtmlParserTest extends HtmlParserTest {
+ "On Fri, Nov 18, 2016 at 11:15 AM, foobar (Gerrit) noreply@gerrit.com" + "On Fri, Nov 18, 2016 at 11:15 AM, foobar (Gerrit) noreply@gerrit.com"
+ "<span dir=\"ltr\">&lt;<a href=\"mailto:noreply@gerrit.com\" " + "<span dir=\"ltr\">&lt;<a href=\"mailto:noreply@gerrit.com\" "
+ "target=\"_blank\">noreply@gerrit.com</a>&gt;</span> wrote:<br>" + "target=\"_blank\">noreply@gerrit.com</a>&gt;</span> wrote:<br>"
+ "<blockquote class=\"gmail_quote\" " + "</div></div><blockquote class=\"gmail_quote\" "
+ "<p>foobar <strong>posted comments</strong> on this change.</p>" + "<p>foobar <strong>posted comments</strong> on this change.</p>"
+ "<p><a href=\"" + "<p><a href=\""
+ CHANGE_URL + CHANGE_URL
@@ -41,7 +41,6 @@ public class GmailHtmlParserTest extends HtmlParserTest {
+ "/1/gerrit-server/test.txt\">" + "/1/gerrit-server/test.txt\">"
+ "File gerrit-server/<wbr>test.txt:</a></p>" + "File gerrit-server/<wbr>test.txt:</a></p>"
+ commentBlock(f1) + commentBlock(f1)
+ "<li><p>"
+ "<a href=\"" + "<a href=\""
+ CHANGE_URL + CHANGE_URL
+ "/1/gerrit-server/test.txt\">" + "/1/gerrit-server/test.txt\">"
@@ -105,7 +104,7 @@ public class GmailHtmlParserTest extends HtmlParserTest {
+ "</p><p>Gerrit-MessageType: comment<br>" + "</p><p>Gerrit-MessageType: comment<br>"
+ "Footer omitted</p>" + "Footer omitted</p>"
+ "<div><div></div></div>" + "<div><div></div></div>"
+ "<p>Gerrit-HasComments: Yes</p></blockquote></div><br></div></div>"; + "<p>Gerrit-HasComments: Yes</p></blockquote></div>";
return email; return email;
} }

View File

@@ -38,6 +38,28 @@ public abstract class HtmlParserTest extends AbstractParserTest {
assertChangeMessage("Looks good to me", parsedComments.get(0)); assertChangeMessage("Looks good to me", parsedComments.get(0));
} }
@Test
public void changeMessageWithLink() {
MailMessage.Builder b = newMailMessageBuilder();
b.htmlContent(
newHtmlBody(
"Did you consider this: "
+ "<a href=\"http://gerritcodereview.com\">http://gerritcodereview.com</a>",
null,
null,
null,
null,
null,
null));
List<Comment> comments = defaultComments();
List<MailComment> parsedComments = HtmlParser.parse(b.build(), comments, "");
assertThat(parsedComments).hasSize(1);
assertChangeMessage(
"Did you consider this: http://gerritcodereview.com", parsedComments.get(0));
}
@Test @Test
public void simpleInlineComments() { public void simpleInlineComments() {
MailMessage.Builder b = newMailMessageBuilder(); MailMessage.Builder b = newMailMessageBuilder();
@@ -57,7 +79,7 @@ public abstract class HtmlParserTest extends AbstractParserTest {
assertThat(parsedComments).hasSize(3); assertThat(parsedComments).hasSize(3);
assertChangeMessage("Looks good to me", parsedComments.get(0)); assertChangeMessage("Looks good to me", parsedComments.get(0));
assertInlineComment("I have a comment on this.", parsedComments.get(1), comments.get(1)); assertInlineComment("I have a comment on this.", parsedComments.get(1), comments.get(1));
assertInlineComment("Also have a comment here.", parsedComments.get(2), comments.get(3)); assertInlineComment("Also have a comment here.", parsedComments.get(2), comments.get(4));
} }
@Test @Test
@@ -79,7 +101,7 @@ public abstract class HtmlParserTest extends AbstractParserTest {
assertThat(parsedComments).hasSize(3); assertThat(parsedComments).hasSize(3);
assertChangeMessage("Looks good to me", parsedComments.get(0)); assertChangeMessage("Looks good to me", parsedComments.get(0));
assertFileComment("This is a nice file", parsedComments.get(1), comments.get(1).key.filename); assertFileComment("This is a nice file", parsedComments.get(1), comments.get(1).key.filename);
assertInlineComment("Also have a comment here.", parsedComments.get(2), comments.get(3)); assertInlineComment("Also have a comment here.", parsedComments.get(2), comments.get(4));
} }
@Test @Test
@@ -105,7 +127,7 @@ public abstract class HtmlParserTest extends AbstractParserTest {
assertThat(parsedComments).hasSize(2); assertThat(parsedComments).hasSize(2);
assertFileComment("This is a nice file", parsedComments.get(0), comments.get(1).key.filename); assertFileComment("This is a nice file", parsedComments.get(0), comments.get(1).key.filename);
assertInlineComment("Also have a comment here.", parsedComments.get(1), comments.get(3)); assertInlineComment("Also have a comment here.", parsedComments.get(1), comments.get(4));
} }
@Test @Test
@@ -122,7 +144,7 @@ public abstract class HtmlParserTest extends AbstractParserTest {
assertThat(parsedComments).hasSize(3); assertThat(parsedComments).hasSize(3);
assertChangeMessage(txtMessage, parsedComments.get(0)); assertChangeMessage(txtMessage, parsedComments.get(0));
assertFileComment(txtMessage, parsedComments.get(1), comments.get(1).key.filename); assertFileComment(txtMessage, parsedComments.get(1), comments.get(1).key.filename);
assertInlineComment(txtMessage, parsedComments.get(2), comments.get(3)); assertInlineComment(txtMessage, parsedComments.get(2), comments.get(4));
} }
/** /**