Split mail data processing out of the server package

Much of the mail processing code is not specific to building a code
review server, so it doesn't need to live in the server package.

Change-Id: Ie5b22908fecdfa8b2e26eacb9dabb87123c39e27
This commit is contained in:
Han-Wen Nienhuys
2017-11-14 18:04:28 +01:00
parent d9f2ce3a0e
commit 50dd94e5d8
104 changed files with 295 additions and 181 deletions

View File

@@ -0,0 +1,140 @@
// Copyright (C) 2009 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
import com.google.gerrit.common.Nullable;
/**
 * An email address with an optional display name.
 *
 * <p>Two addresses are equal when their emails are equal; the display name takes no part in {@link
 * #equals(Object)} or {@link #hashCode()}.
 */
public class Address {
  /**
   * Parses a string of the form {@code Name <email>}, {@code "Name" <email>}, {@code <email>} or a
   * bare {@code local@domain}.
   *
   * <p>One leading and one trailing double quote around the display name are stripped. An empty
   * display name results in a {@code null} name.
   *
   * @param in the string to parse
   * @return the parsed address
   * @throws IllegalArgumentException if {@code in} matches none of the accepted forms
   */
  public static Address parse(String in) {
    final int lt = in.indexOf('<');
    final int gt = in.indexOf('>');
    final int at = in.indexOf("@");
    if (0 <= lt && lt < gt && lt + 1 < at && at + 1 < gt) {
      final String email = in.substring(lt + 1, gt).trim();
      final String name = in.substring(0, lt).trim();
      int nameStart = 0;
      int nameEnd = name.length();
      if (name.startsWith("\"")) {
        nameStart++;
      }
      // A name consisting of a single quote character both starts and ends with '"'. Only strip a
      // closing quote that is distinct from the opening one, otherwise nameEnd would drop below
      // nameStart and substring() would throw StringIndexOutOfBoundsException.
      if (nameStart < nameEnd && name.endsWith("\"")) {
        nameEnd--;
      }
      return new Address(name.length() > 0 ? name.substring(nameStart, nameEnd) : null, email);
    }

    if (lt < 0 && gt < 0 && 0 < at && at < in.length() - 1) {
      return new Address(in);
    }

    throw new IllegalArgumentException("Invalid email address: " + in);
  }

  /**
   * Like {@link #parse(String)}, but signals an unparsable input with {@code null} instead of an
   * exception.
   *
   * @param in the string to parse
   * @return the parsed address, or {@code null} if {@code in} is not a valid address
   */
  @Nullable
  public static Address tryParse(String in) {
    try {
      return parse(in);
    } catch (IllegalArgumentException e) {
      return null;
    }
  }

  @Nullable private final String name;
  private final String email;

  /** Creates an address without a display name. */
  public Address(String email) {
    this(null, email);
  }

  /**
   * Creates an address.
   *
   * @param name display name, may be {@code null}
   * @param email the email; used for equality and hashing
   */
  public Address(String name, String email) {
    this.name = name;
    this.email = email;
  }

  /** Returns the display name, or {@code null} if there is none. */
  @Nullable
  public String getName() {
    return name;
  }

  /** Returns the email part of this address. */
  public String getEmail() {
    return email;
  }

  @Override
  public int hashCode() {
    return email.hashCode();
  }

  @Override
  public boolean equals(Object other) {
    if (other instanceof Address) {
      return email.equals(((Address) other).email);
    }
    return false;
  }

  @Override
  public String toString() {
    return toHeaderString();
  }

  /**
   * Formats this address for use in a mail header.
   *
   * @return {@code name <email>} (with the name quoted or encoded as needed) when a name is set;
   *     the bare email when it contains no character that requires quoting; {@code <email>}
   *     otherwise
   */
  public String toHeaderString() {
    if (name != null) {
      return quotedPhrase(name) + " <" + email + ">";
    } else if (isSimple()) {
      return email;
    }
    return "<" + email + ">";
  }

  // Characters that prevent an email from being emitted without angle brackets.
  private static final String MUST_QUOTE_EMAIL = "()<>,;:\\\"[]";
  // Characters that force a display name to be wrapped in double quotes.
  private static final String MUST_QUOTE_NAME = MUST_QUOTE_EMAIL + "@.";

  /**
   * Returns true if the email contains only characters above space, below 0x7F and outside {@link
   * #MUST_QUOTE_EMAIL}.
   */
  private boolean isSimple() {
    for (int i = 0; i < email.length(); i++) {
      final char c = email.charAt(i);
      if (c <= ' ' || 0x7F <= c || MUST_QUOTE_EMAIL.indexOf(c) != -1) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns the name in a form usable as the phrase part of an address: quoted-printable encoded
   * if {@link EmailHeader#needsQuotedPrintable} says so, wrapped in double quotes if it contains a
   * character from {@link #MUST_QUOTE_NAME}, unchanged otherwise.
   */
  private static String quotedPhrase(String name) {
    if (EmailHeader.needsQuotedPrintable(name)) {
      return EmailHeader.quotedPrintable(name);
    }
    for (int i = 0; i < name.length(); i++) {
      final char c = name.charAt(i);
      if (MUST_QUOTE_NAME.indexOf(c) != -1) {
        return wrapInQuotes(name);
      }
    }
    return name;
  }

  /** Wraps the name in double quotes, backslash-escaping embedded quotes and backslashes. */
  private static String wrapInQuotes(String name) {
    final StringBuilder r = new StringBuilder(2 + name.length());
    r.append('"');
    for (int i = 0; i < name.length(); i++) {
      char c = name.charAt(i);
      if (c == '"' || c == '\\') {
        r.append('\\');
      }
      r.append(c);
    }
    r.append('"');
    return r.toString();
  }
}

View File

@@ -0,0 +1,18 @@
# Builds every .java file in this directory into the publicly visible "mail" library.
java_library(
name = "mail",
srcs = glob(["*.java"]),
visibility = ["//visibility:public"],
deps = [
"//java/com/google/gerrit/common:annotations",
"//java/com/google/gerrit/reviewdb:server",
"//lib:guava",
"//lib/auto:auto-value",
"//lib/auto:auto-value-annotations",
"//lib/flogger:api",
"//lib/jsoup",
"//lib/log:jsonevent-layout",
"//lib/log:log4j",
"//lib/mime4j:core",
"//lib/mime4j:dom",
],
)

View File

@@ -0,0 +1,233 @@
// Copyright (C) 2009 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.base.MoreObjects;
import java.io.IOException;
import java.io.Writer;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
/**
 * Value of a single mail header that knows how to serialize itself.
 *
 * <p>Note that the nested class {@link String} shadows {@code java.lang.String} inside this file,
 * which is why the latter is always spelled out fully qualified here.
 */
public abstract class EmailHeader {
  /** Returns true if this header carries no value. */
  public abstract boolean isEmpty();

  /**
   * Writes the header value to {@code w}.
   *
   * @throws IOException if the writer fails
   */
  public abstract void write(Writer w) throws IOException;

  /** A header holding a plain text value. */
  public static class String extends EmailHeader {
    private final java.lang.String value;

    public String(java.lang.String v) {
      value = v;
    }

    public java.lang.String getString() {
      return value;
    }

    @Override
    public boolean isEmpty() {
      return value == null || value.length() == 0;
    }

    /** Writes the value, encoded with {@link #quotedPrintable} if it is not printable ASCII. */
    @Override
    public void write(Writer w) throws IOException {
      if (needsQuotedPrintable(value)) {
        w.write(quotedPrintable(value));
      } else {
        w.write(value);
      }
    }

    @Override
    public int hashCode() {
      return Objects.hashCode(value);
    }

    @Override
    public boolean equals(Object o) {
      return (o instanceof String) && Objects.equals(value, ((String) o).value);
    }

    @Override
    public java.lang.String toString() {
      return MoreObjects.toStringHelper(this).addValue(value).toString();
    }
  }

  /**
   * Returns true if {@code value} contains any character outside the range {@code ' '} to {@code
   * '~'}.
   */
  public static boolean needsQuotedPrintable(java.lang.String value) {
    for (int i = 0; i < value.length(); i++) {
      if (value.charAt(i) < ' ' || '~' < value.charAt(i)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns true if the code point has to be hex-escaped inside an encoded word. Only ASCII
   * letters, digits and {@code ! * + - / = _} are passed through unescaped.
   */
  static boolean needsQuotedPrintableWithinPhrase(int cp) {
    switch (cp) {
      case '!':
      case '*':
      case '+':
      case '-':
      case '/':
      case '=':
      case '_':
        return false;
      default:
        if (('a' <= cp && cp <= 'z') || ('A' <= cp && cp <= 'Z') || ('0' <= cp && cp <= '9')) {
          return false;
        }
        return true;
    }
  }

  /**
   * Encodes {@code value} as a single {@code =?UTF-8?Q?...?=} encoded word.
   *
   * <p>Spaces become {@code _}; every code point for which {@link
   * #needsQuotedPrintableWithinPhrase} is true is written as the {@code =XX} escapes of its UTF-8
   * bytes.
   *
   * @param value the text to encode
   * @return the encoded word
   */
  public static java.lang.String quotedPrintable(java.lang.String value) {
    final StringBuilder r = new StringBuilder();

    r.append("=?UTF-8?Q?");
    // Advance by whole code points. Stepping one char at a time would visit the low surrogate of
    // a supplementary character a second time and emit a bogus "=3F" for it.
    for (int i = 0; i < value.length(); ) {
      final int cp = value.codePointAt(i);
      i += Character.charCount(cp);
      if (cp == ' ') {
        r.append('_');

      } else if (needsQuotedPrintableWithinPhrase(cp)) {
        byte[] buf = new java.lang.String(Character.toChars(cp)).getBytes(UTF_8);
        for (byte b : buf) {
          r.append('=');
          r.append(Integer.toHexString((b >>> 4) & 0x0f).toUpperCase());
          r.append(Integer.toHexString(b & 0x0f).toUpperCase());
        }

      } else {
        r.append(Character.toChars(cp));
      }
    }
    r.append("?=");

    return r.toString();
  }

  /** A header holding a date. */
  public static class Date extends EmailHeader {
    private final java.util.Date value;

    public Date(java.util.Date v) {
      value = v;
    }

    public java.util.Date getDate() {
      return value;
    }

    @Override
    public boolean isEmpty() {
      return value == null;
    }

    /** Writes the date in the form {@code Mon, 1 Jun 2009 10:49:44 -0700}. */
    @Override
    public void write(Writer w) throws IOException {
      // A fresh formatter is created per call; SimpleDateFormat instances are not shared.
      final SimpleDateFormat fmt = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.US);
      w.write(fmt.format(value));
    }

    @Override
    public int hashCode() {
      return Objects.hashCode(value);
    }

    @Override
    public boolean equals(Object o) {
      return (o instanceof Date) && Objects.equals(value, ((Date) o).value);
    }

    @Override
    public java.lang.String toString() {
      return MoreObjects.toStringHelper(this).addValue(value).toString();
    }
  }

  /** A header holding a mutable list of addresses. */
  public static class AddressList extends EmailHeader {
    private final List<Address> list = new ArrayList<>();

    public AddressList() {}

    public AddressList(Address addr) {
      add(addr);
    }

    /** Returns an unmodifiable view of the addresses. */
    public List<Address> getAddressList() {
      return Collections.unmodifiableList(list);
    }

    public void add(Address addr) {
      list.add(addr);
    }

    /** Removes every address whose email equals {@code email}. */
    public void remove(java.lang.String email) {
      list.removeIf(address -> address.getEmail().equals(email));
    }

    @Override
    public boolean isEmpty() {
      return list.isEmpty();
    }

    /**
     * Writes the addresses separated by commas, starting a continuation line ({@code CRLF} + tab)
     * whenever the running length plus the next address would exceed 72.
     */
    @Override
    public void write(Writer w) throws IOException {
      // NOTE(review): the initial 8 presumably reserves room for the header name (first line) and
      // the tab (continuation lines) - confirm against the caller.
      int len = 8;
      boolean firstAddress = true;
      boolean needComma = false;
      for (Address addr : list) {
        java.lang.String s = addr.toHeaderString();
        if (firstAddress) {
          firstAddress = false;
        } else if (72 < len + s.length()) {
          w.write(",\r\n\t");
          len = 8;
          needComma = false;
        }

        if (needComma) {
          w.write(", ");
        }

        w.write(s);
        len += s.length();
        needComma = true;
      }
    }

    @Override
    public int hashCode() {
      return Objects.hashCode(list);
    }

    @Override
    public boolean equals(Object o) {
      return (o instanceof AddressList) && Objects.equals(list, ((AddressList) o).list);
    }

    @Override
    public java.lang.String toString() {
      return MoreObjects.toStringHelper(this).addValue(list).toString();
    }
  }
}

View File

@@ -0,0 +1,173 @@
// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import com.google.gerrit.reviewdb.client.Comment;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
/** Provides functionality for parsing the HTML part of a {@link MailMessage}. */
public class HtmlParser {

  // CSS classes that mail providers put on elements holding quoted (non user-typed) content.
  private static final ImmutableSet<String> MAIL_PROVIDER_EXTRAS =
      ImmutableSet.of(
          "gmail_extra", // "On 01/01/2017 User<user@gmail.com> wrote:"
          "gmail_quote" // Used for quoting original content
          );

  private static final ImmutableSet<String> WHITELISTED_HTML_TAGS =
      ImmutableSet.of(
          "div", // Most user-typed comments are contained in a <div> tag
          "a", // We allow links to be contained in a comment
          "font" // Some email clients like nesting input in a new font tag
          );

  private HtmlParser() {}

  /**
   * Parses comments from html email.
   *
   * <p>This parser goes through all html elements in the email and checks for matching patterns.
   * It keeps track of the last file and comments it encountered to know in which context a parsed
   * comment belongs. It uses the href attributes of {@code <a>} tags to identify comments sent out
   * by Gerrit as these are generally more reliable than the text captions.
   *
   * @param email the message as received from the email service
   * @param comments a specific set of comments as sent out in the original notification email.
   *     Comments are expected to be in the same order as they were sent out to in the email.
   * @param changeUrl canonical change URL that points to the change on this Gerrit instance.
   *     Example: https://go-review.googlesource.com/#/c/91570
   * @return list of MailComments parsed from the html part of the email; empty if the email has
   *     no html part
   */
  public static List<MailComment> parse(
      MailMessage email, Collection<Comment> comments, String changeUrl) {
    // TODO(hiesel) Add support for Gmail Mobile
    // TODO(hiesel) Add tests for other popular email clients

    List<MailComment> parsedComments = new ArrayList<>();
    // htmlContent() is nullable and Jsoup.parse rejects null; an absent or empty html part simply
    // yields no comments.
    if (Strings.isNullOrEmpty(email.htmlContent())) {
      return parsedComments;
    }
    Document d = Jsoup.parse(email.htmlContent());
    PeekingIterator<Comment> iter = Iterators.peekingIterator(comments.iterator());

    String lastEncounteredFileName = null;
    Comment lastEncounteredComment = null;
    for (Element e : d.body().getAllElements()) {
      String elementName = e.tagName();
      // Match on the individual class names rather than on the raw class attribute, so that an
      // element carrying several classes (one of which marks it as quoted) is still recognized.
      boolean isInBlockQuote =
          e.parents()
              .stream()
              .anyMatch(
                  p ->
                      p.tagName().equals("blockquote")
                          || p.classNames().stream().anyMatch(MAIL_PROVIDER_EXTRAS::contains));

      if (elementName.equals("a")) {
        String href = e.attr("href");
        // Check if there is still a next comment that could be contained in
        // this <a> tag
        if (!iter.hasNext()) {
          continue;
        }
        Comment perspectiveComment = iter.peek();
        if (href.equals(ParserUtil.filePath(changeUrl, perspectiveComment))) {
          if (lastEncounteredFileName == null
              || !lastEncounteredFileName.equals(perspectiveComment.key.filename)) {
            // First link to this file: remember the file as the current context. Text typed
            // right after it becomes a new file-level comment, so there is no comment to reply
            // to yet.
            lastEncounteredFileName = perspectiveComment.key.filename;
            lastEncounteredComment = null;
          } else if (perspectiveComment.lineNbr == 0) {
            // Second link to the same file and the pending comment is a file-level comment
            // (lineNbr == 0): text that follows is a reply to it.
            lastEncounteredComment = perspectiveComment;
            iter.next();
          }
          continue;
        } else if (ParserUtil.isCommentUrl(href, changeUrl, perspectiveComment)) {
          // This is a regular inline comment
          lastEncounteredComment = perspectiveComment;
          iter.next();
          continue;
        }
      }

      if (isInBlockQuote) {
        // There is no user-input in quoted text
        continue;
      }
      if (!WHITELISTED_HTML_TAGS.contains(elementName)) {
        // We only accept a set of whitelisted tags that can contain user input
        continue;
      }
      if (elementName.equals("a") && e.attr("href").startsWith("mailto:")) {
        // We don't accept mailto: links in general as they often appear in reply-to lines
        // (User<user@gmail.com> wrote: ...)
        continue;
      }

      // This is a comment typed by the user
      // Replace non-breaking spaces and trim string
      String content = e.ownText().replace('\u00a0', ' ').trim();
      boolean isLink = elementName.equals("a");
      if (!Strings.isNullOrEmpty(content)) {
        if (lastEncounteredComment == null && lastEncounteredFileName == null) {
          // Remove quotation line, email signature and
          // "Sent from my xyz device"
          content = ParserUtil.trimQuotation(content);
          // TODO(hiesel) Add more sanitizer
          if (!Strings.isNullOrEmpty(content)) {
            ParserUtil.appendOrAddNewComment(
                new MailComment(
                    content, null, null, MailComment.CommentType.CHANGE_MESSAGE, isLink),
                parsedComments);
          }
        } else if (lastEncounteredComment == null) {
          ParserUtil.appendOrAddNewComment(
              new MailComment(
                  content,
                  lastEncounteredFileName,
                  null,
                  MailComment.CommentType.FILE_COMMENT,
                  isLink),
              parsedComments);
        } else {
          ParserUtil.appendOrAddNewComment(
              new MailComment(
                  content,
                  null,
                  lastEncounteredComment,
                  MailComment.CommentType.INLINE_COMMENT,
                  isLink),
              parsedComments);
        }
      }
    }
    return parsedComments;
  }
}

View File

@@ -0,0 +1,70 @@
// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
import com.google.gerrit.reviewdb.client.Comment;
import java.util.Objects;
/**
 * A comment parsed from inbound email.
 *
 * <p>Fields are package-visible and mutable so that the parsers in this package can merge
 * consecutive text blocks into one comment.
 */
public class MailComment {
  /** The kind of target a parsed comment is attached to. */
  public enum CommentType {
    CHANGE_MESSAGE,
    FILE_COMMENT,
    INLINE_COMMENT
  }

  String message;
  String fileName;
  Comment inReplyTo;
  CommentType type;
  boolean isLink;

  public MailComment() {}

  public MailComment(
      String message, String fileName, Comment inReplyTo, CommentType type, boolean isLink) {
    this.type = type;
    this.inReplyTo = inReplyTo;
    this.fileName = fileName;
    this.message = message;
    this.isLink = isLink;
  }

  public String getMessage() {
    return message;
  }

  public String getFileName() {
    return fileName;
  }

  public Comment getInReplyTo() {
    return inReplyTo;
  }

  public CommentType getType() {
    return type;
  }

  /**
   * Tells whether {@code c} points at exactly the same place in the change as this comment, i.e.
   * whether file name, replied-to comment and type all match. The message text is deliberately
   * left out of the comparison.
   */
  public boolean isSameCommentPath(MailComment c) {
    if (!Objects.equals(fileName, c.fileName)) {
      return false;
    }
    if (!Objects.equals(inReplyTo, c.inReplyTo)) {
      return false;
    }
    // Enum constants are singletons, so identity comparison is equivalent to equals().
    return type == c.type;
  }
}

View File

@@ -0,0 +1,71 @@
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
/**
 * Names of the metadata entries that emails carry.
 *
 * <p>Each constant has a plain name and a header field name. For names starting with {@code
 * Gerrit-} the field name is the plain name prefixed with {@code X-}; for all others both are the
 * same.
 */
public enum MailHeader {
  // Gerrit metadata holders
  ASSIGNEE("Gerrit-Assignee"),
  BRANCH("Gerrit-Branch"),
  CC("Gerrit-CC"),
  COMMENT_IN_REPLY_TO("Comment-In-Reply-To"),
  COMMENT_DATE("Gerrit-Comment-Date"),
  CHANGE_ID("Gerrit-Change-Id"),
  CHANGE_NUMBER("Gerrit-Change-Number"),
  CHANGE_URL("Gerrit-ChangeURL"),
  COMMIT("Gerrit-Commit"),
  HAS_COMMENTS("Gerrit-HasComments"),
  HAS_LABELS("Gerrit-Has-Labels"),
  MESSAGE_TYPE("Gerrit-MessageType"),
  OWNER("Gerrit-Owner"),
  PATCH_SET("Gerrit-PatchSet"),
  PROJECT("Gerrit-Project"),
  REVIEWER("Gerrit-Reviewer"),

  // Commonly used Email headers
  AUTO_SUBMITTED("Auto-Submitted"),
  PRECEDENCE("Precedence"),
  REFERENCES("References");

  private final String name;
  private final String fieldName;

  MailHeader(String name) {
    this.name = name;
    // Custom (Gerrit-specific) headers get the "X-" prefix when used as a header field.
    this.fieldName = name.startsWith("Gerrit-") ? "X-" + name : name;
  }

  /** Returns the plain name, e.g. {@code Gerrit-Change-Number}. */
  public String getName() {
    return name;
  }

  /** Returns the header field name, e.g. {@code X-Gerrit-Change-Number}. */
  public String fieldName() {
    return fieldName;
  }

  /** Returns the plain name followed by {@code ": "}. */
  public String withDelimiter() {
    return name + ": ";
  }

  /** Returns the header field name followed by {@code ": "}. */
  public String fieldWithDelimiter() {
    return fieldName + ": ";
  }
}

View File

@@ -0,0 +1,108 @@
// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.flogger.FluentLogger;
import com.google.common.primitives.Ints;
import java.sql.Timestamp;
import java.time.Instant;
import java.time.format.DateTimeParseException;
/** Parse metadata from inbound email */
public class MailHeaderParser {
  private static final FluentLogger logger = FluentLogger.forEnclosingClass();

  /**
   * Extracts change number, patch set, author, timestamp and message type from a message.
   *
   * <p>The sources are consulted in this order, stopping as soon as all required fields are
   * present: the additional mail headers, the footers in the text part, the footers in the HTML
   * part.
   *
   * @param m the received message
   * @return the metadata found; check {@link MailMetadata#hasRequiredFields()} for completeness
   */
  public static MailMetadata parse(MailMessage m) {
    MailMetadata metadata = new MailMetadata();
    // Find author
    metadata.author = m.from().getEmail();

    // Check email headers for X-Gerrit-<Name>. Values are trimmed so that surrounding whitespace
    // does not make the number parsing fail.
    for (String header : m.additionalHeaders()) {
      if (header.startsWith(MailHeader.CHANGE_NUMBER.fieldWithDelimiter())) {
        String num =
            header.substring(MailHeader.CHANGE_NUMBER.fieldWithDelimiter().length()).trim();
        metadata.changeNumber = Ints.tryParse(num);
      } else if (header.startsWith(MailHeader.PATCH_SET.fieldWithDelimiter())) {
        String ps = header.substring(MailHeader.PATCH_SET.fieldWithDelimiter().length()).trim();
        metadata.patchSet = Ints.tryParse(ps);
      } else if (header.startsWith(MailHeader.COMMENT_DATE.fieldWithDelimiter())) {
        String ts = header.substring(MailHeader.COMMENT_DATE.fieldWithDelimiter().length()).trim();
        try {
          metadata.timestamp =
              Timestamp.from(MailProcessingUtil.rfcDateformatter.parse(ts, Instant::from));
        } catch (DateTimeParseException e) {
          logger.atSevere().withCause(e).log(
              "Mail: Error while parsing timestamp from header of message %s", m.id());
        }
      } else if (header.startsWith(MailHeader.MESSAGE_TYPE.fieldWithDelimiter())) {
        metadata.messageType =
            header.substring(MailHeader.MESSAGE_TYPE.fieldWithDelimiter().length()).trim();
      }
    }
    if (metadata.hasRequiredFields()) {
      return metadata;
    }

    // If the required fields were not yet found, continue to parse the text
    if (!Strings.isNullOrEmpty(m.textContent())) {
      Iterable<String> lines = Splitter.on('\n').split(m.textContent().replace("\r\n", "\n"));
      extractFooters(lines, metadata, m);
      if (metadata.hasRequiredFields()) {
        return metadata;
      }
    }

    // If the required fields were not yet found, continue to parse the HTML
    // HTML footer are contained inside a <div> tag
    if (!Strings.isNullOrEmpty(m.htmlContent())) {
      Iterable<String> lines = Splitter.on("</div>").split(m.htmlContent().replace("\r\n", "\n"));
      extractFooters(lines, metadata, m);
    }

    return metadata;
  }

  /**
   * Fills the still-missing fields of {@code metadata} from footer lines of the form {@code
   * <name>: <value>}.
   *
   * <p>A line is only considered when it contains the name together with its {@code ": "}
   * delimiter; this is the exact string {@link #extractFooter} searches for, so the value offset
   * is always valid.
   */
  private static void extractFooters(Iterable<String> lines, MailMetadata metadata, MailMessage m) {
    for (String line : lines) {
      if (metadata.changeNumber == null
          && line.contains(MailHeader.CHANGE_NUMBER.withDelimiter())) {
        metadata.changeNumber =
            Ints.tryParse(extractFooter(MailHeader.CHANGE_NUMBER.withDelimiter(), line));
      } else if (metadata.patchSet == null
          && line.contains(MailHeader.PATCH_SET.withDelimiter())) {
        metadata.patchSet =
            Ints.tryParse(extractFooter(MailHeader.PATCH_SET.withDelimiter(), line));
      } else if (metadata.timestamp == null
          && line.contains(MailHeader.COMMENT_DATE.withDelimiter())) {
        String ts = extractFooter(MailHeader.COMMENT_DATE.withDelimiter(), line);
        try {
          metadata.timestamp =
              Timestamp.from(MailProcessingUtil.rfcDateformatter.parse(ts, Instant::from));
        } catch (DateTimeParseException e) {
          logger.atSevere().withCause(e).log(
              "Mail: Error while parsing timestamp from footer of message %s", m.id());
        }
      } else if (metadata.messageType == null
          && line.contains(MailHeader.MESSAGE_TYPE.withDelimiter())) {
        metadata.messageType = extractFooter(MailHeader.MESSAGE_TYPE.withDelimiter(), line);
      }
    }
  }

  /**
   * Returns the trimmed remainder of {@code line} after the first occurrence of {@code key}.
   * Callers must make sure {@code line} contains {@code key}.
   */
  private static String extractFooter(String key, String line) {
    return line.substring(line.indexOf(key) + key.length(), line.length()).trim();
  }
}

View File

@@ -0,0 +1,107 @@
// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableList;
import com.google.gerrit.common.Nullable;
import java.time.Instant;
/**
 * A simplified representation of an RFC 2045-2047 mime email message used for representing received
 * emails inside Gerrit. It is populated by the MailParser after MailReceiver has received a
 * message. Transformations done by the parser include stitching mime parts together, transforming
 * all content to UTF-16 and removing attachments.
 *
 * <p>A valid {@link MailMessage} contains at least the following fields: id, from, to, subject and
 * dateReceived.
 *
 * <p>This is an AutoValue class: the implementation and the builder are generated from the
 * abstract methods declared here.
 */
@AutoValue
public abstract class MailMessage {
// Unique identifier
public abstract String id();
// Envelope information
public abstract Address from();
public abstract ImmutableList<Address> to();
public abstract ImmutableList<Address> cc();
// Metadata
public abstract Instant dateReceived();
public abstract ImmutableList<String> additionalHeaders();
// Content; the text and html parts may each be absent
public abstract String subject();
@Nullable
public abstract String textContent();
@Nullable
public abstract String htmlContent();
// Raw content as received over the wire
// NOTE(review): the element type of rawContent (one Integer per byte or per character?) is not
// visible from here - confirm against the code that populates it.
@Nullable
public abstract ImmutableList<Integer> rawContent();
@Nullable
public abstract String rawContentUTF();
/** Returns a new, empty builder. */
public static Builder builder() {
return new AutoValue_MailMessage.Builder();
}
/** Returns a builder pre-populated with the values of this message. */
public abstract Builder toBuilder();
/** Builder for {@link MailMessage}. */
@AutoValue.Builder
public abstract static class Builder {
public abstract Builder id(String val);
public abstract Builder from(Address val);
// Builder for the "to" list property. Despite the name this is unrelated to
// MailMessage#toBuilder(): the method name is the property name "to" plus the suffix "Builder".
public abstract ImmutableList.Builder<Address> toBuilder();
/** Appends a recipient to the "to" list. */
public Builder addTo(Address val) {
toBuilder().add(val);
return this;
}
public abstract ImmutableList.Builder<Address> ccBuilder();
/** Appends a recipient to the "cc" list. */
public Builder addCc(Address val) {
ccBuilder().add(val);
return this;
}
public abstract Builder dateReceived(Instant instant);
public abstract ImmutableList.Builder<String> additionalHeadersBuilder();
/** Appends one entry to the list of additional headers. */
public Builder addAdditionalHeader(String val) {
additionalHeadersBuilder().add(val);
return this;
}
public abstract Builder subject(String val);
public abstract Builder textContent(String val);
public abstract Builder htmlContent(String val);
public abstract Builder rawContent(ImmutableList<Integer> val);
public abstract Builder rawContentUTF(String val);
public abstract MailMessage build();
}
}

View File

@@ -0,0 +1,46 @@
// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
import com.google.common.base.MoreObjects;
import java.sql.Timestamp;
/** MailMetadata represents metadata parsed from inbound email. */
public class MailMetadata {
public Integer changeNumber;
public Integer patchSet;
public String author; // Author of the email
public Timestamp timestamp;
public String messageType; // we expect comment here
public boolean hasRequiredFields() {
return changeNumber != null
&& patchSet != null
&& author != null
&& timestamp != null
&& messageType != null;
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("Change-Number", changeNumber)
.add("Patch-Set", patchSet)
.add("Author", author)
.add("Timestamp", timestamp)
.add("Message-Type", messageType)
.toString();
}
}

View File

@@ -0,0 +1,28 @@
// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
/** Checked {@link Exception} signalling that an email could not be parsed. */
public class MailParsingException extends Exception {
  private static final long serialVersionUID = 1L;

  /**
   * @param message description of what went wrong
   */
  public MailParsingException(String message) {
    super(message);
  }

  /**
   * @param message description of what went wrong
   * @param underlying the exception that caused the parsing to fail
   */
  public MailParsingException(String message, Throwable underlying) {
    super(message, underlying);
  }
}

View File

@@ -0,0 +1,23 @@
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
import java.time.format.DateTimeFormatter;
/** Shared helpers for processing mail data. */
public class MailProcessingUtil {
  /**
   * Formatter for dates of the form {@code Tue, 25 Oct 2016 02:11:35 -0700}.
   *
   * <p>The locale is pinned to US English: the day and month names in mail dates are English, so
   * a formatter created with the JVM's default locale would fail to parse them (and would emit
   * localized names) on a non-English system.
   */
  public static final DateTimeFormatter rfcDateformatter =
      DateTimeFormatter.ofPattern("EEE, dd MMM yyyy HH:mm:ss ZZZ", java.util.Locale.US);
}

View File

@@ -0,0 +1,134 @@
// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.gerrit.reviewdb.client.Comment;
import java.util.List;
import java.util.StringJoiner;
import java.util.regex.Pattern;
/** Helpers shared by the parsers that extract comments from inbound email. */
public class ParserUtil {
  private static final Pattern SIMPLE_EMAIL_PATTERN =
      Pattern.compile(
          "[_A-Za-z0-9-\\+]+(\\.[_A-Za-z0-9-]+)*@[A-Za-z0-9-]+"
              + "(\\.[A-Za-z0-9]+)*(\\.[A-Za-z]{2,})");

  private ParserUtil() {}

  /**
   * Trims the quotation line that email clients add at the end of a reply. Example: {@code On Sun,
   * Nov 20, 2016 at 10:33 PM, <gerrit@gerritcodereview.com> wrote:}
   *
   * <p>The quotation line is looked for in the last line and, because long quotation lines get
   * wrapped, in the concatenation of the last two lines.
   *
   * @param comment Comment parsed from an email.
   * @return Trimmed comment.
   */
  public static String trimQuotation(String comment) {
    StringJoiner j = new StringJoiner("\n");
    List<String> lines = Splitter.on('\n').splitToList(comment);
    // Everything but the last two lines is kept unconditionally.
    for (int i = 0; i < lines.size() - 2; i++) {
      j.add(lines.get(i));
    }

    // Check if the last line contains the full quotation pattern (date + email)
    String lastLine = lines.get(lines.size() - 1);
    if (containsQuotationPattern(lastLine)) {
      if (lines.size() > 1) {
        j.add(lines.get(lines.size() - 2));
      }
      return j.toString().trim();
    }

    // Check if the second last line + the last line contain the full quotation pattern. This is
    // necessary, as the quotation line can be split across the last two lines if it gets too long.
    if (lines.size() > 1) {
      String lastLines = lines.get(lines.size() - 2) + lastLine;
      if (containsQuotationPattern(lastLines)) {
        return j.toString().trim();
      }
    }

    // Add the last two lines
    if (lines.size() > 1) {
      j.add(lines.get(lines.size() - 2));
    }
    j.add(lines.get(lines.size() - 1));

    return j.toString().trim();
  }

  /** Check if string is an inline comment url on a patch set or the base */
  public static boolean isCommentUrl(String str, String changeUrl, Comment comment) {
    int lineNbr = comment.range == null ? comment.lineNbr : comment.range.startLine;
    return str.equals(filePath(changeUrl, comment) + "@" + lineNbr)
        || str.equals(filePath(changeUrl, comment) + "@a" + lineNbr);
  }

  /** Generate the fully qualified filepath */
  public static String filePath(String changeUrl, Comment comment) {
    return changeUrl + "/" + comment.key.patchSetId + "/" + comment.key.filename;
  }

  /**
   * When parsing mail content, we need to append comments prematurely since we are parsing
   * block-by-block and never know what comes next. This can result in a comment being parsed as two
   * comments when it spans multiple blocks. This method takes care of merging those blocks or
   * adding a new comment to the list if appropriate.
   */
  public static void appendOrAddNewComment(MailComment comment, List<MailComment> comments) {
    if (comments.isEmpty()) {
      comments.add(comment);
      return;
    }
    MailComment lastComment = Iterables.getLast(comments);

    if (comment.isSameCommentPath(lastComment)) {
      // Merge the two comments. Links should just be appended, while regular text that came from
      // different <div> elements should be separated by a paragraph.
      lastComment.message += (comment.isLink ? " " : "\n\n") + comment.message;
      return;
    }

    comments.add(comment);
  }

  /**
   * Heuristically decides whether {@code s} is the quotation line of a reply.
   *
   * @return true if {@code s} has at least four groups of digits, none longer than four digits,
   *     and contains something that looks like an email address
   */
  private static boolean containsQuotationPattern(String s) {
    // Identifying the quotation line is hard, as it can be in any language.
    // We identify this line by its characteristics: It usually contains a
    // valid email address, some digits for the date in groups of 1-4 in a row
    // as well as some characters.

    // Count occurrences of digit groups
    int numConsecutiveDigits = 0;
    int maxConsecutiveDigits = 0;
    int numDigitGroups = 0;
    for (char c : s.toCharArray()) {
      if (c >= '0' && c <= '9') {
        numConsecutiveDigits++;
      } else if (numConsecutiveDigits > 0) {
        maxConsecutiveDigits = Integer.max(maxConsecutiveDigits, numConsecutiveDigits);
        numConsecutiveDigits = 0;
        numDigitGroups++;
      }
    }
    // A group that runs up to the end of the string is not terminated by a non-digit inside the
    // loop, so account for it here.
    if (numConsecutiveDigits > 0) {
      maxConsecutiveDigits = Integer.max(maxConsecutiveDigits, numConsecutiveDigits);
      numDigitGroups++;
    }
    if (numDigitGroups < 4 || maxConsecutiveDigits > 4) {
      return false;
    }

    // Check if the string contains an email address
    return SIMPLE_EMAIL_PATTERN.matcher(s).find();
  }
}

View File

@@ -0,0 +1,177 @@
// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.io.CharStreams;
import com.google.common.primitives.Ints;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Locale;
import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.dom.Entity;
import org.apache.james.mime4j.dom.Message;
import org.apache.james.mime4j.dom.MessageBuilder;
import org.apache.james.mime4j.dom.Multipart;
import org.apache.james.mime4j.dom.TextBody;
import org.apache.james.mime4j.dom.address.Mailbox;
import org.apache.james.mime4j.message.DefaultMessageBuilder;
/** Parses raw email content received through POP3 or IMAP into an internal {@link MailMessage}. */
public class RawMailParser {
  /**
   * Lower-case names of the headers that are not copied into the additional headers of the parsed
   * message.
   */
  private static final ImmutableSet<String> MAIN_HEADERS =
      ImmutableSet.of("to", "from", "cc", "date", "message-id", "subject", "content-type");

  private RawMailParser() {}

  /**
   * Parses a MailMessage from a string.
   *
   * @param raw {@link String} payload as received over the wire
   * @return parsed {@link MailMessage}
   * @throws MailParsingException in case parsing fails
   */
  public static MailMessage parse(String raw) throws MailParsingException {
    MailMessage.Builder messageBuilder = MailMessage.builder();
    messageBuilder.rawContentUTF(raw);

    Message mimeMessage;
    try {
      MessageBuilder builder = new DefaultMessageBuilder();
      mimeMessage = builder.parseMessage(new ByteArrayInputStream(raw.getBytes(UTF_8)));
    } catch (IOException | MimeException e) {
      throw new MailParsingException("Can't parse email", e);
    }

    // Add general headers
    if (mimeMessage.getMessageId() != null) {
      messageBuilder.id(mimeMessage.getMessageId());
    }
    if (mimeMessage.getSubject() != null) {
      messageBuilder.subject(mimeMessage.getSubject());
    }
    if (mimeMessage.getDate() != null) {
      messageBuilder.dateReceived(mimeMessage.getDate().toInstant());
    }

    // Add From, To and Cc. Only the first From mailbox is used.
    if (mimeMessage.getFrom() != null && mimeMessage.getFrom().size() > 0) {
      messageBuilder.from(toAddress(mimeMessage.getFrom().get(0)));
    }
    if (mimeMessage.getTo() != null) {
      for (Mailbox m : mimeMessage.getTo().flatten()) {
        messageBuilder.addTo(toAddress(m));
      }
    }
    if (mimeMessage.getCc() != null) {
      for (Mailbox m : mimeMessage.getCc().flatten()) {
        messageBuilder.addCc(toAddress(m));
      }
    }

    // Add additional headers. Header names are lower-cased with Locale.ROOT so that the lookup
    // does not depend on the default locale of the JVM (e.g. the Turkish dotless i would
    // otherwise break the match for "Message-ID").
    mimeMessage
        .getHeader()
        .getFields()
        .stream()
        .filter(f -> !MAIN_HEADERS.contains(f.getName().toLowerCase(Locale.ROOT)))
        .forEach(f -> messageBuilder.addAdditionalHeader(f.getName() + ": " + f.getBody()));

    // Add text and html body parts
    StringBuilder textBuilder = new StringBuilder();
    StringBuilder htmlBuilder = new StringBuilder();
    try {
      handleMimePart(mimeMessage, textBuilder, htmlBuilder);
    } catch (IOException e) {
      throw new MailParsingException("Can't parse email", e);
    }
    messageBuilder.textContent(Strings.emptyToNull(textBuilder.toString()));
    messageBuilder.htmlContent(Strings.emptyToNull(htmlBuilder.toString()));

    try {
      // build() will only succeed if all required attributes were set. We wrap
      // the IllegalStateException in a MailParsingException indicating that
      // required attributes are missing, so that the caller doesn't fall over.
      return messageBuilder.build();
    } catch (IllegalStateException e) {
      throw new MailParsingException("Missing required attributes after email was parsed", e);
    }
  }

  /**
   * Parses a MailMessage from an array of characters. Note that the character array is int-typed.
   * This method is only used by POP3, which specifies that all transferred characters are US-ASCII
   * (RFC 6856). When reading the input in Java, io.Reader yields ints. These can be safely
   * converted to chars as all US-ASCII characters fit in a char. If emails contain non-ASCII
   * characters, such as UTF runes, these will be encoded in ASCII using either Base64 or
   * quoted-printable encoding.
   *
   * @param chars Array as received over the wire
   * @return Parsed {@link MailMessage}
   * @throws MailParsingException in case parsing fails
   */
  public static MailMessage parse(int[] chars) throws MailParsingException {
    StringBuilder b = new StringBuilder(chars.length);
    for (int c : chars) {
      b.append((char) c);
    }

    // Parse the string form first, then attach the original int-typed payload to the result.
    MailMessage.Builder messageBuilder = parse(b.toString()).toBuilder();
    messageBuilder.rawContent(ImmutableList.copyOf(Ints.asList(chars)));
    return messageBuilder.build();
  }

  /** Converts a mime4j {@link Mailbox} into an {@link Address} made of its name and address. */
  private static Address toAddress(Mailbox mailbox) {
    return new Address(mailbox.getName(), mailbox.getAddress());
  }

  /**
   * Traverses a mime tree and parses out text and html parts. All other parts will be dropped.
   *
   * @param part {@code MimePart} to parse
   * @param textBuilder {@link StringBuilder} to append all plaintext parts
   * @param htmlBuilder {@link StringBuilder} to append all html parts
   * @throws IOException in case of a failure while transforming the input to a {@link String}
   */
  private static void handleMimePart(
      Entity part, StringBuilder textBuilder, StringBuilder htmlBuilder) throws IOException {
    String mimeType = part.getMimeType();
    if (isPlainOrHtml(mimeType) && !isAttachment(part.getDispositionType())) {
      TextBody tb = (TextBody) part.getBody();
      String result;
      // Decode the body with the charset declared for this part and release the reader afterwards.
      try (InputStreamReader reader =
          new InputStreamReader(tb.getInputStream(), tb.getMimeCharset())) {
        result = CharStreams.toString(reader);
      }
      // isPlainOrHtml() guarantees the type is one of the two, so anything that is not plain
      // text is html.
      StringBuilder target = mimeType.equals("text/plain") ? textBuilder : htmlBuilder;
      target.append(result);
    } else if (isMultipart(mimeType)) {
      Multipart multipart = (Multipart) part.getBody();
      for (Entity e : multipart.getBodyParts()) {
        handleMimePart(e, textBuilder, htmlBuilder);
      }
    }
  }

  /** Returns true if the mime type is exactly {@code text/plain} or {@code text/html}. */
  private static boolean isPlainOrHtml(String mimeType) {
    return (mimeType.equals("text/plain") || mimeType.equals("text/html"));
  }

  /** Returns true if the mime type denotes any kind of multipart container. */
  private static boolean isMultipart(String mimeType) {
    return mimeType.startsWith("multipart/");
  }

  /** Returns true if the (possibly null) disposition type marks the part as an attachment. */
  private static boolean isAttachment(String dispositionType) {
    return "attachment".equals(dispositionType);
  }
}

View File

@@ -0,0 +1,147 @@
// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.mail;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import com.google.gerrit.reviewdb.client.Comment;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/** Provides parsing functionality for plaintext email. */
public class TextParser {
  private TextParser() {}

  /**
   * Parses comments from plaintext email.
   *
   * @param email the message as received from the email service
   * @param comments list of {@link Comment}s previously persisted on the change that caused the
   *     original notification email to be sent out. Ordering must be the same as in the outbound
   *     email
   * @param changeUrl canonical change url that points to the change on this Gerrit instance.
   *     Example: https://go-review.googlesource.com/#/c/91570
   * @return list of MailComments parsed from the plaintext part of the email; empty if the email
   *     has no plaintext part
   */
  public static List<MailComment> parse(
      MailMessage email, Collection<Comment> comments, String changeUrl) {
    List<MailComment> parsedComments = new ArrayList<>();

    String body = email.textContent();
    if (Strings.isNullOrEmpty(body)) {
      // The text content is absent for emails without a plaintext part, in which case there is
      // nothing to parse.
      return parsedComments;
    }
    // Replace CR-LF by \n
    body = body.replace("\r\n", "\n");

    // Some email clients (like GMail) use >> for enquoting text when there are
    // inline comments that the users typed. These will then be enquoted by a
    // single >. We sanitize this by unifying it into >. Inline comments typed
    // by the user will not be enquoted.
    //
    // Example:
    // Some comment
    // >> Quoted Text
    // >> Quoted Text
    // > A comment typed in the email directly
    String singleQuotePattern = "\n> ";
    String doubleQuotePattern = "\n>> ";
    if (countOccurrences(body, doubleQuotePattern) > countOccurrences(body, singleQuotePattern)) {
      body = body.replace(doubleQuotePattern, singleQuotePattern);
    }

    PeekingIterator<Comment> iter = Iterators.peekingIterator(comments.iterator());

    MailComment currentComment = null;
    String lastEncounteredFileName = null;
    Comment lastEncounteredComment = null;
    for (String line : Splitter.on('\n').split(body)) {
      if (line.equals(">")) {
        // Skip empty lines
        continue;
      }

      if (!line.startsWith("> ")) {
        // This is a comment. Try to append to previous comment if applicable or
        // create a new comment.
        if (currentComment == null) {
          currentComment = newComment(line, lastEncounteredComment, lastEncounteredFileName);
        } else {
          currentComment.message += "\n" + line;
        }
        continue;
      }

      String quoted = line.substring("> ".length()).trim();
      // This is not a comment, try to advance the file/comment pointers and
      // add previous comment to list if applicable
      if (currentComment != null) {
        flushComment(currentComment, parsedComments);
        currentComment = null;
      }

      if (!iter.hasNext()) {
        continue;
      }
      Comment perspectiveComment = iter.peek();
      if (quoted.equals(ParserUtil.filePath(changeUrl, perspectiveComment))) {
        if (lastEncounteredFileName == null
            || !lastEncounteredFileName.equals(perspectiveComment.key.filename)) {
          // This is the annotation of a file
          lastEncounteredFileName = perspectiveComment.key.filename;
          lastEncounteredComment = null;
        } else if (perspectiveComment.lineNbr == 0) {
          // This was originally a file-level comment
          lastEncounteredComment = perspectiveComment;
          iter.next();
        }
      } else if (ParserUtil.isCommentUrl(quoted, changeUrl, perspectiveComment)) {
        lastEncounteredComment = perspectiveComment;
        iter.next();
      }
    }
    // There is no need to attach the currentComment after this loop as all
    // emails have footers and other enquoted text after the last comment
    // appeared and the last comment will have already been added to the list
    // at this point.
    return parsedComments;
  }

  /**
   * Starts a new comment from the first line of user-typed text, typed according to the position
   * in the quoted email at which it was found.
   */
  private static MailComment newComment(
      String firstLine, Comment lastEncounteredComment, String lastEncounteredFileName) {
    MailComment comment = new MailComment();
    comment.message = firstLine;
    if (lastEncounteredComment != null) {
      // Comment sent in reply to another comment
      comment.inReplyTo = lastEncounteredComment;
      comment.type = MailComment.CommentType.INLINE_COMMENT;
    } else if (lastEncounteredFileName != null) {
      // File comment not sent in reply to another comment
      comment.type = MailComment.CommentType.FILE_COMMENT;
      comment.fileName = lastEncounteredFileName;
    } else {
      // Change message
      comment.type = MailComment.CommentType.CHANGE_MESSAGE;
    }
    return comment;
  }

  /**
   * Finishes a comment: trims the quotation line off change messages and adds the comment to the
   * parsed comments unless its message is empty.
   */
  private static void flushComment(MailComment comment, List<MailComment> parsedComments) {
    if (comment.type == MailComment.CommentType.CHANGE_MESSAGE) {
      comment.message = ParserUtil.trimQuotation(comment.message);
    }
    if (!Strings.isNullOrEmpty(comment.message)) {
      ParserUtil.appendOrAddNewComment(comment, parsedComments);
    }
  }

  /** Counts the non-overlapping occurrences of pattern in s. The pattern must not be empty. */
  private static int countOccurrences(String s, String pattern) {
    return (s.length() - s.replace(pattern, "").length()) / pattern.length();
  }
}