Make MetadataParser more resistent
In this commit: - Make MetadataParser resistent to blanks - Convert all CRLF to LF for unified processing - For HTML processing split on <div> instead of <p> to adapt the parser to the new email format we send out to users. Change-Id: I7e0316915765dab256771467e0e8345ca75802aa
This commit is contained in:
@@ -23,9 +23,14 @@ import com.google.gerrit.server.mail.MailUtil;
|
|||||||
import com.google.gerrit.server.mail.MetadataName;
|
import com.google.gerrit.server.mail.MetadataName;
|
||||||
import java.sql.Timestamp;
|
import java.sql.Timestamp;
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
|
import java.time.format.DateTimeParseException;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
/** Parse metadata from inbound email */
|
/** Parse metadata from inbound email */
|
||||||
public class MetadataParser {
|
public class MetadataParser {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(MailProcessor.class.getName());
|
||||||
|
|
||||||
public static MailMetadata parse(MailMessage m) {
|
public static MailMetadata parse(MailMessage m) {
|
||||||
MailMetadata metadata = new MailMetadata();
|
MailMetadata metadata = new MailMetadata();
|
||||||
// Find author
|
// Find author
|
||||||
@@ -40,8 +45,12 @@ public class MetadataParser {
|
|||||||
String ps = header.substring(toHeaderWithDelimiter(MetadataName.PATCH_SET).length());
|
String ps = header.substring(toHeaderWithDelimiter(MetadataName.PATCH_SET).length());
|
||||||
metadata.patchSet = Ints.tryParse(ps);
|
metadata.patchSet = Ints.tryParse(ps);
|
||||||
} else if (header.startsWith(toHeaderWithDelimiter(MetadataName.TIMESTAMP))) {
|
} else if (header.startsWith(toHeaderWithDelimiter(MetadataName.TIMESTAMP))) {
|
||||||
String ts = header.substring(toHeaderWithDelimiter(MetadataName.TIMESTAMP).length());
|
String ts = header.substring(toHeaderWithDelimiter(MetadataName.TIMESTAMP).length()).trim();
|
||||||
|
try {
|
||||||
metadata.timestamp = Timestamp.from(MailUtil.rfcDateformatter.parse(ts, Instant::from));
|
metadata.timestamp = Timestamp.from(MailUtil.rfcDateformatter.parse(ts, Instant::from));
|
||||||
|
} catch (DateTimeParseException e) {
|
||||||
|
log.error("Mail: Error while parsing timestamp from header of message " + m.id(), e);
|
||||||
|
}
|
||||||
} else if (header.startsWith(toHeaderWithDelimiter(MetadataName.MESSAGE_TYPE))) {
|
} else if (header.startsWith(toHeaderWithDelimiter(MetadataName.MESSAGE_TYPE))) {
|
||||||
metadata.messageType =
|
metadata.messageType =
|
||||||
header.substring(toHeaderWithDelimiter(MetadataName.MESSAGE_TYPE).length());
|
header.substring(toHeaderWithDelimiter(MetadataName.MESSAGE_TYPE).length());
|
||||||
@@ -53,18 +62,18 @@ public class MetadataParser {
|
|||||||
|
|
||||||
// If the required fields were not yet found, continue to parse the text
|
// If the required fields were not yet found, continue to parse the text
|
||||||
if (!Strings.isNullOrEmpty(m.textContent())) {
|
if (!Strings.isNullOrEmpty(m.textContent())) {
|
||||||
String[] lines = m.textContent().split("\n");
|
String[] lines = m.textContent().replace("\r\n", "\n").split("\n");
|
||||||
extractFooters(lines, metadata);
|
extractFooters(lines, metadata, m);
|
||||||
if (metadata.hasRequiredFields()) {
|
if (metadata.hasRequiredFields()) {
|
||||||
return metadata;
|
return metadata;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the required fields were not yet found, continue to parse the HTML
|
// If the required fields were not yet found, continue to parse the HTML
|
||||||
// HTML footer are contained inside a <p> tag
|
// HTML footer are contained inside a <div> tag
|
||||||
if (!Strings.isNullOrEmpty(m.htmlContent())) {
|
if (!Strings.isNullOrEmpty(m.htmlContent())) {
|
||||||
String[] lines = m.htmlContent().split("</p>");
|
String[] lines = m.htmlContent().replace("\r\n", "\n").split("</div>");
|
||||||
extractFooters(lines, metadata);
|
extractFooters(lines, metadata, m);
|
||||||
if (metadata.hasRequiredFields()) {
|
if (metadata.hasRequiredFields()) {
|
||||||
return metadata;
|
return metadata;
|
||||||
}
|
}
|
||||||
@@ -73,7 +82,7 @@ public class MetadataParser {
|
|||||||
return metadata;
|
return metadata;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void extractFooters(String[] lines, MailMetadata metadata) {
|
private static void extractFooters(String[] lines, MailMetadata metadata, MailMessage m) {
|
||||||
for (String line : lines) {
|
for (String line : lines) {
|
||||||
if (metadata.changeId == null && line.contains(MetadataName.CHANGE_ID)) {
|
if (metadata.changeId == null && line.contains(MetadataName.CHANGE_ID)) {
|
||||||
metadata.changeId = extractFooter(toFooterWithDelimiter(MetadataName.CHANGE_ID), line);
|
metadata.changeId = extractFooter(toFooterWithDelimiter(MetadataName.CHANGE_ID), line);
|
||||||
@@ -82,7 +91,11 @@ public class MetadataParser {
|
|||||||
Ints.tryParse(extractFooter(toFooterWithDelimiter(MetadataName.PATCH_SET), line));
|
Ints.tryParse(extractFooter(toFooterWithDelimiter(MetadataName.PATCH_SET), line));
|
||||||
} else if (metadata.timestamp == null && line.contains(MetadataName.TIMESTAMP)) {
|
} else if (metadata.timestamp == null && line.contains(MetadataName.TIMESTAMP)) {
|
||||||
String ts = extractFooter(toFooterWithDelimiter(MetadataName.TIMESTAMP), line);
|
String ts = extractFooter(toFooterWithDelimiter(MetadataName.TIMESTAMP), line);
|
||||||
|
try {
|
||||||
metadata.timestamp = Timestamp.from(MailUtil.rfcDateformatter.parse(ts, Instant::from));
|
metadata.timestamp = Timestamp.from(MailUtil.rfcDateformatter.parse(ts, Instant::from));
|
||||||
|
} catch (DateTimeParseException e) {
|
||||||
|
log.error("Mail: Error while parsing timestamp from footer of message " + m.id(), e);
|
||||||
|
}
|
||||||
} else if (metadata.messageType == null && line.contains(MetadataName.MESSAGE_TYPE)) {
|
} else if (metadata.messageType == null && line.contains(MetadataName.MESSAGE_TYPE)) {
|
||||||
metadata.messageType =
|
metadata.messageType =
|
||||||
extractFooter(toFooterWithDelimiter(MetadataName.MESSAGE_TYPE), line);
|
extractFooter(toFooterWithDelimiter(MetadataName.MESSAGE_TYPE), line);
|
||||||
@@ -91,6 +104,6 @@ public class MetadataParser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static String extractFooter(String key, String line) {
|
private static String extractFooter(String key, String line) {
|
||||||
return line.substring(line.indexOf(key) + key.length(), line.length());
|
return line.substring(line.indexOf(key) + key.length(), line.length()).trim();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -62,11 +62,11 @@ public class MetadataParserTest {
|
|||||||
b.subject("");
|
b.subject("");
|
||||||
|
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
stringBuilder.append(toFooterWithDelimiter(MetadataName.CHANGE_ID) + "cid" + "\n");
|
stringBuilder.append(toFooterWithDelimiter(MetadataName.CHANGE_ID) + "cid" + "\r\n");
|
||||||
stringBuilder.append(toFooterWithDelimiter(MetadataName.PATCH_SET) + "1" + "\n");
|
stringBuilder.append("> " + toFooterWithDelimiter(MetadataName.PATCH_SET) + "1" + "\n");
|
||||||
stringBuilder.append(toFooterWithDelimiter(MetadataName.MESSAGE_TYPE) + "comment" + "\n");
|
stringBuilder.append(toFooterWithDelimiter(MetadataName.MESSAGE_TYPE) + "comment" + "\n");
|
||||||
stringBuilder.append(
|
stringBuilder.append(
|
||||||
toFooterWithDelimiter(MetadataName.TIMESTAMP) + "Tue, 25 Oct 2016 02:11:35 -0700" + "\n");
|
toFooterWithDelimiter(MetadataName.TIMESTAMP) + "Tue, 25 Oct 2016 02:11:35 -0700" + "\r\n");
|
||||||
b.textContent(stringBuilder.toString());
|
b.textContent(stringBuilder.toString());
|
||||||
|
|
||||||
Address author = new Address("Diffy", "test@gerritcodereview.com");
|
Address author = new Address("Diffy", "test@gerritcodereview.com");
|
||||||
@@ -91,15 +91,16 @@ public class MetadataParserTest {
|
|||||||
b.subject("");
|
b.subject("");
|
||||||
|
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
stringBuilder.append("<p>" + toFooterWithDelimiter(MetadataName.CHANGE_ID) + "cid" + "</p>");
|
|
||||||
stringBuilder.append("<p>" + toFooterWithDelimiter(MetadataName.PATCH_SET) + "1" + "</p>");
|
|
||||||
stringBuilder.append(
|
stringBuilder.append(
|
||||||
"<p>" + toFooterWithDelimiter(MetadataName.MESSAGE_TYPE) + "comment" + "</p>");
|
"<div id\"someid\">" + toFooterWithDelimiter(MetadataName.CHANGE_ID) + "cid" + "</div>");
|
||||||
|
stringBuilder.append("<div>" + toFooterWithDelimiter(MetadataName.PATCH_SET) + "1" + "</div>");
|
||||||
stringBuilder.append(
|
stringBuilder.append(
|
||||||
"<p>"
|
"<div>" + toFooterWithDelimiter(MetadataName.MESSAGE_TYPE) + "comment" + "</div>");
|
||||||
|
stringBuilder.append(
|
||||||
|
"<div>"
|
||||||
+ toFooterWithDelimiter(MetadataName.TIMESTAMP)
|
+ toFooterWithDelimiter(MetadataName.TIMESTAMP)
|
||||||
+ "Tue, 25 Oct 2016 02:11:35 -0700"
|
+ "Tue, 25 Oct 2016 02:11:35 -0700"
|
||||||
+ "</p>");
|
+ "</div>");
|
||||||
b.htmlContent(stringBuilder.toString());
|
b.htmlContent(stringBuilder.toString());
|
||||||
|
|
||||||
Address author = new Address("Diffy", "test@gerritcodereview.com");
|
Address author = new Address("Diffy", "test@gerritcodereview.com");
|
||||||
|
|||||||
Reference in New Issue
Block a user