Make MetadataParser more resistent
In this commit: - Make MetadataParser resistent to blanks - Convert all CRLF to LF for unified processing - For HTML processing split on <div> instead of <p> to adapt the parser to the new email format we send out to users. Change-Id: I7e0316915765dab256771467e0e8345ca75802aa
This commit is contained in:
		@@ -285,7 +285,7 @@ public class MailProcessorIT extends AbstractDaemonTest {
 | 
				
			|||||||
        + "Gerrit-MessageType: comment\n"
 | 
					        + "Gerrit-MessageType: comment\n"
 | 
				
			||||||
        + "Gerrit-Comment-Date: "
 | 
					        + "Gerrit-Comment-Date: "
 | 
				
			||||||
        + timestamp
 | 
					        + timestamp
 | 
				
			||||||
        + "\n";
 | 
					        + " \n";
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private MailMessage.Builder messageBuilderWithDefaultFields() {
 | 
					  private MailMessage.Builder messageBuilderWithDefaultFields() {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -23,9 +23,14 @@ import com.google.gerrit.server.mail.MailUtil;
 | 
				
			|||||||
import com.google.gerrit.server.mail.MetadataName;
 | 
					import com.google.gerrit.server.mail.MetadataName;
 | 
				
			||||||
import java.sql.Timestamp;
 | 
					import java.sql.Timestamp;
 | 
				
			||||||
import java.time.Instant;
 | 
					import java.time.Instant;
 | 
				
			||||||
 | 
					import java.time.format.DateTimeParseException;
 | 
				
			||||||
 | 
					import org.slf4j.Logger;
 | 
				
			||||||
 | 
					import org.slf4j.LoggerFactory;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/** Parse metadata from inbound email */
 | 
					/** Parse metadata from inbound email */
 | 
				
			||||||
public class MetadataParser {
 | 
					public class MetadataParser {
 | 
				
			||||||
 | 
					  private static final Logger log = LoggerFactory.getLogger(MailProcessor.class.getName());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  public static MailMetadata parse(MailMessage m) {
 | 
					  public static MailMetadata parse(MailMessage m) {
 | 
				
			||||||
    MailMetadata metadata = new MailMetadata();
 | 
					    MailMetadata metadata = new MailMetadata();
 | 
				
			||||||
    // Find author
 | 
					    // Find author
 | 
				
			||||||
@@ -40,8 +45,12 @@ public class MetadataParser {
 | 
				
			|||||||
        String ps = header.substring(toHeaderWithDelimiter(MetadataName.PATCH_SET).length());
 | 
					        String ps = header.substring(toHeaderWithDelimiter(MetadataName.PATCH_SET).length());
 | 
				
			||||||
        metadata.patchSet = Ints.tryParse(ps);
 | 
					        metadata.patchSet = Ints.tryParse(ps);
 | 
				
			||||||
      } else if (header.startsWith(toHeaderWithDelimiter(MetadataName.TIMESTAMP))) {
 | 
					      } else if (header.startsWith(toHeaderWithDelimiter(MetadataName.TIMESTAMP))) {
 | 
				
			||||||
        String ts = header.substring(toHeaderWithDelimiter(MetadataName.TIMESTAMP).length());
 | 
					        String ts = header.substring(toHeaderWithDelimiter(MetadataName.TIMESTAMP).length()).trim();
 | 
				
			||||||
 | 
					        try {
 | 
				
			||||||
          metadata.timestamp = Timestamp.from(MailUtil.rfcDateformatter.parse(ts, Instant::from));
 | 
					          metadata.timestamp = Timestamp.from(MailUtil.rfcDateformatter.parse(ts, Instant::from));
 | 
				
			||||||
 | 
					        } catch (DateTimeParseException e) {
 | 
				
			||||||
 | 
					          log.error("Mail: Error while parsing timestamp from header of message " + m.id(), e);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
      } else if (header.startsWith(toHeaderWithDelimiter(MetadataName.MESSAGE_TYPE))) {
 | 
					      } else if (header.startsWith(toHeaderWithDelimiter(MetadataName.MESSAGE_TYPE))) {
 | 
				
			||||||
        metadata.messageType =
 | 
					        metadata.messageType =
 | 
				
			||||||
            header.substring(toHeaderWithDelimiter(MetadataName.MESSAGE_TYPE).length());
 | 
					            header.substring(toHeaderWithDelimiter(MetadataName.MESSAGE_TYPE).length());
 | 
				
			||||||
@@ -53,18 +62,18 @@ public class MetadataParser {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    // If the required fields were not yet found, continue to parse the text
 | 
					    // If the required fields were not yet found, continue to parse the text
 | 
				
			||||||
    if (!Strings.isNullOrEmpty(m.textContent())) {
 | 
					    if (!Strings.isNullOrEmpty(m.textContent())) {
 | 
				
			||||||
      String[] lines = m.textContent().split("\n");
 | 
					      String[] lines = m.textContent().replace("\r\n", "\n").split("\n");
 | 
				
			||||||
      extractFooters(lines, metadata);
 | 
					      extractFooters(lines, metadata, m);
 | 
				
			||||||
      if (metadata.hasRequiredFields()) {
 | 
					      if (metadata.hasRequiredFields()) {
 | 
				
			||||||
        return metadata;
 | 
					        return metadata;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // If the required fields were not yet found, continue to parse the HTML
 | 
					    // If the required fields were not yet found, continue to parse the HTML
 | 
				
			||||||
    // HTML footer are contained inside a <p> tag
 | 
					    // HTML footer are contained inside a <div> tag
 | 
				
			||||||
    if (!Strings.isNullOrEmpty(m.htmlContent())) {
 | 
					    if (!Strings.isNullOrEmpty(m.htmlContent())) {
 | 
				
			||||||
      String[] lines = m.htmlContent().split("</p>");
 | 
					      String[] lines = m.htmlContent().replace("\r\n", "\n").split("</div>");
 | 
				
			||||||
      extractFooters(lines, metadata);
 | 
					      extractFooters(lines, metadata, m);
 | 
				
			||||||
      if (metadata.hasRequiredFields()) {
 | 
					      if (metadata.hasRequiredFields()) {
 | 
				
			||||||
        return metadata;
 | 
					        return metadata;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
@@ -73,7 +82,7 @@ public class MetadataParser {
 | 
				
			|||||||
    return metadata;
 | 
					    return metadata;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private static void extractFooters(String[] lines, MailMetadata metadata) {
 | 
					  private static void extractFooters(String[] lines, MailMetadata metadata, MailMessage m) {
 | 
				
			||||||
    for (String line : lines) {
 | 
					    for (String line : lines) {
 | 
				
			||||||
      if (metadata.changeId == null && line.contains(MetadataName.CHANGE_ID)) {
 | 
					      if (metadata.changeId == null && line.contains(MetadataName.CHANGE_ID)) {
 | 
				
			||||||
        metadata.changeId = extractFooter(toFooterWithDelimiter(MetadataName.CHANGE_ID), line);
 | 
					        metadata.changeId = extractFooter(toFooterWithDelimiter(MetadataName.CHANGE_ID), line);
 | 
				
			||||||
@@ -82,7 +91,11 @@ public class MetadataParser {
 | 
				
			|||||||
            Ints.tryParse(extractFooter(toFooterWithDelimiter(MetadataName.PATCH_SET), line));
 | 
					            Ints.tryParse(extractFooter(toFooterWithDelimiter(MetadataName.PATCH_SET), line));
 | 
				
			||||||
      } else if (metadata.timestamp == null && line.contains(MetadataName.TIMESTAMP)) {
 | 
					      } else if (metadata.timestamp == null && line.contains(MetadataName.TIMESTAMP)) {
 | 
				
			||||||
        String ts = extractFooter(toFooterWithDelimiter(MetadataName.TIMESTAMP), line);
 | 
					        String ts = extractFooter(toFooterWithDelimiter(MetadataName.TIMESTAMP), line);
 | 
				
			||||||
 | 
					        try {
 | 
				
			||||||
          metadata.timestamp = Timestamp.from(MailUtil.rfcDateformatter.parse(ts, Instant::from));
 | 
					          metadata.timestamp = Timestamp.from(MailUtil.rfcDateformatter.parse(ts, Instant::from));
 | 
				
			||||||
 | 
					        } catch (DateTimeParseException e) {
 | 
				
			||||||
 | 
					          log.error("Mail: Error while parsing timestamp from footer of message " + m.id(), e);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
      } else if (metadata.messageType == null && line.contains(MetadataName.MESSAGE_TYPE)) {
 | 
					      } else if (metadata.messageType == null && line.contains(MetadataName.MESSAGE_TYPE)) {
 | 
				
			||||||
        metadata.messageType =
 | 
					        metadata.messageType =
 | 
				
			||||||
            extractFooter(toFooterWithDelimiter(MetadataName.MESSAGE_TYPE), line);
 | 
					            extractFooter(toFooterWithDelimiter(MetadataName.MESSAGE_TYPE), line);
 | 
				
			||||||
@@ -91,6 +104,6 @@ public class MetadataParser {
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private static String extractFooter(String key, String line) {
 | 
					  private static String extractFooter(String key, String line) {
 | 
				
			||||||
    return line.substring(line.indexOf(key) + key.length(), line.length());
 | 
					    return line.substring(line.indexOf(key) + key.length(), line.length()).trim();
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -62,11 +62,11 @@ public class MetadataParserTest {
 | 
				
			|||||||
    b.subject("");
 | 
					    b.subject("");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    StringBuilder stringBuilder = new StringBuilder();
 | 
					    StringBuilder stringBuilder = new StringBuilder();
 | 
				
			||||||
    stringBuilder.append(toFooterWithDelimiter(MetadataName.CHANGE_ID) + "cid" + "\n");
 | 
					    stringBuilder.append(toFooterWithDelimiter(MetadataName.CHANGE_ID) + "cid" + "\r\n");
 | 
				
			||||||
    stringBuilder.append(toFooterWithDelimiter(MetadataName.PATCH_SET) + "1" + "\n");
 | 
					    stringBuilder.append("> " + toFooterWithDelimiter(MetadataName.PATCH_SET) + "1" + "\n");
 | 
				
			||||||
    stringBuilder.append(toFooterWithDelimiter(MetadataName.MESSAGE_TYPE) + "comment" + "\n");
 | 
					    stringBuilder.append(toFooterWithDelimiter(MetadataName.MESSAGE_TYPE) + "comment" + "\n");
 | 
				
			||||||
    stringBuilder.append(
 | 
					    stringBuilder.append(
 | 
				
			||||||
        toFooterWithDelimiter(MetadataName.TIMESTAMP) + "Tue, 25 Oct 2016 02:11:35 -0700" + "\n");
 | 
					        toFooterWithDelimiter(MetadataName.TIMESTAMP) + "Tue, 25 Oct 2016 02:11:35 -0700" + "\r\n");
 | 
				
			||||||
    b.textContent(stringBuilder.toString());
 | 
					    b.textContent(stringBuilder.toString());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Address author = new Address("Diffy", "test@gerritcodereview.com");
 | 
					    Address author = new Address("Diffy", "test@gerritcodereview.com");
 | 
				
			||||||
@@ -91,15 +91,16 @@ public class MetadataParserTest {
 | 
				
			|||||||
    b.subject("");
 | 
					    b.subject("");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    StringBuilder stringBuilder = new StringBuilder();
 | 
					    StringBuilder stringBuilder = new StringBuilder();
 | 
				
			||||||
    stringBuilder.append("<p>" + toFooterWithDelimiter(MetadataName.CHANGE_ID) + "cid" + "</p>");
 | 
					 | 
				
			||||||
    stringBuilder.append("<p>" + toFooterWithDelimiter(MetadataName.PATCH_SET) + "1" + "</p>");
 | 
					 | 
				
			||||||
    stringBuilder.append(
 | 
					    stringBuilder.append(
 | 
				
			||||||
        "<p>" + toFooterWithDelimiter(MetadataName.MESSAGE_TYPE) + "comment" + "</p>");
 | 
					        "<div id\"someid\">" + toFooterWithDelimiter(MetadataName.CHANGE_ID) + "cid" + "</div>");
 | 
				
			||||||
 | 
					    stringBuilder.append("<div>" + toFooterWithDelimiter(MetadataName.PATCH_SET) + "1" + "</div>");
 | 
				
			||||||
    stringBuilder.append(
 | 
					    stringBuilder.append(
 | 
				
			||||||
        "<p>"
 | 
					        "<div>" + toFooterWithDelimiter(MetadataName.MESSAGE_TYPE) + "comment" + "</div>");
 | 
				
			||||||
 | 
					    stringBuilder.append(
 | 
				
			||||||
 | 
					        "<div>"
 | 
				
			||||||
            + toFooterWithDelimiter(MetadataName.TIMESTAMP)
 | 
					            + toFooterWithDelimiter(MetadataName.TIMESTAMP)
 | 
				
			||||||
            + "Tue, 25 Oct 2016 02:11:35 -0700"
 | 
					            + "Tue, 25 Oct 2016 02:11:35 -0700"
 | 
				
			||||||
            + "</p>");
 | 
					            + "</div>");
 | 
				
			||||||
    b.htmlContent(stringBuilder.toString());
 | 
					    b.htmlContent(stringBuilder.toString());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Address author = new Address("Diffy", "test@gerritcodereview.com");
 | 
					    Address author = new Address("Diffy", "test@gerritcodereview.com");
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user