Line data Source code
// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.gerrit.mail;

import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.gerrit.entities.Comment;
import java.util.List;
import java.util.StringJoiner;
import java.util.regex.Pattern;

/** Static helpers used while parsing comments out of inbound emails. */
public final class ParserUtil {
  private static final Pattern SIMPLE_EMAIL_PATTERN =
      Pattern.compile(
          "[_A-Za-z0-9-\\+]+(\\.[_A-Za-z0-9-]+)*@[A-Za-z0-9-]+"
              + "(\\.[A-Za-z0-9]+)*(\\.[A-Za-z]{2,})");

  /** Minimum number of separate digit runs a quotation line must contain. */
  private static final int MIN_DIGIT_GROUPS = 4;

  /** Longest digit run tolerated in a quotation line. */
  private static final int MAX_DIGITS_PER_GROUP = 4;

  private ParserUtil() {}

  /**
   * Trims the quotation line that email clients add above the quoted original message.
   *
   * <p>Example: {@code On Sun, Nov 20, 2016 at 10:33 PM, <gerrit@gerritcodereview.com> wrote:}
   *
   * <p>Only the last line, or the last two lines joined together, are examined. Whatever remains
   * is re-joined with {@code '\n'} and trimmed.
   *
   * @param comment Comment parsed from an email.
   * @return Trimmed comment.
   */
  public static String trimQuotation(String comment) {
    List<String> lines = Splitter.on('\n').splitToList(comment);
    int count = lines.size();
    String lastLine = lines.get(count - 1);
    // null when the comment consists of a single line.
    String secondLastLine = count > 1 ? lines.get(count - 2) : null;

    // Everything except the last two lines is always kept.
    StringJoiner kept = new StringJoiner("\n");
    for (String line : lines.subList(0, Math.max(0, count - 2))) {
      kept.add(line);
    }

    // Check if the last line contains the full quotation pattern (date + email).
    if (containsQuotationPattern(lastLine)) {
      if (secondLastLine != null) {
        kept.add(secondLastLine);
      }
      return kept.toString().trim();
    }

    if (secondLastLine != null) {
      // Check if the second last line + the last line contain the full quotation pattern. This
      // is necessary, as the quotation line can be split across the last two lines if it gets
      // too long.
      if (containsQuotationPattern(secondLastLine + lastLine)) {
        return kept.toString().trim();
      }
      kept.add(secondLastLine);
    }

    // No quotation found: keep the last line as well.
    kept.add(lastLine);
    return kept.toString().trim();
  }

  /**
   * Checks if a string is an inline comment url on a patch set or the base.
   *
   * @param str candidate url.
   * @param changeUrl url of the change, used as prefix of the file path.
   * @param comment comment whose patch set, file and line the url must point at. The start line
   *     of {@code comment.range} is used when a range is set, otherwise {@code comment.lineNbr}.
   * @return true if {@code str} equals the file path followed by {@code "@<line>"} or {@code
   *     "@a<line>"}.
   */
  public static boolean isCommentUrl(String str, String changeUrl, Comment comment) {
    int lineNbr = comment.range == null ? comment.lineNbr : comment.range.startLine;
    String path = filePath(changeUrl, comment);
    return str.equals(path + "@" + lineNbr) || str.equals(path + "@a" + lineNbr);
  }

  /**
   * Generates the fully qualified filepath.
   *
   * @param changeUrl url of the change.
   * @param comment comment providing the patch set id and the filename.
   * @return {@code changeUrl + "/" + patchSetId + "/" + filename}.
   */
  public static String filePath(String changeUrl, Comment comment) {
    return changeUrl + "/" + comment.key.patchSetId + "/" + comment.key.filename;
  }

  /**
   * When parsing mail content, we need to append comments prematurely since we are parsing
   * block-by-block and never know what comes next. This can result in a comment being parsed as
   * two comments when it spans multiple blocks. This method takes care of merging those blocks or
   * adding a new comment to the list if appropriate.
   *
   * @param comment newly parsed comment.
   * @param comments comments parsed so far; either its last element is extended in place or
   *     {@code comment} is appended.
   */
  public static void appendOrAddNewComment(MailComment comment, List<MailComment> comments) {
    if (!comments.isEmpty()) {
      MailComment lastComment = Iterables.getLast(comments);
      if (comment.isSameCommentPath(lastComment)) {
        // Merge the two comments. Links should just be appended, while regular text that came
        // from different <div> elements should be separated by a paragraph.
        String separator = comment.isLink ? " " : "\n\n";
        lastComment.message += separator + comment.message;
        return;
      }
    }
    comments.add(comment);
  }

  /**
   * Heuristically decides whether {@code s} looks like the quotation line of an email client.
   *
   * @param s line (or two joined lines) to inspect.
   * @return true if {@code s} has at least {@link #MIN_DIGIT_GROUPS} digit runs, none longer than
   *     {@link #MAX_DIGITS_PER_GROUP} digits, and contains an email address.
   */
  private static boolean containsQuotationPattern(String s) {
    // Identifying the quotation line is hard, as it can be in any language.
    // We identify this line by its characteristics: It usually contains a
    // valid email address, some digits for the date in groups of 1-4 in a row
    // as well as some characters.

    // Count occurrences of digit groups
    int currentRun = 0;
    int longestRun = 0;
    int numDigitGroups = 0;
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      if (c >= '0' && c <= '9') {
        currentRun++;
      } else if (currentRun > 0) {
        longestRun = Math.max(longestRun, currentRun);
        currentRun = 0;
        numDigitGroups++;
      }
    }
    // A digit run that ends the string has no following character to close it inside the loop,
    // so account for it here.
    if (currentRun > 0) {
      longestRun = Math.max(longestRun, currentRun);
      numDigitGroups++;
    }
    if (numDigitGroups < MIN_DIGIT_GROUPS || longestRun > MAX_DIGITS_PER_GROUP) {
      return false;
    }

    // Check if the string contains an email address
    return SIMPLE_EMAIL_PATTERN.matcher(s).find();
  }
}