Line data Source code
1 : // Copyright (C) 2016 The Android Open Source Project 2 : // 3 : // Licensed under the Apache License, Version 2.0 (the "License"); 4 : // you may not use this file except in compliance with the License. 5 : // You may obtain a copy of the License at 6 : // 7 : // http://www.apache.org/licenses/LICENSE-2.0 8 : // 9 : // Unless required by applicable law or agreed to in writing, software 10 : // distributed under the License is distributed on an "AS IS" BASIS, 11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 : // See the License for the specific language governing permissions and 13 : // limitations under the License. 14 : 15 : package com.google.gerrit.mail; 16 : 17 : import static java.nio.charset.StandardCharsets.UTF_8; 18 : 19 : import com.google.common.base.Strings; 20 : import com.google.common.collect.ImmutableList; 21 : import com.google.common.collect.ImmutableSet; 22 : import com.google.common.io.CharStreams; 23 : import com.google.common.primitives.Ints; 24 : import com.google.gerrit.entities.Address; 25 : import java.io.ByteArrayInputStream; 26 : import java.io.IOException; 27 : import java.io.InputStreamReader; 28 : import java.time.Instant; 29 : import org.apache.james.mime4j.MimeException; 30 : import org.apache.james.mime4j.dom.Entity; 31 : import org.apache.james.mime4j.dom.Message; 32 : import org.apache.james.mime4j.dom.MessageBuilder; 33 : import org.apache.james.mime4j.dom.Multipart; 34 : import org.apache.james.mime4j.dom.TextBody; 35 : import org.apache.james.mime4j.dom.address.Mailbox; 36 : import org.apache.james.mime4j.message.DefaultMessageBuilder; 37 : 38 : /** Parses raw email content received through POP3 or IMAP into an internal {@link MailMessage}. */ 39 : public class RawMailParser { 40 2 : private static final ImmutableSet<String> MAIN_HEADERS = 41 2 : ImmutableSet.of("to", "from", "cc", "date", "message-id", "subject", "content-type"); 42 : 43 : private RawMailParser() {} 44 : 45 : /** 46 : * Parses a MailMessage from a string. 47 : * 48 : * @param raw {@link String} payload as received over the wire 49 : * @return parsed {@link MailMessage} 50 : * @throws MailParsingException in case parsing fails 51 : */ 52 : public static MailMessage parse(String raw) throws MailParsingException { 53 2 : MailMessage.Builder messageBuilder = MailMessage.builder(); 54 2 : messageBuilder.rawContentUTF(raw); 55 : Message mimeMessage; 56 : try { 57 2 : MessageBuilder builder = new DefaultMessageBuilder(); 58 2 : mimeMessage = builder.parseMessage(new ByteArrayInputStream(raw.getBytes(UTF_8))); 59 0 : } catch (IOException | MimeException e) { 60 0 : throw new MailParsingException("Can't parse email", e); 61 2 : } 62 : // Add general headers 63 2 : if (mimeMessage.getMessageId() != null) { 64 2 : messageBuilder.id(mimeMessage.getMessageId()); 65 : } 66 2 : if (mimeMessage.getSubject() != null) { 67 2 : messageBuilder.subject(mimeMessage.getSubject()); 68 : } 69 2 : if (mimeMessage.getDate() != null) { 70 : @SuppressWarnings("JdkObsolete") 71 2 : Instant mimeMessageInstant = mimeMessage.getDate().toInstant(); 72 2 : messageBuilder.dateReceived(mimeMessageInstant); 73 : } 74 : 75 : // Add From, To and Cc 76 2 : if (mimeMessage.getFrom() != null && !mimeMessage.getFrom().isEmpty()) { 77 2 : Mailbox from = mimeMessage.getFrom().get(0); 78 2 : messageBuilder.from(Address.create(from.getName(), from.getAddress())); 79 : } 80 2 : if (mimeMessage.getTo() != null) { 81 2 : for (Mailbox m : mimeMessage.getTo().flatten()) { 82 2 : messageBuilder.addTo(Address.create(m.getName(), m.getAddress())); 83 2 : } 84 : } 85 2 : if (mimeMessage.getCc() != null) { 86 1 : for (Mailbox m : mimeMessage.getCc().flatten()) { 87 1 : messageBuilder.addCc(Address.create(m.getName(), m.getAddress())); 88 1 : } 89 : } 90 : 91 : // Add additional headers 92 2 : mimeMessage.getHeader().getFields().stream() 93 2 : .filter(f -> !MAIN_HEADERS.contains(f.getName().toLowerCase())) 94 2 : .forEach(f -> messageBuilder.addAdditionalHeader(f.getName() + ": " + f.getBody())); 95 : 96 : // Add text and html body parts 97 2 : StringBuilder textBuilder = new StringBuilder(); 98 2 : StringBuilder htmlBuilder = new StringBuilder(); 99 : try { 100 2 : handleMimePart(mimeMessage, textBuilder, htmlBuilder); 101 0 : } catch (IOException e) { 102 0 : throw new MailParsingException("Can't parse email", e); 103 2 : } 104 2 : messageBuilder.textContent(Strings.emptyToNull(textBuilder.toString())); 105 2 : messageBuilder.htmlContent(Strings.emptyToNull(htmlBuilder.toString())); 106 : 107 : try { 108 : // build() will only succeed if all required attributes were set. We wrap 109 : // the IllegalStateException in a MailParsingException indicating that 110 : // required attributes are missing, so that the caller doesn't fall over. 111 2 : return messageBuilder.build(); 112 0 : } catch (IllegalStateException e) { 113 0 : throw new MailParsingException("Missing required attributes after email was parsed", e); 114 : } 115 : } 116 : 117 : /** 118 : * Parses a MailMessage from an array of characters. Note that the character array is int-typed. 119 : * This method is only used by POP3, which specifies that all transferred characters are US-ASCII 120 : * (RFC 6856). When reading the input in Java, io.Reader yields ints. These can be safely 121 : * converted to chars as all US-ASCII characters fit in a char. If emails contain non-ASCII 122 : * characters, such as UTF runes, these will be encoded in ASCII using either Base64 or 123 : * quoted-printable encoding. 124 : * 125 : * @param chars Array as received over the wire 126 : * @return Parsed {@link MailMessage} 127 : * @throws MailParsingException in case parsing fails 128 : */ 129 : public static MailMessage parse(int[] chars) throws MailParsingException { 130 2 : StringBuilder b = new StringBuilder(chars.length); 131 2 : for (int c : chars) { 132 2 : b.append((char) c); 133 : } 134 : 135 2 : MailMessage.Builder messageBuilder = parse(b.toString()).toBuilder(); 136 2 : messageBuilder.rawContent(ImmutableList.copyOf(Ints.asList(chars))); 137 2 : return messageBuilder.build(); 138 : } 139 : 140 : /** 141 : * Traverses a mime tree and parses out text and html parts. All other parts will be dropped. 142 : * 143 : * @param part {@code MimePart} to parse 144 : * @param textBuilder {@link StringBuilder} to append all plaintext parts 145 : * @param htmlBuilder {@link StringBuilder} to append all html parts 146 : * @throws IOException in case of a failure while transforming the input to a {@link String} 147 : */ 148 : private static void handleMimePart( 149 : Entity part, StringBuilder textBuilder, StringBuilder htmlBuilder) throws IOException { 150 2 : if (isPlainOrHtml(part.getMimeType()) && !isAttachment(part.getDispositionType())) { 151 2 : TextBody tb = (TextBody) part.getBody(); 152 2 : String result = 153 2 : CharStreams.toString(new InputStreamReader(tb.getInputStream(), tb.getMimeCharset())); 154 2 : if (part.getMimeType().equals("text/plain")) { 155 2 : textBuilder.append(result); 156 1 : } else if (part.getMimeType().equals("text/html")) { 157 1 : htmlBuilder.append(result); 158 : } 159 2 : } else if (isMultipart(part.getMimeType())) { 160 1 : Multipart multipart = (Multipart) part.getBody(); 161 1 : for (Entity e : multipart.getBodyParts()) { 162 1 : handleMimePart(e, textBuilder, htmlBuilder); 163 1 : } 164 : } 165 2 : } 166 : 167 : private static boolean isPlainOrHtml(String mimeType) { 168 2 : return (mimeType.equals("text/plain") || mimeType.equals("text/html")); 169 : } 170 : 171 : private static boolean isMultipart(String mimeType) { 172 1 : return mimeType.startsWith("multipart/"); 173 : } 174 : 175 : private static boolean isAttachment(String dispositionType) { 176 2 : return dispositionType != null && dispositionType.equals("attachment"); 177 : } 178 : }