使用 JavaMail 从电子邮件中提取 MIME 编码的内容

Extract MIME encoded content from email using JavaMail

我有一封电子邮件 contentType: TEXT/PLAIN; charset="=?utf-8?B?ICJVVEYtOCI=?="

我应该如何提取内容,才能消除 java.io.UnsupportedEncodingException: =?utf-8?B?ICJVVEYtOCI=?= 这个异常?

我试过以下方法:

import java.io.IOException;
import javax.mail.BodyPart;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.internet.MimeMultipart;

/**
 * Demonstrates extracting the first textual body from a (possibly nested)
 * MIME multipart message using JavaMail.
 */
public class ExtractContentText
{
    /**
     * Searches the parts of a multipart container, in order, for the first
     * part whose content is a {@code String}. Nested {@code MimeMultipart}
     * parts are searched recursively; a nested container that yields nothing
     * is skipped and the search continues with the next sibling part.
     *
     * @param mimeMultipartContent the multipart container to search
     * @return the first String content found; never {@code null}
     * @throws MessagingException if no String content could be extracted. When
     *         the search was interrupted by a {@code MessagingException} or
     *         {@code IOException}, that exception is attached as the cause.
     */
    private static String extractContent(MimeMultipart mimeMultipartContent) throws MessagingException
    {
        String msgContentText = null;

        Exception cause = null;

        try
        {
            int numParts = mimeMultipartContent.getCount();

            for (int partNum = 0; msgContentText == null
                    && partNum < numParts; partNum++)
            {
                BodyPart part = mimeMultipartContent.getBodyPart(partNum);
                System.out.println("BodyContent.PartNum: "
                        + partNum + " has contentType:  " + part.getContentType());

                // TODO: Eliminate java.io.UnsupportedEncodingException: =?utf-8?B?ICJVVEYtOCI=?=
                // (it is an IOException, so it is handled by the outer catch below
                // and ends the search).
                Object partContent = part.getContent();
                if (partContent instanceof MimeMultipart)
                {
                    try
                    {
                        System.out.println("Processing inner MimeMultipart");
                        msgContentText = extractContent((MimeMultipart) partContent);
                        System.out.println("Using content found in inner MimeMultipart");
                    }
                    catch (MessagingException e)
                    {
                        System.out.println("Ignoring failure while trying to extract message content for inner MimeMultipart: "
                                + e.getMessage());
                    }
                }
                else if (partContent instanceof String)
                {
                    // Reuse the content fetched above instead of decoding the part a second time.
                    msgContentText = (String) partContent;
                    System.out.println("PartNum: "
                            + partNum + " content [" + msgContentText + "]");
                }
                else
                {
                    // Not a String (e.g. a stream or an attachment object): keep looking.
                    System.out.println("Ignoring Non-String message content: "
                            + (partContent == null ? "null" : partContent.getClass().getName()));
                }
            }
        }
        catch (MessagingException | IOException e)
        {
            cause = e;
            System.out.println("Failure while trying to extract message content: "
                    + e.getMessage());
        }

        if (msgContentText != null)
        {
            return msgContentText;
        }

        // Fail if content could not be extracted. This is done after the try
        // block rather than in a finally clause so that an unexpected runtime
        // exception is never replaced by this one.
        MessagingException ex;
        if (cause == null)
        {
            ex = new MessagingException("Message content could not be extracted");
        }
        else
        {
            ex = new MessagingException("Message content could not be extracted - "
                    + cause.getMessage(), cause);
        }
        System.out.println(ex);
        throw ex;
    }

    /**
     * Entry point of the example. The message is a placeholder that must be
     * replaced with a real {@code Message} (for example one read from a mail
     * store) before the example can print anything.
     *
     * @param args unused
     * @throws MessagingException if no message is available, its content is not
     *         multipart, or no text could be extracted
     * @throws IOException if the message content cannot be read
     */
    public static void main(String[] args) throws MessagingException, IOException
    {
        // Placeholder: obtain the message to inspect here.
        Message m = null;
        if (m == null)
        {
            throw new MessagingException("No message supplied: obtain a Message before extracting its content");
        }

        Object content = m.getContent();
        if (!(content instanceof MimeMultipart))
        {
            throw new MessagingException("Message content is not a MimeMultipart: "
                    + (content == null ? "null" : content.getClass().getName()));
        }
        System.out.println(extractContent((MimeMultipart) content));
    }
}

请参阅 JavaMail 常见问题解答中的条目:"Why do I get the UnsupportedEncodingException when I invoke getContent() on a bodypart that contains text data?"。您可以使用 javax.mail.Part.getInputStream() 访问原始字节,并自行完成解码。

要修复无效的 Content-Type 头,您可以使用 javax.mail.internet.ContentType 提取参数,再使用 javax.mail.internet.MimeUtility.decodeText 解码这个非结构化的头部值。

/**
 * Diagnostic example: parses a Content-Type value and prints its base type,
 * its raw {@code charset} parameter and the MIME-decoded form of that
 * parameter. The value itself is returned unchanged.
 *
 * <p>Example input:
 * {@code TEXT/PLAIN; charset="=?utf-8?B?ICJVVEYtOCI=?="}
 *
 * @param mp the part the header belongs to (not used here)
 * @param contentType the Content-Type header value to inspect; may be null
 * @return {@code contentType}, unmodified
 */
public static String cleanContentType(MimePart mp, String contentType) {
    if (contentType == null) {
        return null;
    }
    try {
        ContentType content = new ContentType(contentType);
        String rawCharset = content.getParameter("charset");
        System.out.println(content.getBaseType());
        System.out.println(rawCharset);
        if (rawCharset != null) {
            // The parameter may be an RFC 2047 encoded-word; decodeText unwraps it.
            System.out.println(MimeUtility.decodeText(rawCharset));
        }
    } catch (javax.mail.internet.ParseException | java.io.UnsupportedEncodingException ex) {
        System.out.println("Could not inspect content type [" + contentType + "]: " + ex.getMessage());
    }
    return contentType;
}

javax.mail.internet 包的文档中列出了一组系统属性,可用于更改某些默认行为。您可以将系统属性 mail.mime.parameters.strict 设置为 false,以放宽对内容类型参数的部分规则。您还可以将 mail.mime.contenttypehandler 设置为一个完全限定的类名,由该类负责修复内容类型问题。该自定义类必须包含具有以下签名的方法:

    /**
     * Content-Type repair hook with the signature expected by the
     * {@code mail.mime.contenttypehandler} system property. If the
     * {@code charset} parameter is MIME-encoded (an RFC 2047 encoded-word),
     * it is decoded, stripped of embedded quotes and surrounding whitespace,
     * and written back into the header value.
     *
     * <p>For example {@code charset="=?utf-8?B?ICJVVEYtOCI=?="} decodes to
     * {@code  "UTF-8"} (leading space and quotes included) and is rewritten
     * as {@code UTF-8}.
     *
     * @param mp the part the header belongs to (not used here)
     * @param contentType the raw Content-Type header value; may be null
     *        (presumably when the part has no Content-Type header; confirm
     *        against the JavaMail version in use)
     * @return the repaired header value, or {@code contentType} unchanged when
     *         it is null, has no usable charset parameter, or cannot be
     *         parsed or decoded
     */
    public static String cleanContentType(MimePart mp, String contentType) {
        if (contentType == null) {
            return null;
        }
        try {
            ContentType content = new ContentType(contentType);
            String rawCharset = content.getParameter("charset");
            if (rawCharset == null) {
                // Nothing to repair (e.g. multipart/* or binary types).
                return contentType;
            }
            // Decoding may leave quotes and padding around the name; a charset
            // name containing either is not resolvable.
            String charset = MimeUtility.decodeText(rawCharset).replace("\"", "").trim();
            if (charset.isEmpty()) {
                return contentType;
            }
            content.setParameter("charset", charset);
            return content.toString();
        } catch (MessagingException | UnsupportedEncodingException ex) {
            // Best effort: leave a header we cannot repair untouched.
            return contentType;
        }
    }